From cb0208163f5b44651831f217db0d0d4e97dd2463 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Mon, 9 Nov 2020 20:20:18 -0500 Subject: Issue #1677 - Part 3: Create shim definitions for V8-specific code in new regexp implementation --- js/src/regexp/property-sequences.h | 1 + js/src/regexp/regexp-ast.h | 1 + js/src/regexp/regexp-bytecode-peephole.h | 1 + js/src/regexp/regexp-bytecodes.h | 1 + js/src/regexp/regexp-dotprinter.h | 1 + js/src/regexp/regexp-macro-assembler-arch.h | 16 + js/src/regexp/regexp-macro-assembler.h | 1 + js/src/regexp/regexp-shim.cc | 72 ++ js/src/regexp/regexp-shim.h | 999 ++++++++++++++++++++++++++++ js/src/regexp/regexp-stack.h | 1 + js/src/regexp/regexp.h | 1 + js/src/regexp/util/flags.h | 93 +++ js/src/regexp/util/vector.h | 204 ++++++ js/src/regexp/util/zone.h | 356 ++++++++++ 14 files changed, 1748 insertions(+) create mode 100644 js/src/regexp/regexp-macro-assembler-arch.h create mode 100644 js/src/regexp/regexp-shim.cc create mode 100644 js/src/regexp/regexp-shim.h create mode 100644 js/src/regexp/util/flags.h create mode 100644 js/src/regexp/util/vector.h create mode 100644 js/src/regexp/util/zone.h diff --git a/js/src/regexp/property-sequences.h b/js/src/regexp/property-sequences.h index cbf2ea1fc0..ed39e23795 100644 --- a/js/src/regexp/property-sequences.h +++ b/js/src/regexp/property-sequences.h @@ -7,6 +7,7 @@ #ifdef V8_INTL_SUPPORT +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp-ast.h b/js/src/regexp/regexp-ast.h index 540fdce80a..fe6913e1d4 100644 --- a/js/src/regexp/regexp-ast.h +++ b/js/src/regexp/regexp-ast.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_AST_H_ #define V8_REGEXP_REGEXP_AST_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp-bytecode-peephole.h b/js/src/regexp/regexp-bytecode-peephole.h index 96349f52cf..31d5a2d480 100644 --- a/js/src/regexp/regexp-bytecode-peephole.h +++ b/js/src/regexp/regexp-bytecode-peephole.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_ #define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp-bytecodes.h b/js/src/regexp/regexp-bytecodes.h index 35f7a30dae..24d6925db9 100644 --- a/js/src/regexp/regexp-bytecodes.h +++ b/js/src/regexp/regexp-bytecodes.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_BYTECODES_H_ #define V8_REGEXP_REGEXP_BYTECODES_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp-dotprinter.h b/js/src/regexp/regexp-dotprinter.h index b3b268f53a..e5781184c0 100644 --- a/js/src/regexp/regexp-dotprinter.h +++ b/js/src/regexp/regexp-dotprinter.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_DOTPRINTER_H_ #define V8_REGEXP_REGEXP_DOTPRINTER_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp-macro-assembler-arch.h b/js/src/regexp/regexp-macro-assembler-arch.h new file mode 100644 index 0000000000..60b5c94de4 --- /dev/null +++ b/js/src/regexp/regexp-macro-assembler-arch.h @@ -0,0 +1,16 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file implements the NativeRegExpMacroAssembler interface for +// SpiderMonkey. It provides the same interface as each of V8's +// architecture-specific implementations. + +#ifndef RegexpMacroAssemblerArch_h +#define RegexpMacroAssemblerArch_h + +#include "regexp/regexp-shim.h" + +#endif // RegexpMacroAssemblerArch_h diff --git a/js/src/regexp/regexp-macro-assembler.h b/js/src/regexp/regexp-macro-assembler.h index 67d97d1228..dd059a43d0 100644 --- a/js/src/regexp/regexp-macro-assembler.h +++ b/js/src/regexp/regexp-macro-assembler.h @@ -6,6 +6,7 @@ #define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_ #include "regexp/regexp-ast.h" +#include "regexp/regexp-shim.h" #include "regexp/regexp.h" namespace v8 { diff --git a/js/src/regexp/regexp-shim.cc b/js/src/regexp/regexp-shim.cc new file mode 100644 index 0000000000..5e1da7a605 --- /dev/null +++ b/js/src/regexp/regexp-shim.cc @@ -0,0 +1,72 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "regexp/regexp-shim.h" + +namespace v8 { +namespace internal { + +void PrintF(const char* format, ...) { + va_list arguments; + va_start(arguments, format); + vprintf(format, arguments); + va_end(arguments); +} + +void PrintF(FILE* out, const char* format, ...) { + va_list arguments; + va_start(arguments, format); + vfprintf(out, format, arguments); + va_end(arguments); +} + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169 +// (This is a hand-simplified version.) +// Writes the given character to the output escaping everything outside +// of printable ASCII range. +std::ostream& operator<<(std::ostream& os, const AsUC16& c) { + uint16_t v = c.value; + bool isPrint = 0x20 < v && v <= 0x7e; + char buf[10]; + const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x"; + snprintf(buf, sizeof(buf), format, v); + return os << buf; +} +std::ostream& operator<<(std::ostream& os, const AsUC32& c) { + int32_t v = c.value; + if (v <= String::kMaxUtf16CodeUnit) { + return os << AsUC16(v); + } + char buf[13]; + snprintf(buf, sizeof(buf), "\\u{%06x}", v); + return os << buf; +} + +DisallowJavascriptExecution::DisallowJavascriptExecution(Isolate* isolate) + : nojs_(isolate->cx()) {} + +// TODO: Map flags to jitoptions +bool FLAG_correctness_fuzzer_suppressions = false; +bool FLAG_enable_regexp_unaligned_accesses = false; +bool FLAG_harmony_regexp_sequence = false; +bool FLAG_regexp_interpret_all = false; +bool FLAG_regexp_mode_modifiers = false; +bool FLAG_regexp_optimization = false; +bool FLAG_regexp_peephole_optimization = false; +bool FLAG_regexp_possessive_quantifier = false; +bool FLAG_regexp_tier_up = false; +bool FLAG_trace_regexp_assembler = false; +bool FLAG_trace_regexp_bytecodes = false; +bool FLAG_trace_regexp_parser = false; +bool FLAG_trace_regexp_peephole_optimization = false; + +} // namespace internal +} // namespace v8 diff --git a/js/src/regexp/regexp-shim.h b/js/src/regexp/regexp-shim.h new file mode 100644 index 0000000000..0497ee552d --- /dev/null +++ b/js/src/regexp/regexp-shim.h @@ -0,0 +1,999 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef RegexpShim_h +#define RegexpShim_h + +#include "mozilla/Assertions.h" +#include "mozilla/Attributes.h" +#include "mozilla/MathAlgorithms.h" +#include "mozilla/Maybe.h" +#include "mozilla/Types.h" + +#include + +#include "jit/Label.h" +#include "js/Value.h" +#include "regexp/util/flags.h" +#include "regexp/util/vector.h" +#include "regexp/util/zone.h" +#include "vm/NativeObject.h" + +// Forward declaration of classes +namespace v8 { +namespace internal { + +class Heap; +class Isolate; +class RegExpMatchInfo; +class RegExpStack; + +} // namespace internal +} // namespace v8 + +#define V8_WARN_UNUSED_RESULT MOZ_MUST_USE +#define V8_EXPORT_PRIVATE MOZ_EXPORT +#define V8_FALLTHROUGH MOZ_FALLTHROUGH + +#define FATAL(x) MOZ_CRASH(x) +#define UNREACHABLE() MOZ_CRASH("unreachable code") +#define UNIMPLEMENTED() MOZ_CRASH("unimplemented code") +#define STATIC_ASSERT(exp) static_assert(exp, #exp) + +#define DCHECK MOZ_ASSERT +#define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs)) +#define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs)) +#define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs)) +#define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs)) +#define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs)) +#define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs)) +#define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr) +#define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr) +#define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs) +#define CHECK MOZ_RELEASE_ASSERT + +template +static constexpr inline T Min(T t1, T t2) { + return t1 < t2 ? t1 : t2; +} + +template +static constexpr inline T Max(T t1, T t2) { + return t1 > t2 ? t1 : t2; +} +#define MemCopy memcpy + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319 +// ptrdiff_t is 't' according to the standard, but MSVC uses 'I'. +#ifdef _MSC_VER +# define V8PRIxPTRDIFF "Ix" +# define V8PRIdPTRDIFF "Id" +# define V8PRIuPTRDIFF "Iu" +#else +# define V8PRIxPTRDIFF "tx" +# define V8PRIdPTRDIFF "td" +# define V8PRIuPTRDIFF "tu" +#endif + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L27-L38 +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template +char (&ArraySizeHelper(T (&array)[N]))[N]; + +// Explicitly declare the assignment operator as deleted. +#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete + +// Explicitly declare the copy constructor and assignment operator as deleted. +// This also deletes the implicit move constructor and implicit move assignment +// operator, but still allows to manually define them. +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&) = delete; \ + DISALLOW_ASSIGN(TypeName) + +// Explicitly declare all implicit constructors as deleted, namely the +// default constructor, copy constructor and operator= functions. +// This is especially useful for classes containing only static methods. +#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ + TypeName() = delete; \ + DISALLOW_COPY_AND_ASSIGN(TypeName) + +namespace v8 { + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367 +template +constexpr inline bool IsAligned(T value, U alignment) { + return (value & (alignment - 1)) == 0; +} + +using byte = uint8_t; +using Address = uintptr_t; +static const Address kNullAddress = 0; + +namespace base { + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258 +// The USE(x, ...) template is used to silence C++ compiler warnings +// issued for (yet) unused variables (typically parameters). +// The arguments are guaranteed to be evaluated from left to right. +struct Use { + template + Use(T&&) {} // NOLINT(runtime/explicit) +}; +#define USE(...) \ + do { \ + ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \ + (void)unused_tmp_array_for_use_macro; \ + } while (false) + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39 +// saturated_cast<> is analogous to static_cast<> for numeric types, except +// that the specified numeric conversion will saturate rather than overflow or +// underflow. +template +inline Dst saturated_cast(Src value); + +// This is the only specialization that is needed for regexp code. +// Instead of pulling in dozens of lines of template goo +// to derive it, I used the implementation from uint8_clamped in +// ArrayBufferObject.h. +template <> +inline uint8_t saturated_cast(int x) { + return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0; +} + +namespace bits { + +inline uint64_t CountTrailingZeros(uint64_t value) { + return mozilla::CountTrailingZeroes64(value); +} + +} // namespace bits +} // namespace base + +namespace unibrow { + +using uchar = unsigned int; + +// Origin: +// https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150 +class Latin1 { + public: + static const uint16_t kMaxChar = 0xff; + + // Convert the character to Latin-1 case equivalent if possible. + static inline uint16_t TryConvertToLatin1(uint16_t c) { + // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN". + // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN". + if (c == 0x039C || c == 0x03BC) { + return 0xB5; + } + // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER + // Y WITH DIAERESIS". + if (c == 0x0178) { + return 0xFF; + } + return c; + } +}; + +// Origin: +// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131 +class Utf16 { + public: + static inline bool IsLeadSurrogate(int code) { + return js::unicode::IsLeadSurrogate(code); + } + static inline bool IsTrailSurrogate(int code) { + return js::unicode::IsTrailSurrogate(code); + } + static inline uint16_t LeadSurrogate(uint32_t char_code) { + return js::unicode::LeadSurrogate(char_code); + } + static inline uint16_t TrailSurrogate(uint32_t char_code) { + return js::unicode::TrailSurrogate(char_code); + } + static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) { + return js::unicode::UTF16Decode(lead, trail); + } + static const uchar kMaxNonSurrogateCharCode = 0xffff; +}; + +// A cache used in case conversion. It caches the value for characters +// that either have no mapping or map to a single character independent +// of context. Characters that map to more than one character or that +// map differently depending on context are always looked up. +// Origin: +// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88 +template +class Mapping { + public: + inline Mapping() = default; + int get(uchar c, uchar n, uchar* result); + + private: + friend class Test; + int CalculateValue(uchar c, uchar n, uchar* result); + struct CacheEntry { + inline CacheEntry() : code_point_(kNoChar), offset_(0) {} + inline CacheEntry(uchar code_point, signed offset) + : code_point_(code_point), offset_(offset) {} + uchar code_point_; + signed offset_; + static const int kNoChar = (1 << 21) - 1; + }; + static const int kSize = size; + static const int kMask = kSize - 1; + CacheEntry entries_[kSize]; +}; + +// Origin: +// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252 +struct Ecma262Canonicalize { + static const int kMaxWidth = 1; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct Ecma262UnCanonicalize { + static const int kMaxWidth = 1; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; +struct CanonicalizationRange { + static const int kMaxWidth = 1; + static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr); +}; + +} // namespace unibrow + +namespace internal { + +#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y) +void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...); +void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...); + +// Superclass for classes only using static method functions. +// The subclass of AllStatic cannot be instantiated at all. +class AllStatic { +#ifdef DEBUG + public: + AllStatic() = delete; +#endif +}; + +// Superclass for classes managed with new and delete. +// In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc) +// Compare: +// https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96 +class Malloced { + public: + static void* operator new(size_t size) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + void* result = js_malloc(size); + if (!result) { + oomUnsafe.crash("Irregexp Malloced shim"); + } + return result; + } + static void operator delete(void* p) { js_free(p); } +}; + +constexpr int32_t KB = 1024; +constexpr int32_t MB = 1024 * 1024; + +#define kMaxInt JSVAL_INT_MAX +#define kMinInt JSVAL_INT_MIN +constexpr int kSystemPointerSize = sizeof(void*); + +// The largest integer n such that n and n + 1 are both exactly +// representable as a Number value. ES6 section 20.1.2.6 +constexpr double kMaxSafeInteger = 9007199254740991.0; // 2^53-1 + +// Latin1/UTF-16 constants +// Code-point values in Unicode 4.0 are 21 bits wide. +// Code units in UTF-16 are 16 bits wide. +using uc16 = uint16_t; +using uc32 = int32_t; + +constexpr int kBitsPerByte = 8; +constexpr int kBitsPerByteLog2 = 3; +constexpr int kUInt32Size = sizeof(uint32_t); +constexpr int kInt64Size = sizeof(int64_t); +constexpr int kUC16Size = sizeof(uc16); + +inline constexpr bool IsDecimalDigit(uc32 c) { return c >= '0' && c <= '9'; } +inline bool is_uint24(int val) { return (val & 0x00ffffff) == val; } + +inline bool IsIdentifierStart(uc32 c) { + return js::unicode::IsIdentifierStart(uint32_t(c)); +} +inline bool IsIdentifierPart(uc32 c) { + return js::unicode::IsIdentifierPart(uint32_t(c)); +} + +// Wrappers to disambiguate uint16_t and uc16. +struct AsUC16 { + explicit AsUC16(uint16_t v) : value(v) {} + uint16_t value; +}; + +struct AsUC32 { + explicit AsUC32(int32_t v) : value(v) {} + int32_t value; +}; + +class StdoutStream : public std::ostream {}; + +std::ostream& operator<<(std::ostream& os, const AsUC16& c); +std::ostream& operator<<(std::ostream& os, const AsUC32& c); + +// Reuse existing Maybe implementation +using mozilla::Maybe; + +template +Maybe Just(const T& value) { + return mozilla::Some(value); +} + +template +mozilla::Nothing Nothing() { + return mozilla::Nothing(); +} + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L600-L642 +// Compare 8bit/16bit chars to 8bit/16bit chars. +// Used indirectly by regexp-interpreter.cc +template +inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs, + size_t chars) { + const lchar* limit = lhs + chars; + if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) { + // memcmp compares byte-by-byte, yielding wrong results for two-byte + // strings on little-endian systems. + return memcmp(lhs, rhs, chars); + } + while (lhs < limit) { + int r = static_cast(*lhs) - static_cast(*rhs); + if (r != 0) return r; + ++lhs; + ++rhs; + } + return 0; +} +template +inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) { + DCHECK_LE(sizeof(lchar), 2); + DCHECK_LE(sizeof(rchar), 2); + if (sizeof(lchar) == 1) { + if (sizeof(rchar) == 1) { + return CompareCharsUnsigned(reinterpret_cast(lhs), + reinterpret_cast(rhs), chars); + } else { + return CompareCharsUnsigned(reinterpret_cast(lhs), + reinterpret_cast(rhs), + chars); + } + } else { + if (sizeof(rchar) == 1) { + return CompareCharsUnsigned(reinterpret_cast(lhs), + reinterpret_cast(rhs), chars); + } else { + return CompareCharsUnsigned(reinterpret_cast(lhs), + reinterpret_cast(rhs), + chars); + } + } +} + +// Origin: +// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48 +// Returns the value (0 .. 15) of a hexadecimal character c. +// If c is not a legal hexadecimal character, returns a value < 0. +// Used in regexp-parser.cc +inline int HexValue(uc32 c) { + c -= '0'; + if (static_cast(c) <= 9) return c; + c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. + if (static_cast(c) <= 5) return c + 10; + return -1; +} + +// V8::Object ~= JS::Value +class Object { + public: + // The default object constructor in V8 stores a nullptr, + // which has its low bit clear and is interpreted as Smi(0). + constexpr Object() : value_(JS::Int32Value(0)) {} + + // Conversions to/from SpiderMonkey types + constexpr Object(JS::Value value) : value_(value) {} + operator JS::Value() const { return value_; } + + // Used in regexp-macro-assembler.cc and regexp-interpreter.cc to + // check the return value of isolate->stack_guard()->HandleInterrupts() + // In V8, this will be either an exception object or undefined. + // In SM, we store the exception in the context, so we can use our normal + // idiom: return false iff we are throwing an exception. + inline bool IsException(Isolate*) const { return !value_.toBoolean(); } + + // SpiderMonkey tries to avoid leaking the internal representation of its + // objects. V8 is not so strict. These functions are used when calling / + // being called by native code: objects are converted to Addresses for the + // call, then cast back to objects on the other side. + // We might be able to upstream a patch that eliminates the need for these. + Object(Address bits); + Address ptr() const; + + protected: + JS::Value value_; +}; + +class Smi : public Object { + public: + static Smi FromInt(int32_t value) { + Smi smi; + smi.value_ = JS::Int32Value(value); + return smi; + } + static inline int32_t ToInt(const Object object) { + return JS::Value(object).toInt32(); + } +}; + +// V8::HeapObject ~= JSObject +class HeapObject : public Object { +public: + // Only used for bookkeeping of total code generated in regexp-compiler. + // We may be able to refactor this away. + int Size() const; +}; + +// A fixed-size array with Objects (aka Values) as element types +// Implemented as a wrapper around a regular native object with dense elements. +class FixedArray : public HeapObject { + public: + inline void set(uint32_t index, Object value) { + JS::Value(*this).toObject().as().setDenseElement(index, + value); + } +}; + +// A fixed-size array of bytes. +// TODO: figure out the best implementation for this. Uint8Array might work, +// but it's not currently visible outside of TypedArrayObject.cpp. +class ByteArray : public HeapObject { + public: + uint8_t get(uint32_t index); + void set(uint32_t index, uint8_t val); + uint32_t length(); + byte* GetDataStartAddress(); + byte* GetDataEndAddress(); + + static ByteArray cast(Object object); +}; + +// Like Handles in SM, V8 handles are references to marked pointers. +// Unlike SM, where Rooted pointers are created individually on the +// stack, the target of a V8 handle lives in a HandleScope. +// HandleScopes are created on the stack and register themselves with +// the isolate (~= JSContext). Whenever a Handle is created, the +// outermost HandleScope is retrieved from the isolate, and a new root +// is created in that HandleScope. The Handle remains valid for the +// lifetime of the HandleScope. +class HandleScope { + public: + HandleScope(Isolate* isolate); +}; + +// Origin: +// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171 +template +class Handle { + public: + Handle(); + Handle(T object, Isolate* isolate); + + // Constructor for handling automatic up casting. + template ::value>::type> + /*inline*/ Handle(Handle handle); + + template + /*inline*/ static const Handle cast(Handle that); + + T* operator->() const; + T operator*() const; + + bool is_null() const; + + Address address(); + + private: + template + friend class Handle; + template + friend class MaybeHandle; +}; + +// A Handle can be converted into a MaybeHandle. Converting a MaybeHandle +// into a Handle requires checking that it does not point to nullptr. This +// ensures nullptr checks before use. +// +// Also note that Handles do not provide default equality comparison or hashing +// operators on purpose. Such operators would be misleading, because intended +// semantics is ambiguous between Handle location and object identity. +// Origin: +// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78 +template +class MaybeHandle final { + public: + MaybeHandle() = default; + + // Constructor for handling automatic up casting from Handle. + // Ex. Handle can be passed when MaybeHandle is expected. + template ::value>::type> + MaybeHandle(Handle handle); + + /*inline*/ Handle ToHandleChecked() const; + + // Convert to a Handle with a type that can be upcasted to. + template + /*inline*/ bool ToHandle(Handle* out) const; +}; + +// From v8/src/handles/handles-inl.h + +template +inline Handle handle(T object, Isolate* isolate) { + return Handle(object, isolate); +} + +// RAII Guard classes + +class DisallowHeapAllocation { + public: + DisallowHeapAllocation() {} + operator const JS::AutoAssertNoGC&() const { return no_gc_; } + + private: + const JS::AutoAssertNoGC no_gc_; +}; + +// This is used inside DisallowHeapAllocation regions to enable +// allocation just before throwing an exception, to allocate the +// exception object. Specifically, it only ever guards: +// - isolate->stack_guard()->HandleInterrupts() +// - isolate->StackOverflow() +// Those cases don't allocate in SpiderMonkey, so this can be a no-op. +class AllowHeapAllocation { + public: + // Empty constructor to avoid unused_variable warnings + AllowHeapAllocation() {} +}; + +class DisallowJavascriptExecution { + public: + DisallowJavascriptExecution(Isolate* isolate); + + private: + js::AutoAssertNoContentJS nojs_; +}; + +// Origin: +// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474 +class String : public HeapObject { + private: + JSString* str() const { return value_.toString(); } + + public: + operator JSString*() const { return str(); } + + // Max char codes. + static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar; + static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar; + static const int kMaxUtf16CodeUnit = 0xffff; + static const uc32 kMaxCodePoint = 0x10ffff; + + MOZ_ALWAYS_INLINE int length() const { return str()->length(); } + uint16_t Get(uint32_t index); + bool IsFlat() { return str()->isLinear(); }; + + // These are only used in V8 functions that I want to rewrite. + // TODO: Rewrite those functions and delete this + bool IsConsString(); + bool IsExternalString(); + bool IsExternalOneByteString(); + bool IsExternalTwoByteString(); + bool IsSeqString(); + bool IsSeqOneByteString(); + bool IsSeqTwoByteString(); + bool IsSlicedString(); + bool IsThinString(); + + // Origin: + // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152 + class FlatContent { + public: + FlatContent(JSLinearString* string, const DisallowHeapAllocation& no_gc) + : string_(string), no_gc_(no_gc) {} + inline bool IsOneByte() const { return string_->hasLatin1Chars(); } + inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); } + + Vector ToOneByteVector() const { + MOZ_ASSERT(IsOneByte()); + return Vector(string_->latin1Chars(no_gc_), + string_->length()); + } + Vector ToUC16Vector() const; + // TODO: twoByteChars returns char16_t*, but uc16 is uint16_t, which is + // not compatible :( :( :( + // { + // MOZ_ASSERT(IsTwoByte()); + // return Vector(string_->twoByteChars(no_gc_), + // string_->length()); + // } + private: + const JSLinearString* string_; + const JS::AutoAssertNoGC& no_gc_; + }; + FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc) { + MOZ_ASSERT(IsFlat()); + return FlatContent(&str()->asLinear(), no_gc); + } + + static Handle Flatten(Isolate* isolate, Handle string); + + inline static String cast(Object object) { + String s; + s.value_ = JS::StringValue(JS::Value(object).toString()); + return s; + } + + inline static bool IsOneByteRepresentationUnderneath(String string) { + return string.str()->hasLatin1Chars(); + } + inline bool IsOneByteRepresentation() const { + return str()->hasLatin1Chars(); + } + + std::unique_ptr ToCString(); + + template + Vector GetCharVector(const DisallowHeapAllocation& no_gc); +}; + +// A flat string reader provides random access to the contents of a +// string independent of the character width of the string. The handle +// must be valid as long as the reader is being used. +// Origin: +// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L807-L825 +class MOZ_STACK_CLASS FlatStringReader { + public: + FlatStringReader(JSAtom* string) : string_(string) {} + int length() { return string_->length(); } + + inline char16_t Get(size_t index) { + return string_->latin1OrTwoByteChar(index); + } + + private: + JSAtom* string_; + JS::AutoCheckCannotGC nogc; +}; + +////////////////////////////////////////////////// +// TODO: Refactor NativeRegExpMacroAssembler and delete all of these: +class ConsString : public String { + public: + String first(); + String second(); + + static ConsString cast(Object object); +}; +class ExternalOneByteString : public String { + public: + const uint8_t* GetChars(); + static ExternalOneByteString cast(Object object); +}; +class ExternalTwoByteString : public String { + public: + const uc16* GetChars(); + static ExternalTwoByteString cast(Object object); +}; +class SeqOneByteString : public String { + public: + uint8_t* GetChars(const DisallowHeapAllocation& no_gc); + static SeqOneByteString cast(Object object); +}; +class SeqTwoByteString : public String { + public: + uc16* GetChars(const DisallowHeapAllocation& no_gc); + static SeqTwoByteString cast(Object object); + + static constexpr size_t kMaxCharsSize = JSString::MAX_LENGTH * 2; +}; +class SlicedString : public String { + public: + String parent(); + int offset(); + static SlicedString cast(Object object); +}; +class ThinString : public String { + public: + String actual(); + static ThinString cast(Object object); +}; +class StringShape { + public: + explicit StringShape(const String s); + bool IsCons(); + bool IsSliced(); + bool IsThin(); +}; +// End of "TODO: Delete all of these" +////////////////////////////////////////////////// + +class JSRegExp : public HeapObject { + public: + // ****************************************************** + // Methods that are called from inside the implementation + // ****************************************************** + void TierUpTick(); + bool MarkedForTierUp() const; + + Object Code(bool is_latin1) const; + Object Bytecode(bool is_latin1) const; + + uint32_t BacktrackLimit() const; + + static JSRegExp cast(Object object); + + // ****************************** + // Static constants + // ****************************** + + // Meaning of Type: + // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet. + // ATOM: A simple string to match against using an indexOf operation. + // IRREGEXP: Compiled with Irregexp. + enum Type { NOT_COMPILED, ATOM, IRREGEXP }; + + // Maximum number of captures allowed. + static constexpr int kMaxCaptures = 1 << 16; + + // ************************************************** + // JSRegExp::Flags + // ************************************************** + + struct FlagShiftBit { + static constexpr int kGlobal = 0; + static constexpr int kIgnoreCase = 1; + static constexpr int kMultiline = 2; + static constexpr int kSticky = 3; + static constexpr int kUnicode = 4; + static constexpr int kDotAll = 5; + static constexpr int kInvalid = 6; + }; + enum Flag : uint8_t { + kNone = 0, + kGlobal = 1 << FlagShiftBit::kGlobal, + kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase, + kMultiline = 1 << FlagShiftBit::kMultiline, + kSticky = 1 << FlagShiftBit::kSticky, + kUnicode = 1 << FlagShiftBit::kUnicode, + kDotAll = 1 << FlagShiftBit::kDotAll, + kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount. + }; + using Flags = base::Flags; + static constexpr int kFlagCount = 6; + + static constexpr int kNoBacktrackLimit = 0; +}; + +class Histogram { + public: + inline void AddSample(int sample) {} +}; + +class Counters { + public: + Histogram* regexp_backtracks() { return ®exp_backtracks_; } + + private: + Histogram regexp_backtracks_; +}; + +#define PROFILE(isolate, call) \ + do { \ + } while (false); + +enum class AllocationType : uint8_t { + kYoung, // Allocate in the nursery + kOld, // Allocate in the tenured heap +}; + +using StackGuard = Isolate; +using Factory = Isolate; + +class Isolate { + public: + //********** Isolate code **********// + RegExpStack* regexp_stack() const { return regexp_stack_; } + bool has_pending_exception() { return cx()->isExceptionPending(); } + void StackOverflow() { js::ReportOverRecursed(cx()); } + + unibrow::Mapping* jsregexp_uncanonicalize(); + unibrow::Mapping* + regexp_macro_assembler_canonicalize(); + unibrow::Mapping* jsregexp_canonrange(); + + // An empty stub for telemetry we don't support + void IncreaseTotalRegexpCodeGenerated(int size) {} + + Counters* counters() { return &counters_; } + + //********** Factory code **********// + inline Factory* factory() { return this; } + + Handle NewByteArray( + int length, AllocationType allocation = AllocationType::kYoung); + MOZ_MUST_USE MaybeHandle NewStringFromOneByte( + const Vector& str, + AllocationType allocation = AllocationType::kYoung); + + // Allocates a fixed array initialized with undefined values. + Handle NewFixedArray( + int length, AllocationType allocation = AllocationType::kYoung); + + template + Handle InternalizeString(const Vector& str); + + //********** Stack guard code **********// + inline StackGuard* stack_guard() { return this; } + Object HandleInterrupts() { + return Object(JS::BooleanValue(cx()->handleInterrupt())); + } + + JSContext* cx() const { return cx_; } + + private: + JSContext* cx_; + RegExpStack* regexp_stack_; + Counters counters_; +}; + +// Origin: +// https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931 +class StackLimitCheck { + public: + StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {} + + // Use this to check for stack-overflows in C++ code. + bool HasOverflowed() { return !CheckRecursionLimitDontReport(cx_); } + + // Use this to check for interrupt request in C++ code. + bool InterruptRequested() { return cx_->hasAnyPendingInterrupt(); } + + // Use this to check for stack-overflow when entering runtime from JS code. + bool JsHasOverflowed() { + return !CheckRecursionLimitConservativeDontReport(cx_); + } + + private: + JSContext* cx_; +}; + +class Code { + public: + bool operator!=(Code& other) const; + + Address raw_instruction_start(); + Address raw_instruction_end(); + Address address(); + + static Code cast(Object object); +}; + +// GeneratedCode provides an interface for calling into jit code. +// It will probably require additional work to hook this up to the +// arm simulator. +// Origin: +// https://github.com/v8/v8/blob/abfbe7687edb5b2dffe0b33b24e0a41bb86a8214/src/execution/simulator.h#L96-L164 +template +class GeneratedCode { + public: + using Signature = Return(Args...); + + static GeneratedCode FromCode(Code code); // TODO: implement + Return Call(Args... args) { return fn_ptr_(args...); } + + private: + friend class GeneratedCode; + Isolate* isolate_; + Signature* fn_ptr_; + GeneratedCode(Isolate* isolate, Signature* fn_ptr) + : isolate_(isolate), fn_ptr_(fn_ptr) {} +}; + +// Allow to use {GeneratedCode} instead of +// {GeneratedCode}. +template +class GeneratedCode : public GeneratedCode { + public: + // Automatically convert from {GeneratedCode} to + // {GeneratedCode}. + GeneratedCode(GeneratedCode other) + : GeneratedCode(other.isolate_, other.fn_ptr_) {} +}; + +enum class MessageTemplate { kStackOverflow }; + +class MessageFormatter { + public: + static const char* TemplateString(MessageTemplate index) { + switch (index) { + case MessageTemplate::kStackOverflow: + return "too much recursion"; + } + } +}; + +// Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h +class Label { + public: + Label() : inner_(js::jit::Label()) {} + + operator js::jit::Label*() { return &inner_; } + + void Unuse() { inner_.reset(); } + + bool is_linked() { return inner_.used(); } + bool is_bound() { return inner_.bound(); } + bool is_unused() { return !inner_.used() && !inner_.bound(); } + + int pos() { return inner_.offset(); } + void link_to(int pos) { inner_.use(pos); } + void bind_to(int pos) { inner_.bind(pos); } + + private: + js::jit::Label inner_; +}; + +// TODO: Map flags to jitoptions +extern bool FLAG_correctness_fuzzer_suppressions; +extern bool FLAG_enable_regexp_unaligned_accesses; +extern bool FLAG_harmony_regexp_sequence; +extern bool FLAG_regexp_interpret_all; +extern bool FLAG_regexp_mode_modifiers; +extern bool FLAG_regexp_optimization; +extern bool FLAG_regexp_peephole_optimization; +extern bool FLAG_regexp_possessive_quantifier; +extern bool FLAG_regexp_tier_up; +extern bool FLAG_trace_regexp_assembler; +extern bool FLAG_trace_regexp_bytecodes; +extern bool FLAG_trace_regexp_parser; +extern bool FLAG_trace_regexp_peephole_optimization; + +} // namespace internal +} // namespace v8 + +#endif // RegexpShim_h diff --git a/js/src/regexp/regexp-stack.h b/js/src/regexp/regexp-stack.h index 2c2cf26304..812195ad12 100644 --- a/js/src/regexp/regexp-stack.h +++ b/js/src/regexp/regexp-stack.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_STACK_H_ #define V8_REGEXP_REGEXP_STACK_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/regexp.h b/js/src/regexp/regexp.h index 1ee10da5c4..cce58da384 100644 --- a/js/src/regexp/regexp.h +++ b/js/src/regexp/regexp.h @@ -5,6 +5,7 @@ #ifndef V8_REGEXP_REGEXP_H_ #define V8_REGEXP_REGEXP_H_ +#include "regexp/regexp-shim.h" namespace v8 { namespace internal { diff --git a/js/src/regexp/util/flags.h b/js/src/regexp/util/flags.h new file mode 100644 index 0000000000..1fa421fc0c --- /dev/null +++ b/js/src/regexp/util/flags.h @@ -0,0 +1,93 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_UTIL_FLAGS_H_ +#define V8_UTIL_FLAGS_H_ + +// Origin: https://github.com/v8/v8/blob/1bafcc6b999b23ea1d394f5d267a08183e3c4e19/src/base/flags.h#L15-L90 + +namespace v8 { +namespace base { + +// The Flags class provides a type-safe way of storing OR-combinations of enum +// values. The Flags class is a template class, where T is an enum type, +// and S is the underlying storage type (usually int). +// +// The traditional C++ approach for storing OR-combinations of enum values is to +// use an int or unsigned int variable. The inconvenience with this approach is +// that there's no type checking at all; any enum value can be OR'd with any +// other enum value and passed on to a function that takes an int or unsigned +// int. +template +class Flags final { + public: + using flag_type = T; + using mask_type = S; + + constexpr Flags() : mask_(0) {} + constexpr Flags(flag_type flag) + : mask_(static_cast(flag)) {} + constexpr explicit Flags(mask_type mask) : mask_(static_cast(mask)) {} + + constexpr bool operator==(flag_type flag) const { + return mask_ == static_cast(flag); + } + constexpr bool operator!=(flag_type flag) const { + return mask_ != static_cast(flag); + } + + Flags& operator&=(const Flags& flags) { + mask_ &= flags.mask_; + return *this; + } + Flags& operator|=(const Flags& flags) { + mask_ |= flags.mask_; + return *this; + } + Flags& operator^=(const Flags& flags) { + mask_ ^= flags.mask_; + return *this; + } + + constexpr Flags operator&(const Flags& flags) const { + return Flags(mask_ & flags.mask_); + } + constexpr Flags operator|(const Flags& flags) const { + return Flags(mask_ | flags.mask_); + } + constexpr Flags operator^(const Flags& flags) const { + return Flags(mask_ ^ flags.mask_); + } + + Flags& operator&=(flag_type flag) { return operator&=(Flags(flag)); } + Flags& operator|=(flag_type flag) { return operator|=(Flags(flag)); } + Flags& operator^=(flag_type flag) { return operator^=(Flags(flag)); } + + constexpr Flags operator&(flag_type flag) const { + return operator&(Flags(flag)); + } + constexpr Flags operator|(flag_type flag) const { + return operator|(Flags(flag)); + } + constexpr Flags operator^(flag_type flag) const { + return operator^(Flags(flag)); + } + + constexpr Flags operator~() const { return Flags(~mask_); } + + constexpr operator mask_type() const { return mask_; } + constexpr bool operator!() const { return !mask_; } + + Flags without(flag_type flag) { return *this & (~Flags(flag)); } + + friend size_t hash_value(const Flags& flags) { return flags.mask_; } + + private: + mask_type mask_; +}; + +} // namespace base +} // namespace v8 + +#endif // V8_UTIL_FLAG_H_ diff --git a/js/src/regexp/util/vector.h b/js/src/regexp/util/vector.h new file mode 100644 index 0000000000..2419447d67 --- /dev/null +++ b/js/src/regexp/util/vector.h @@ -0,0 +1,204 @@ +// Copyright 2014 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_UTIL_VECTOR_H_ +#define V8_UTIL_VECTOR_H_ + +#include +#include +#include +#include + +#include "js/Utility.h" + +namespace v8 { +namespace internal { + +////////////////////////////////////////////////// + +// Adapted from: https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/allocation.h#L36-L58 + +template +T* NewArray(size_t size) { + static_assert(std::is_pod::value, ""); + js::AutoEnterOOMUnsafeRegion oomUnsafe; + T* result = static_cast(js_malloc(size * sizeof(T))); + if (!result) { + oomUnsafe.crash("Irregexp NewArray"); + } + return result; +} + +template +void DeleteArray(T* array) { + js_free(array); +} + +////////////////////////////////////////////////// + +// A non-resizable vector containing a pointer and a length. +// The Vector may or may not own the pointer, depending on context. +// Origin: +// https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/vector.h#L20-L134 + +template +class Vector { + public: + constexpr Vector() : start_(nullptr), length_(0) {} + + constexpr Vector(T* data, size_t length) : start_(data), length_(length) { + MOZ_ASSERT_IF(length != 0, data != nullptr); + } + + static Vector New(size_t length) { + return Vector(NewArray(length), length); + } + + // Returns a vector using the same backing storage as this one, + // spanning from and including 'from', to but not including 'to'. + Vector SubVector(size_t from, size_t to) const { + MOZ_ASSERT(from < to); + MOZ_ASSERT(to < length_); + return Vector(begin() + from, to - from); + } + + // Returns the length of the vector. Only use this if you really need an + // integer return value. Use {size()} otherwise. + int length() const { + MOZ_ASSERT(length_ <= std::numeric_limits::max()); + return static_cast(length_); + } + + // Returns the length of the vector as a size_t. + constexpr size_t size() const { return length_; } + + // Returns whether or not the vector is empty. + constexpr bool empty() const { return length_ == 0; } + + // Access individual vector elements - checks bounds in debug mode. + T& operator[](size_t index) const { + MOZ_ASSERT(index < length_); + return start_[index]; + } + + const T& at(size_t index) const { return operator[](index); } + + T& first() { return start_[0]; } + + T& last() { + MOZ_ASSERT(length_ > 0); + return start_[length_ - 1]; + } + + // Returns a pointer to the start of the data in the vector. + constexpr T* begin() const { return start_; } + + // Returns a pointer past the end of the data in the vector. + constexpr T* end() const { return start_ + length_; } + + // Returns a clone of this vector with a new backing store. + Vector Clone() const { + T* result = NewArray(length_); + for (size_t i = 0; i < length_; i++) result[i] = start_[i]; + return Vector(result, length_); + } + + void Truncate(size_t length) { + MOZ_ASSERT(length <= length_); + length_ = length; + } + + // Releases the array underlying this vector. Once disposed the + // vector is empty. + void Dispose() { + DeleteArray(start_); + start_ = nullptr; + length_ = 0; + } + + Vector operator+(size_t offset) { + MOZ_ASSERT(offset <= length_); + return Vector(start_ + offset, length_ - offset); + } + + Vector operator+=(size_t offset) { + MOZ_ASSERT(offset <= length_); + start_ += offset; + length_ -= offset; + return *this; + } + + // Implicit conversion from Vector to Vector. + inline operator Vector() const { + return Vector::cast(*this); + } + + template + static constexpr Vector cast(Vector input) { + return Vector(reinterpret_cast(input.begin()), + input.length() * sizeof(S) / sizeof(T)); + } + + bool operator==(const Vector other) const { + if (length_ != other.length_) return false; + if (start_ == other.start_) return true; + for (size_t i = 0; i < length_; ++i) { + if (start_[i] != other.start_[i]) { + return false; + } + } + return true; + } + + private: + T* start_; + size_t length_; +}; + +// The resulting vector does not contain a null-termination byte. If you want +// the null byte, use ArrayVector("foo"). +inline Vector CStrVector(const char* data) { + return Vector(data, strlen(data)); +} + +} // namespace internal + +namespace base { + +// SmallVector uses inline storage first, and reallocates when full. +// It is basically equivalent to js::Vector, and is implemented +// as a thin wrapper. +// V8's implementation: https://github.com/v8/v8/blob/master/src/base/small-vector.h +template +class SmallVector { +public: + inline bool empty() const { return inner_.empty(); } + inline const T& back() const { return inner_.back(); } + inline void pop_back() { inner_.popBack(); }; + template + inline void emplace_back(Args&&... args) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!inner_.emplaceBack(args...)) { + oomUnsafe.crash("Irregexp SmallVector emplace_back"); + } + }; + inline size_t size() const { return inner_.length(); } + inline const T& at(size_t index) const { return inner_[index]; } + + void resize_no_init(size_t new_size) { + js::AutoEnterOOMUnsafeRegion oomUnsafe; + if (!inner_.resizeUninitialized(new_size)) { + oomUnsafe.crash("Irregexp SmallVector resize"); + } + } +private: + js::Vector inner_; +}; + + +} // namespace base + +} // namespace v8 + +#endif // V8_UTIL_VECTOR_H_ diff --git a/js/src/regexp/util/zone.h b/js/src/regexp/util/zone.h new file mode 100644 index 0000000000..32487f06a6 --- /dev/null +++ b/js/src/regexp/util/zone.h @@ -0,0 +1,356 @@ +// Copyright 2019 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_UTIL_ZONE_H_ +#define V8_UTIL_ZONE_H_ + +#include +#include +#include +#include +#include + +#include "ds/LifoAlloc.h" +#include "regexp/util/vector.h" + +namespace v8 { +namespace internal { + +// V8::Zone ~= LifoAlloc +class Zone { + public: + Zone(size_t defaultChunkSize) : lifoAlloc_(defaultChunkSize) { + lifoAlloc_.setAsInfallibleByDefault(); + } + + void* New(size_t size) { return lifoAlloc_.alloc(size); } + + void DeleteAll() { lifoAlloc_.freeAll(); } + + // Returns true if the total memory allocated exceeds a threshold. + static const size_t kExcessLimit = 256 * 1024 * 1024; + bool excess_allocation() const { + return lifoAlloc_.peakSizeOfExcludingThis() > kExcessLimit; + } +private: + js::LifoAlloc lifoAlloc_; +}; + +// Superclass for classes allocated in a Zone. +// Origin: https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/zone/zone.h#L138-L155 +class ZoneObject { + public: + // Allocate a new ZoneObject of 'size' bytes in the Zone. + void* operator new(size_t size, Zone* zone) { return zone->New(size); } + + // Ideally, the delete operator should be private instead of + // public, but unfortunately the compiler sometimes synthesizes + // (unused) destructors for classes derived from ZoneObject, which + // require the operator to be visible. MSVC requires the delete + // operator to be public. + + // ZoneObjects should never be deleted individually; use + // Zone::DeleteAll() to delete all zone objects in one go. + void operator delete(void*, size_t) { MOZ_CRASH("unreachable"); } + void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); } +}; + +// ZoneLists are growable lists with constant-time access to the +// elements. The list itself and all its elements are allocated in the +// Zone. ZoneLists cannot be deleted individually; you can delete all +// objects in the Zone by calling Zone::DeleteAll(). +// Used throughout irregexp. +// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone.h#L173-L318 +// Inlines: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-list-inl.h#L17-L155 +template +class ZoneList final { + public: + // Construct a new ZoneList with the given capacity; the length is + // always zero. The capacity must be non-negative. + ZoneList(int capacity, Zone* zone) { Initialize(capacity, zone); } + // Construct a new ZoneList from a std::initializer_list + ZoneList(std::initializer_list list, Zone* zone) { + Initialize(static_cast(list.size()), zone); + for (auto& i : list) Add(i, zone); + } + // Construct a new ZoneList by copying the elements of the given ZoneList. + ZoneList(const ZoneList& other, Zone* zone) { + Initialize(other.length(), zone); + AddAll(other, zone); + } + + void* operator new(size_t size, Zone* zone) { return zone->New(size); } + + // Returns a reference to the element at index i. This reference is not safe + // to use after operations that can change the list's backing store + // (e.g. Add). + inline T& operator[](int i) const { + MOZ_ASSERT(0 < i); + MOZ_ASSERT(static_cast(i) < static_cast(length_)); + return data_[i]; + } + inline T& at(int i) const { return operator[](i); } + inline T& last() const { return at(length_ - 1); } + inline T& first() const { return at(0); } + + using iterator = T*; + inline iterator begin() const { return &data_[0]; } + inline iterator end() const { return &data_[length_]; } + + inline bool is_empty() const { return length_ == 0; } + inline int length() const { return length_; } + inline int capacity() const { return capacity_; } + + Vector ToVector() const { return Vector(data_, length_); } + Vector ToVector(int start, int length) const { + return Vector(data_ + start, std::min(length_ - start, length)); + } + + Vector ToConstVector() const { + return Vector(data_, length_); + } + + inline void Initialize(int capacity, Zone* zone) { + MOZ_ASSERT(capacity >= 0); + data_ = (capacity > 0) ? NewData(capacity, zone) : nullptr; + capacity_ = capacity; + length_ = 0; + } + + // Adds a copy of the given 'element' to the end of the list, + // expanding the list if necessary. + void Add(const T& element, Zone* zone) { + if (length_ < capacity_) { + data_[length_++] = element; + } else { + ZoneList::ResizeAdd(element, zone); + } + } + // Add all the elements from the argument list to this list. + void AddAll(const ZoneList& other, Zone* zone) { + AddAll(other.ToVector(), zone); + } + // Add all the elements from the vector to this list. + void AddAll(const Vector& other, Zone* zone) { + int result_length = length_ + other.length(); + if (capacity_ < result_length) { + Resize(result_length, zone); + } + if (std::is_fundamental()) { + memcpy(data_ + length_, other.begin(), sizeof(*data_) * other.length()); + } else { + for (int i = 0; i < other.length(); i++) { + data_[length_ + i] = other.at(i); + } + } + length_ = result_length; + } + + // Overwrites the element at the specific index. + void Set(int index, const T& element) { + MOZ_ASSERT(index >= 0 && index <= length_); + data_[index] = element; + } + + // Removes the i'th element without deleting it even if T is a + // pointer type; moves all elements above i "down". Returns the + // removed element. This function's complexity is linear in the + // size of the list. + T Remove(int i) { + T element = at(i); + length_--; + while (i < length_) { + data_[i] = data_[i + 1]; + i++; + } + return element; + } + + // Removes the last element without deleting it even if T is a + // pointer type. Returns the removed element. + inline T RemoveLast() { return Remove(length_ - 1); } + + // Clears the list by freeing the storage memory. If you want to keep the + // memory, use Rewind(0) instead. Be aware, that even if T is a + // pointer type, clearing the list doesn't delete the entries. + inline void Clear() { + data_ = nullptr; + capacity_ = 0; + length_ = 0; + } + + // Drops all but the first 'pos' elements from the list. + inline void Rewind(int pos) { + MOZ_ASSERT(0 <= pos && pos <= length_); + length_ = pos; + } + + inline bool Contains(const T& elm) const { + for (int i = 0; i < length_; i++) { + if (data_[i] == elm) return true; + } + return false; + } + + template + void StableSort(CompareFunction cmp, size_t s, size_t l) { + std::stable_sort(begin() + s, begin() + s + l, + [cmp](const T& a, const T& b) { return cmp(&a, &b) < 0; }); + } + + void operator delete(void* pointer) { MOZ_CRASH("unreachable"); } + void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); } + + private: + T* data_; + int capacity_; + int length_; + + inline T* NewData(int n, Zone* zone) { + return static_cast(zone->New(n * sizeof(T))); + } + + // Increase the capacity of a full list, and add an element. + // List must be full already. + void ResizeAdd(const T& element, Zone* zone) { + MOZ_ASSERT(length_ >= capacity_); + // Grow the list capacity by 100%, but make sure to let it grow + // even when the capacity is zero (possible initial case). + int new_capacity = 1 + 2 * capacity_; + // Since the element reference could be an element of the list, copy + // it out of the old backing storage before resizing. + T temp = element; + Resize(new_capacity, zone); + data_[length_++] = temp; + } + + // Resize the list. + void Resize(int new_capacity, Zone* zone) { + MOZ_ASSERT(length_ <= new_capacity); + T* new_data = NewData(new_capacity, zone); + if (length_ > 0) { + memcpy(new_data, data_, length_ * sizeof(T)); + } + data_ = new_data; + capacity_ = new_capacity; + } + + ZoneList& operator=(const ZoneList&) = delete; + ZoneList() = delete; + ZoneList(const ZoneList&) = delete; +}; + +// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-allocator.h#L14-L77 +template +class ZoneAllocator { +public: + using pointer = T*; + using const_pointer = const T*; + using reference = T&; + using const_reference = const T&; + using value_type = T; + using size_type = size_t; + using difference_type = ptrdiff_t; + template + struct rebind { + using other = ZoneAllocator; + }; + + explicit ZoneAllocator(Zone* zone) : zone_(zone) {} + template + ZoneAllocator(const ZoneAllocator& other) + : ZoneAllocator(other.zone_) {} + template + friend class ZoneAllocator; + + T* allocate(size_t n) { return static_cast(zone_->New(n * sizeof(T))); } + void deallocate(T* p, size_t) {} // noop for zones + + bool operator==(ZoneAllocator const& other) const { + return zone_ == other.zone_; + } + bool operator!=(ZoneAllocator const& other) const { + return zone_ != other.zone_; + } + +private: + Zone* zone_; +}; + +// Zone wrappers for std containers: +// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-containers.h#L25-L169 + +// A wrapper subclass for std::vector to make it easy to construct one +// that uses a zone allocator. +// Used throughout irregexp +template +class ZoneVector : public std::vector> { +public: + ZoneVector(Zone* zone) + : std::vector>(ZoneAllocator(zone)) {} + + // Constructs a new vector and fills it with the contents of the range + // [first, last). + template + ZoneVector(Iter first, Iter last, Zone* zone) + : std::vector>(first, last, ZoneAllocator(zone)) {} +}; + +// A wrapper subclass for std::list to make it easy to construct one +// that uses a zone allocator. +// Used in regexp-bytecode-peephole.cc +template +class ZoneLinkedList : public std::list> { + public: + // Constructs an empty list. + explicit ZoneLinkedList(Zone* zone) + : std::list>(ZoneAllocator(zone)) {} +}; + +// A wrapper subclass for std::set to make it easy to construct one that uses +// a zone allocator. +// Used in regexp-parser.cc +template > +class ZoneSet : public std::set> { + public: + // Constructs an empty set. + explicit ZoneSet(Zone* zone) + : std::set>(Compare(), + ZoneAllocator(zone)) {} +}; + +// A wrapper subclass for std::map to make it easy to construct one that uses +// a zone allocator. +// Used in regexp-bytecode-peephole.cc +template > +class ZoneMap + : public std::map>> { + public: + // Constructs an empty map. + explicit ZoneMap(Zone* zone) + : std::map>>( + Compare(), ZoneAllocator>(zone)) {} +}; + +// A wrapper subclass for std::unordered_map to make it easy to construct one +// that uses a zone allocator. +// Used in regexp-bytecode-peephole.cc +template , + typename KeyEqual = std::equal_to> +class ZoneUnorderedMap + : public std::unordered_map>> { + public: + // Constructs an empty map. + explicit ZoneUnorderedMap(Zone* zone, size_t bucket_count = 100) + : std::unordered_map>>( + bucket_count, Hash(), KeyEqual(), + ZoneAllocator>(zone)) {} +}; + +} // namespace internal +} // namespace v8 + +#endif // V8_UTIL_FLAG_H_ -- cgit v1.2.3