summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt A. Tobin <email@mattatobin.com>2022-10-05 01:01:44 -0500
committerMatt A. Tobin <email@mattatobin.com>2022-10-05 01:01:44 -0500
commit22e29728789d598287d22bc290af666940937dd4 (patch)
tree289b630ca175531885f761eef321c418a17a4624
parent17d6fa34486b3a1ac88960c7e911c045d4645500 (diff)
downloadaura-central-22e29728789d598287d22bc290af666940937dd4.tar.gz
[JS:RegEx] Remove the unfinished and lost cause "updated" regex engine until such time it isn't a lost cause.
We are retaining the minor changes made elsewhere.
-rw-r--r--js/moz.configure13
-rw-r--r--js/src/moz.build3
-rw-r--r--js/src/new-regexp/RegExpTypes.h51
-rw-r--r--js/src/new-regexp/VERSION2
-rw-r--r--js/src/new-regexp/gen-regexp-special-case.cc165
-rw-r--r--js/src/new-regexp/import-irregexp.py143
-rw-r--r--js/src/new-regexp/moz.build42
-rw-r--r--js/src/new-regexp/property-sequences.cc1246
-rw-r--r--js/src/new-regexp/property-sequences.h27
-rw-r--r--js/src/new-regexp/regexp-ast.cc342
-rw-r--r--js/src/new-regexp/regexp-ast.h615
-rw-r--r--js/src/new-regexp/regexp-bytecode-generator-inl.h55
-rw-r--r--js/src/new-regexp/regexp-bytecode-generator.cc395
-rw-r--r--js/src/new-regexp/regexp-bytecode-generator.h119
-rw-r--r--js/src/new-regexp/regexp-bytecode-peephole.cc1028
-rw-r--r--js/src/new-regexp/regexp-bytecode-peephole.h30
-rw-r--r--js/src/new-regexp/regexp-bytecodes.cc45
-rw-r--r--js/src/new-regexp/regexp-bytecodes.h251
-rw-r--r--js/src/new-regexp/regexp-compiler-tonode.cc1589
-rw-r--r--js/src/new-regexp/regexp-compiler.cc3831
-rw-r--r--js/src/new-regexp/regexp-compiler.h621
-rw-r--r--js/src/new-regexp/regexp-dotprinter.cc252
-rw-r--r--js/src/new-regexp/regexp-dotprinter.h23
-rw-r--r--js/src/new-regexp/regexp-error.cc22
-rw-r--r--js/src/new-regexp/regexp-error.h57
-rw-r--r--js/src/new-regexp/regexp-interpreter.cc1039
-rw-r--r--js/src/new-regexp/regexp-interpreter.h61
-rw-r--r--js/src/new-regexp/regexp-macro-assembler-arch.h291
-rw-r--r--js/src/new-regexp/regexp-macro-assembler-tracer.cc418
-rw-r--r--js/src/new-regexp/regexp-macro-assembler-tracer.h80
-rw-r--r--js/src/new-regexp/regexp-macro-assembler.cc344
-rw-r--r--js/src/new-regexp/regexp-macro-assembler.h280
-rw-r--r--js/src/new-regexp/regexp-native-macro-assembler.cc1213
-rw-r--r--js/src/new-regexp/regexp-nodes.h750
-rw-r--r--js/src/new-regexp/regexp-parser.cc2109
-rw-r--r--js/src/new-regexp/regexp-parser.h363
-rw-r--r--js/src/new-regexp/regexp-shim.cc212
-rw-r--r--js/src/new-regexp/regexp-shim.h1181
-rw-r--r--js/src/new-regexp/regexp-stack.cc97
-rw-r--r--js/src/new-regexp/regexp-stack.h141
-rw-r--r--js/src/new-regexp/regexp.h195
-rw-r--r--js/src/new-regexp/special-case.cc88
-rw-r--r--js/src/new-regexp/special-case.h117
-rw-r--r--js/src/new-regexp/util/flags.h93
-rw-r--r--js/src/new-regexp/util/unicode.cc1865
-rw-r--r--js/src/new-regexp/util/vector.h204
-rw-r--r--js/src/new-regexp/util/zone.h375
47 files changed, 0 insertions, 22483 deletions
diff --git a/js/moz.configure b/js/moz.configure
index 3bbaf01a3..7687731f9 100644
--- a/js/moz.configure
+++ b/js/moz.configure
@@ -246,16 +246,3 @@ with only_when('--enable-compile-environment'):
set_config('LIBFUZZER', enable_libfuzzer)
set_define('LIBFUZZER', enable_libfuzzer)
-
-# Initial support for new regexp engine
-# ==================================================
-
-js_option('--enable-new-regexp', default=False, help='Enable new regexp engine')
-
-@depends('--enable-new-regexp')
-def enable_new_regexp(value):
- if value:
- return True
-
-set_config('JS_NEW_REGEXP', enable_new_regexp)
-set_define('JS_NEW_REGEXP', enable_new_regexp)
diff --git a/js/src/moz.build b/js/src/moz.build
index 7d653d828..a3a0f8791 100644
--- a/js/src/moz.build
+++ b/js/src/moz.build
@@ -122,9 +122,6 @@ if CONFIG['JS_HAS_CTYPES']:
if CONFIG['JS_BUNDLED_EDITLINE']:
DIRS += ['editline']
-if CONFIG['JS_NEW_REGEXP']:
- DIRS += ['new-regexp']
-
if not CONFIG['JS_DISABLE_SHELL']:
DIRS += ['shell']
diff --git a/js/src/new-regexp/RegExpTypes.h b/js/src/new-regexp/RegExpTypes.h
deleted file mode 100644
index e260b5bb6..000000000
--- a/js/src/new-regexp/RegExpTypes.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// This file forward-defines Irregexp classes that need to be visible
-// to the rest of Spidermonkey and re-exports them into js::irregexp.
-
-#ifndef regexp_RegExpTypes_h
-#define regexp_RegExpTypes_h
-
-namespace js {
-class MatchPairs;
-}
-
-namespace v8 {
-namespace internal {
-
-struct InputOutputData {
- const void* inputStart;
- const void* inputEnd;
-
- // Index into inputStart (in chars) at which to begin matching.
- size_t startIndex;
-
- js::MatchPairs* matches;
-
- template <typename CharT>
- InputOutputData(const CharT* inputStart, const CharT* inputEnd,
- size_t startIndex, js::MatchPairs* matches)
- : inputStart(inputStart),
- inputEnd(inputEnd),
- startIndex(startIndex),
- matches(matches)
- {}
-};
-
-} // namespace internal
-} // namespace v8
-
-
-namespace js {
-namespace irregexp {
-
-using InputOutputData = v8::internal::InputOutputData;
-
-} // namespace irregexp
-} // namespace js
-
-#endif // regexp_RegExpTypes_h
diff --git a/js/src/new-regexp/VERSION b/js/src/new-regexp/VERSION
deleted file mode 100644
index c7d35a2bb..000000000
--- a/js/src/new-regexp/VERSION
+++ /dev/null
@@ -1,2 +0,0 @@
-Imported using import-irregexp.py from:
-https://github.com/v8/v8/tree/560f2d8bb3f3a72d78e1a7d7654235d53fdcc83c/src/regexp
diff --git a/js/src/new-regexp/gen-regexp-special-case.cc b/js/src/new-regexp/gen-regexp-special-case.cc
deleted file mode 100644
index 5a82c5d27..000000000
--- a/js/src/new-regexp/gen-regexp-special-case.cc
+++ /dev/null
@@ -1,165 +0,0 @@
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <sstream>
-
-#include "new-regexp/special-case.h"
-
-namespace v8 {
-namespace internal {
-
-static const uc32 kSurrogateStart = 0xd800;
-static const uc32 kSurrogateEnd = 0xdfff;
-static const uc32 kNonBmpStart = 0x10000;
-
-// The following code generates "src/regexp/special-case.cc".
-void PrintSet(std::ofstream& out, const char* name,
- const icu::UnicodeSet& set) {
- out << "icu::UnicodeSet Build" << name << "() {\n"
- << " icu::UnicodeSet set;\n";
- for (int32_t i = 0; i < set.getRangeCount(); i++) {
- if (set.getRangeStart(i) == set.getRangeEnd(i)) {
- out << " set.add(0x" << set.getRangeStart(i) << ");\n";
- } else {
- out << " set.add(0x" << set.getRangeStart(i) << ", 0x"
- << set.getRangeEnd(i) << ");\n";
- }
- }
- out << " set.freeze();\n"
- << " return set;\n"
- << "}\n\n";
-
- out << "struct " << name << "Data {\n"
- << " " << name << "Data() : set(Build" << name << "()) {}\n"
- << " const icu::UnicodeSet set;\n"
- << "};\n\n";
-
- out << "//static\n"
- << "const icu::UnicodeSet& RegExpCaseFolding::" << name << "() {\n"
- << " static base::LazyInstance<" << name << "Data>::type set =\n"
- << " LAZY_INSTANCE_INITIALIZER;\n"
- << " return set.Pointer()->set;\n"
- << "}\n\n";
-}
-
-void PrintSpecial(std::ofstream& out) {
- icu::UnicodeSet current;
- icu::UnicodeSet special_add;
- icu::UnicodeSet ignore;
- UErrorCode status = U_ZERO_ERROR;
- icu::UnicodeSet upper("[\\p{Lu}]", status);
- CHECK(U_SUCCESS(status));
-
- // Iterate through all chars in BMP except surrogates.
- for (UChar32 i = 0; i < kNonBmpStart; i++) {
- if (i >= kSurrogateStart && i <= kSurrogateEnd) {
- continue; // Ignore surrogate range
- }
- current.set(i, i);
- current.closeOver(USET_CASE_INSENSITIVE);
-
- // Check to see if all characters in the case-folding equivalence
- // class as defined by UnicodeSet::closeOver all map to the same
- // canonical value.
- UChar32 canonical = RegExpCaseFolding::Canonicalize(i);
- bool class_has_matching_canonical_char = false;
- bool class_has_non_matching_canonical_char = false;
- for (int32_t j = 0; j < current.getRangeCount(); j++) {
- for (UChar32 c = current.getRangeStart(j); c <= current.getRangeEnd(j);
- c++) {
- if (c == i) {
- continue;
- }
- UChar32 other_canonical = RegExpCaseFolding::Canonicalize(c);
- if (canonical == other_canonical) {
- class_has_matching_canonical_char = true;
- } else {
- class_has_non_matching_canonical_char = true;
- }
- }
- }
- // If any other character in i's equivalence class has a
- // different canonical value, then i needs special handling. If
- // no other character shares a canonical value with i, we can
- // ignore i when adding alternatives for case-independent
- // comparison. If at least one other character shares a
- // canonical value, then i needs special handling.
- if (class_has_non_matching_canonical_char) {
- if (class_has_matching_canonical_char) {
- special_add.add(i);
- } else {
- ignore.add(i);
- }
- }
- }
-
- // Verify that no Unicode equivalence class contains two non-trivial
- // JS equivalence classes. Every character in SpecialAddSet has the
- // same canonical value as every other non-IgnoreSet character in
- // its Unicode equivalence class. Therefore, if we call closeOver on
- // a set containing no IgnoreSet characters, the only characters
- // that must be removed from the result are in IgnoreSet. This fact
- // is used in CharacterRange::AddCaseEquivalents.
- for (int32_t i = 0; i < special_add.getRangeCount(); i++) {
- for (UChar32 c = special_add.getRangeStart(i);
- c <= special_add.getRangeEnd(i); c++) {
- UChar32 canonical = RegExpCaseFolding::Canonicalize(c);
- current.set(c, c);
- current.closeOver(USET_CASE_INSENSITIVE);
- current.removeAll(ignore);
- for (int32_t j = 0; j < current.getRangeCount(); j++) {
- for (UChar32 c2 = current.getRangeStart(j);
- c2 <= current.getRangeEnd(j); c2++) {
- CHECK_EQ(canonical, RegExpCaseFolding::Canonicalize(c2));
- }
- }
- }
- }
-
- PrintSet(out, "IgnoreSet", ignore);
- PrintSet(out, "SpecialAddSet", special_add);
-}
-
-void WriteHeader(const char* header_filename) {
- std::ofstream out(header_filename);
- out << std::hex << std::setfill('0') << std::setw(4);
- out << "// Copyright 2020 the V8 project authors. All rights reserved.\n"
- << "// Use of this source code is governed by a BSD-style license that\n"
- << "// can be found in the LICENSE file.\n\n"
- << "// Automatically generated by regexp/gen-regexp-special-case.cc\n\n"
- << "// The following functions are used to build UnicodeSets\n"
- << "// for special cases where the case-folding algorithm used by\n"
- << "// UnicodeSet::closeOver(USET_CASE_INSENSITIVE) does not match\n"
- << "// the algorithm defined in ECMAScript 2020 21.2.2.8.2 (Runtime\n"
- << "// Semantics: Canonicalize) step 3.\n\n"
- << "#ifdef V8_INTL_SUPPORT\n"
- << "#include \"src/base/lazy-instance.h\"\n\n"
- << "#include \"src/regexp/special-case.h\"\n\n"
- << "#include \"unicode/uniset.h\"\n"
- << "namespace v8 {\n"
- << "namespace internal {\n\n";
-
- PrintSpecial(out);
-
- out << "\n"
- << "} // namespace internal\n"
- << "} // namespace v8\n"
- << "#endif // V8_INTL_SUPPORT\n";
-}
-
-} // namespace internal
-} // namespace v8
-
-int main(int argc, const char** argv) {
- if (argc != 2) {
- std::cerr << "Usage: " << argv[0] << " <output filename>\n";
- std::exit(1);
- }
- v8::internal::WriteHeader(argv[1]);
-
- return 0;
-}
diff --git a/js/src/new-regexp/import-irregexp.py b/js/src/new-regexp/import-irregexp.py
deleted file mode 100644
index 870387232..000000000
--- a/js/src/new-regexp/import-irregexp.py
+++ /dev/null
@@ -1,143 +0,0 @@
-#!/usr/bin/env python3
-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this file,
-# You can obtain one at http://mozilla.org/MPL/2.0/.
-
-# This script handles all the mechanical steps of importing irregexp from v8:
-#
-# 1. Acquire the source: either from github, or optionally from a local copy of v8.
-# 2. Copy the contents of v8/src/regexp into js/src/regexp
-# - Exclude files that we have chosen not to import.
-# 3. While doing so, update #includes:
-# - Change "src/regexp/*" to "regexp/*".
-# - Remove other v8-specific headers completely.
-# 4. Add '#include "regexp/regexp-shim.h" in the necessary places.
-# 5. Update the VERSION file to include the correct git hash.
-#
-# Usage:
-# cd path/to/js/src/regexp
-# ./import-irregexp.py --path path/to/v8/src/regexp
-#
-# Alternatively, without the --path argument, import-irregexp.py will
-# clone v8 from github into a temporary directory.
-#
-# After running this script, changes to the shim code may be necessary
-# to account for changes in upstream irregexp.
-
-import os
-import re
-import subprocess
-import sys
-from pathlib import Path
-
-
-def get_hash(path):
- # Get the hash for the current git revision
- cwd = os.getcwd()
- os.chdir(path)
- command = ['git', 'rev-parse', 'HEAD']
- result = subprocess.check_output(command, encoding='utf-8')
- os.chdir(cwd)
- return result.rstrip()
-
-
-def copy_and_update_includes(src_path, dst_path):
- # List of header files that need to include the shim header
- need_shim = ['property-sequences.h',
- 'regexp-ast.h',
- 'regexp-bytecode-peephole.h',
- 'regexp-bytecodes.h',
- 'regexp-dotprinter.h',
- 'regexp.h',
- 'regexp-macro-assembler.h',
- 'regexp-stack.h',
- 'special-case.h']
-
- src = open(str(src_path), 'r')
- dst = open(str(dst_path), 'w')
-
- # 1. Rewrite includes of V8 regexp headers:
- regexp_include = re.compile('#include "src/regexp')
- regexp_include_new = '#include "regexp'
-
- # 2. Remove includes of other V8 headers
- other_include = re.compile('#include "src/')
-
- # 3. If needed, add '#include "regexp/regexp-shim.h"'.
- # Note: We get a little fancy to ensure that header files are
- # in alphabetic order. `need_to_add_shim` is true if we still
- # have to add the shim header in this file. `adding_shim_now`
- # is true if we have found a '#include "src/*' and we are just
- # waiting to find something alphabetically smaller (or an empty
- # line) so that we can insert the shim header in the right place.
- need_to_add_shim = src_path.name in need_shim
- adding_shim_now = False
-
- for line in src:
- if adding_shim_now:
- if (line == '\n' or line > '#include "src/regexp/regexp-shim.h"'):
- dst.write('#include "regexp/regexp-shim.h"\n')
- need_to_add_shim = False
- adding_shim_now = False
-
- if regexp_include.search(line):
- dst.write(re.sub(regexp_include, regexp_include_new, line))
- elif other_include.search(line):
- if need_to_add_shim:
- adding_shim_now = True
- else:
- dst.write(line)
-
-
-def import_from(srcdir, dstdir):
- excluded = ['OWNERS',
- 'regexp.cc',
- 'regexp-utils.cc',
- 'regexp-utils.h',
- 'regexp-macro-assembler-arch.h']
-
- for file in srcdir.iterdir():
- if file.is_dir():
- continue
- if str(file.name) in excluded:
- continue
- copy_and_update_includes(file, dstdir / file.name)
-
- # Update VERSION file
- hash = get_hash(srcdir)
- version_file = open(str(dstdir / 'VERSION'), 'w')
- version_file.write('Imported using import-irregexp.py from:\n')
- version_file.write('https://github.com/v8/v8/tree/%s/src/regexp\n' % hash)
-
-
-if __name__ == '__main__':
- import argparse
- import tempfile
-
- # This script should be run from js/src/regexp to work correctly.
- current_path = Path(os.getcwd())
- expected_path = 'js/src/regexp'
- if not current_path.match(expected_path):
- raise RuntimeError('%s must be run from %s' % (sys.argv[0],
- expected_path))
-
- parser = argparse.ArgumentParser(description='Import irregexp from v8')
- parser.add_argument('-p', '--path', help='path to v8/src/regexp')
- args = parser.parse_args()
-
- if args.path:
- src_path = Path(args.path)
-
- if not (src_path / 'regexp.h').exists():
- print('Usage:\n import-irregexp.py --path <path/to/v8/src/regexp>')
- sys.exit(1)
- import_from(src_path, current_path)
- sys.exit(0)
-
- with tempfile.TemporaryDirectory() as tempdir:
- v8_git = 'https://github.com/v8/v8.git'
- clone = 'git clone --depth 1 %s %s' % (v8_git, tempdir)
- os.system(clone)
- src_path = Path(tempdir) / 'src/regexp'
- import_from(src_path, current_path)
diff --git a/js/src/new-regexp/moz.build b/js/src/new-regexp/moz.build
deleted file mode 100644
index 2a8fab2ef..000000000
--- a/js/src/new-regexp/moz.build
+++ /dev/null
@@ -1,42 +0,0 @@
-# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-include('../js-config.mozbuild')
-include('../js-cxxflags.mozbuild')
-
-FINAL_LIBRARY = "js"
-
-# Includes should be relative to parent path
-LOCAL_INCLUDES += ["!..", ".."]
-
-SOURCES += [
- 'regexp-ast.cc',
- 'regexp-bytecode-generator.cc',
- 'regexp-bytecode-peephole.cc',
- 'regexp-bytecodes.cc',
- 'regexp-compiler-tonode.cc',
- 'regexp-compiler.cc',
- 'regexp-dotprinter.cc',
- 'regexp-interpreter.cc',
- 'regexp-macro-assembler-tracer.cc',
- 'regexp-macro-assembler.cc',
- 'regexp-native-macro-assembler.cc',
- 'regexp-parser.cc',
- 'regexp-shim.cc',
- 'regexp-stack.cc',
- 'util/unicode.cc'
-]
-
-if CONFIG['ENABLE_INTL_API']:
- CXXFLAGS += ['-DV8_INTL_SUPPORT']
- SOURCES += [
- 'property-sequences.cc',
- 'special-case.cc'
- ]
-
-if CONFIG['_MSC_VER']:
- # This is intended as a temporary workaround to unblock compilation
- # on VS2015 in warnings as errors mode.
- CXXFLAGS += ['-wd4275'] \ No newline at end of file
diff --git a/js/src/new-regexp/property-sequences.cc b/js/src/new-regexp/property-sequences.cc
deleted file mode 100644
index ca1a7f2c3..000000000
--- a/js/src/new-regexp/property-sequences.cc
+++ /dev/null
@@ -1,1246 +0,0 @@
-// Copyright 2018 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifdef V8_INTL_SUPPORT
-
-#include "new-regexp/property-sequences.h"
-
-namespace v8 {
-namespace internal {
-
-/*
-Generated from following Node.js source:
-
-package.json
-
-```
-{
- "private": true,
- "dependencies": {
- "unicode-12.0.0": "^0.7.9"
- }
-}
-```
-
-generate-unicode-sequence-property-data.js
-
-```
-const toHex = (symbol) => {
- return '0x' + symbol.codePointAt(0).toString(16)
- .toUpperCase().padStart(6, '0');
-};
-
-const generateData = (property) => {
- const sequences =
- require(`unicode-12.0.0/Sequence_Property/${ property }/index.js`);
- const id = property.replace(/_/g, '') + 's';
- const buffer = [];
- for (const sequence of sequences) {
- const symbols = [...sequence];
- const codePoints = symbols.map(symbol => toHex(symbol));
- buffer.push(' ' + codePoints.join(', ') + ', 0,');
- }
- const output =
- `const uc32 UnicodePropertySequences::k${ id }[] = {\n` +
- `${ buffer.join('\n') }\n 0 // null-terminating the list\n};\n`;
- return output;
-};
-
-const properties = [
- 'Emoji_Flag_Sequence',
- 'Emoji_Tag_Sequence',
- 'Emoji_ZWJ_Sequence',
-];
-
-for (const property of properties) {
- console.log(generateData(property));
-}
-```
-*/
-
-// clang-format off
-const uc32 UnicodePropertySequences::kEmojiFlagSequences[] = {
- 0x01F1E6, 0x01F1E8, 0,
- 0x01F1FF, 0x01F1FC, 0,
- 0x01F1E6, 0x01F1EA, 0,
- 0x01F1E6, 0x01F1EB, 0,
- 0x01F1E6, 0x01F1EC, 0,
- 0x01F1E6, 0x01F1EE, 0,
- 0x01F1E6, 0x01F1F1, 0,
- 0x01F1E6, 0x01F1F2, 0,
- 0x01F1E6, 0x01F1F4, 0,
- 0x01F1E6, 0x01F1F6, 0,
- 0x01F1E6, 0x01F1F7, 0,
- 0x01F1E6, 0x01F1F8, 0,
- 0x01F1E6, 0x01F1F9, 0,
- 0x01F1E6, 0x01F1FA, 0,
- 0x01F1E6, 0x01F1FC, 0,
- 0x01F1E6, 0x01F1FD, 0,
- 0x01F1E6, 0x01F1FF, 0,
- 0x01F1E7, 0x01F1E6, 0,
- 0x01F1E7, 0x01F1E7, 0,
- 0x01F1E7, 0x01F1E9, 0,
- 0x01F1E7, 0x01F1EA, 0,
- 0x01F1E7, 0x01F1EB, 0,
- 0x01F1E7, 0x01F1EC, 0,
- 0x01F1E7, 0x01F1ED, 0,
- 0x01F1E7, 0x01F1EE, 0,
- 0x01F1E7, 0x01F1EF, 0,
- 0x01F1E7, 0x01F1F1, 0,
- 0x01F1E7, 0x01F1F2, 0,
- 0x01F1E7, 0x01F1F3, 0,
- 0x01F1E7, 0x01F1F4, 0,
- 0x01F1E7, 0x01F1F6, 0,
- 0x01F1E7, 0x01F1F7, 0,
- 0x01F1E7, 0x01F1F8, 0,
- 0x01F1E7, 0x01F1F9, 0,
- 0x01F1E7, 0x01F1FB, 0,
- 0x01F1E7, 0x01F1FC, 0,
- 0x01F1E7, 0x01F1FE, 0,
- 0x01F1E7, 0x01F1FF, 0,
- 0x01F1E8, 0x01F1E6, 0,
- 0x01F1E8, 0x01F1E8, 0,
- 0x01F1E8, 0x01F1E9, 0,
- 0x01F1E8, 0x01F1EB, 0,
- 0x01F1E8, 0x01F1EC, 0,
- 0x01F1E8, 0x01F1ED, 0,
- 0x01F1E8, 0x01F1EE, 0,
- 0x01F1E8, 0x01F1F0, 0,
- 0x01F1E8, 0x01F1F1, 0,
- 0x01F1E8, 0x01F1F2, 0,
- 0x01F1E8, 0x01F1F3, 0,
- 0x01F1E8, 0x01F1F4, 0,
- 0x01F1E8, 0x01F1F5, 0,
- 0x01F1E8, 0x01F1F7, 0,
- 0x01F1E8, 0x01F1FA, 0,
- 0x01F1E8, 0x01F1FB, 0,
- 0x01F1E8, 0x01F1FC, 0,
- 0x01F1E8, 0x01F1FD, 0,
- 0x01F1E8, 0x01F1FE, 0,
- 0x01F1E8, 0x01F1FF, 0,
- 0x01F1E9, 0x01F1EA, 0,
- 0x01F1E9, 0x01F1EC, 0,
- 0x01F1E9, 0x01F1EF, 0,
- 0x01F1E9, 0x01F1F0, 0,
- 0x01F1E9, 0x01F1F2, 0,
- 0x01F1E9, 0x01F1F4, 0,
- 0x01F1E9, 0x01F1FF, 0,
- 0x01F1EA, 0x01F1E6, 0,
- 0x01F1EA, 0x01F1E8, 0,
- 0x01F1EA, 0x01F1EA, 0,
- 0x01F1EA, 0x01F1EC, 0,
- 0x01F1EA, 0x01F1ED, 0,
- 0x01F1EA, 0x01F1F7, 0,
- 0x01F1EA, 0x01F1F8, 0,
- 0x01F1EA, 0x01F1F9, 0,
- 0x01F1EA, 0x01F1FA, 0,
- 0x01F1EB, 0x01F1EE, 0,
- 0x01F1EB, 0x01F1EF, 0,
- 0x01F1EB, 0x01F1F0, 0,
- 0x01F1EB, 0x01F1F2, 0,
- 0x01F1EB, 0x01F1F4, 0,
- 0x01F1EB, 0x01F1F7, 0,
- 0x01F1EC, 0x01F1E6, 0,
- 0x01F1EC, 0x01F1E7, 0,
- 0x01F1EC, 0x01F1E9, 0,
- 0x01F1EC, 0x01F1EA, 0,
- 0x01F1EC, 0x01F1EB, 0,
- 0x01F1EC, 0x01F1EC, 0,
- 0x01F1EC, 0x01F1ED, 0,
- 0x01F1EC, 0x01F1EE, 0,
- 0x01F1EC, 0x01F1F1, 0,
- 0x01F1EC, 0x01F1F2, 0,
- 0x01F1EC, 0x01F1F3, 0,
- 0x01F1EC, 0x01F1F5, 0,
- 0x01F1EC, 0x01F1F6, 0,
- 0x01F1EC, 0x01F1F7, 0,
- 0x01F1EC, 0x01F1F8, 0,
- 0x01F1EC, 0x01F1F9, 0,
- 0x01F1EC, 0x01F1FA, 0,
- 0x01F1EC, 0x01F1FC, 0,
- 0x01F1EC, 0x01F1FE, 0,
- 0x01F1ED, 0x01F1F0, 0,
- 0x01F1ED, 0x01F1F2, 0,
- 0x01F1ED, 0x01F1F3, 0,
- 0x01F1ED, 0x01F1F7, 0,
- 0x01F1ED, 0x01F1F9, 0,
- 0x01F1ED, 0x01F1FA, 0,
- 0x01F1EE, 0x01F1E8, 0,
- 0x01F1EE, 0x01F1E9, 0,
- 0x01F1EE, 0x01F1EA, 0,
- 0x01F1EE, 0x01F1F1, 0,
- 0x01F1EE, 0x01F1F2, 0,
- 0x01F1EE, 0x01F1F3, 0,
- 0x01F1EE, 0x01F1F4, 0,
- 0x01F1EE, 0x01F1F6, 0,
- 0x01F1EE, 0x01F1F7, 0,
- 0x01F1EE, 0x01F1F8, 0,
- 0x01F1EE, 0x01F1F9, 0,
- 0x01F1EF, 0x01F1EA, 0,
- 0x01F1EF, 0x01F1F2, 0,
- 0x01F1EF, 0x01F1F4, 0,
- 0x01F1EF, 0x01F1F5, 0,
- 0x01F1F0, 0x01F1EA, 0,
- 0x01F1F0, 0x01F1EC, 0,
- 0x01F1F0, 0x01F1ED, 0,
- 0x01F1F0, 0x01F1EE, 0,
- 0x01F1F0, 0x01F1F2, 0,
- 0x01F1F0, 0x01F1F3, 0,
- 0x01F1F0, 0x01F1F5, 0,
- 0x01F1F0, 0x01F1F7, 0,
- 0x01F1F0, 0x01F1FC, 0,
- 0x01F1E6, 0x01F1E9, 0,
- 0x01F1F0, 0x01F1FF, 0,
- 0x01F1F1, 0x01F1E6, 0,
- 0x01F1F1, 0x01F1E7, 0,
- 0x01F1F1, 0x01F1E8, 0,
- 0x01F1F1, 0x01F1EE, 0,
- 0x01F1F1, 0x01F1F0, 0,
- 0x01F1F1, 0x01F1F7, 0,
- 0x01F1F1, 0x01F1F8, 0,
- 0x01F1F1, 0x01F1F9, 0,
- 0x01F1F1, 0x01F1FA, 0,
- 0x01F1F1, 0x01F1FB, 0,
- 0x01F1F1, 0x01F1FE, 0,
- 0x01F1F2, 0x01F1E6, 0,
- 0x01F1F2, 0x01F1E8, 0,
- 0x01F1F2, 0x01F1E9, 0,
- 0x01F1F2, 0x01F1EA, 0,
- 0x01F1F2, 0x01F1EB, 0,
- 0x01F1F2, 0x01F1EC, 0,
- 0x01F1F2, 0x01F1ED, 0,
- 0x01F1F2, 0x01F1F0, 0,
- 0x01F1F2, 0x01F1F1, 0,
- 0x01F1F2, 0x01F1F2, 0,
- 0x01F1F2, 0x01F1F3, 0,
- 0x01F1F2, 0x01F1F4, 0,
- 0x01F1F2, 0x01F1F5, 0,
- 0x01F1F2, 0x01F1F6, 0,
- 0x01F1F2, 0x01F1F7, 0,
- 0x01F1F2, 0x01F1F8, 0,
- 0x01F1F2, 0x01F1F9, 0,
- 0x01F1F2, 0x01F1FA, 0,
- 0x01F1F2, 0x01F1FB, 0,
- 0x01F1F2, 0x01F1FC, 0,
- 0x01F1F2, 0x01F1FD, 0,
- 0x01F1F2, 0x01F1FE, 0,
- 0x01F1F2, 0x01F1FF, 0,
- 0x01F1F3, 0x01F1E6, 0,
- 0x01F1F3, 0x01F1E8, 0,
- 0x01F1F3, 0x01F1EA, 0,
- 0x01F1F3, 0x01F1EB, 0,
- 0x01F1F3, 0x01F1EC, 0,
- 0x01F1F3, 0x01F1EE, 0,
- 0x01F1F3, 0x01F1F1, 0,
- 0x01F1F3, 0x01F1F4, 0,
- 0x01F1F3, 0x01F1F5, 0,
- 0x01F1F3, 0x01F1F7, 0,
- 0x01F1F3, 0x01F1FA, 0,
- 0x01F1F3, 0x01F1FF, 0,
- 0x01F1F4, 0x01F1F2, 0,
- 0x01F1F5, 0x01F1E6, 0,
- 0x01F1F5, 0x01F1EA, 0,
- 0x01F1F5, 0x01F1EB, 0,
- 0x01F1F5, 0x01F1EC, 0,
- 0x01F1F5, 0x01F1ED, 0,
- 0x01F1F5, 0x01F1F0, 0,
- 0x01F1F5, 0x01F1F1, 0,
- 0x01F1F5, 0x01F1F2, 0,
- 0x01F1F5, 0x01F1F3, 0,
- 0x01F1F5, 0x01F1F7, 0,
- 0x01F1F5, 0x01F1F8, 0,
- 0x01F1F5, 0x01F1F9, 0,
- 0x01F1F5, 0x01F1FC, 0,
- 0x01F1F5, 0x01F1FE, 0,
- 0x01F1F6, 0x01F1E6, 0,
- 0x01F1F7, 0x01F1EA, 0,
- 0x01F1F7, 0x01F1F4, 0,
- 0x01F1F7, 0x01F1F8, 0,
- 0x01F1F7, 0x01F1FA, 0,
- 0x01F1F7, 0x01F1FC, 0,
- 0x01F1F8, 0x01F1E6, 0,
- 0x01F1F8, 0x01F1E7, 0,
- 0x01F1F8, 0x01F1E8, 0,
- 0x01F1F8, 0x01F1E9, 0,
- 0x01F1F8, 0x01F1EA, 0,
- 0x01F1F8, 0x01F1EC, 0,
- 0x01F1F8, 0x01F1ED, 0,
- 0x01F1F8, 0x01F1EE, 0,
- 0x01F1F8, 0x01F1EF, 0,
- 0x01F1F8, 0x01F1F0, 0,
- 0x01F1F8, 0x01F1F1, 0,
- 0x01F1F8, 0x01F1F2, 0,
- 0x01F1F8, 0x01F1F3, 0,
- 0x01F1F8, 0x01F1F4, 0,
- 0x01F1F8, 0x01F1F7, 0,
- 0x01F1F8, 0x01F1F8, 0,
- 0x01F1F8, 0x01F1F9, 0,
- 0x01F1F8, 0x01F1FB, 0,
- 0x01F1F8, 0x01F1FD, 0,
- 0x01F1F8, 0x01F1FE, 0,
- 0x01F1F8, 0x01F1FF, 0,
- 0x01F1F9, 0x01F1E6, 0,
- 0x01F1F9, 0x01F1E8, 0,
- 0x01F1F9, 0x01F1E9, 0,
- 0x01F1F9, 0x01F1EB, 0,
- 0x01F1F9, 0x01F1EC, 0,
- 0x01F1F9, 0x01F1ED, 0,
- 0x01F1F9, 0x01F1EF, 0,
- 0x01F1F9, 0x01F1F0, 0,
- 0x01F1F9, 0x01F1F1, 0,
- 0x01F1F9, 0x01F1F2, 0,
- 0x01F1F9, 0x01F1F3, 0,
- 0x01F1F9, 0x01F1F4, 0,
- 0x01F1F9, 0x01F1F7, 0,
- 0x01F1F9, 0x01F1F9, 0,
- 0x01F1F9, 0x01F1FB, 0,
- 0x01F1F9, 0x01F1FC, 0,
- 0x01F1F9, 0x01F1FF, 0,
- 0x01F1FA, 0x01F1E6, 0,
- 0x01F1FA, 0x01F1EC, 0,
- 0x01F1FA, 0x01F1F2, 0,
- 0x01F1FA, 0x01F1F3, 0,
- 0x01F1FA, 0x01F1F8, 0,
- 0x01F1FA, 0x01F1FE, 0,
- 0x01F1FA, 0x01F1FF, 0,
- 0x01F1FB, 0x01F1E6, 0,
- 0x01F1FB, 0x01F1E8, 0,
- 0x01F1FB, 0x01F1EA, 0,
- 0x01F1FB, 0x01F1EC, 0,
- 0x01F1FB, 0x01F1EE, 0,
- 0x01F1FB, 0x01F1F3, 0,
- 0x01F1FB, 0x01F1FA, 0,
- 0x01F1FC, 0x01F1EB, 0,
- 0x01F1FC, 0x01F1F8, 0,
- 0x01F1FD, 0x01F1F0, 0,
- 0x01F1FE, 0x01F1EA, 0,
- 0x01F1FE, 0x01F1F9, 0,
- 0x01F1FF, 0x01F1E6, 0,
- 0x01F1FF, 0x01F1F2, 0,
- 0x01F1F0, 0x01F1FE, 0,
- 0 // null-terminating the list
-};
-
-const uc32 UnicodePropertySequences::kEmojiTagSequences[] = {
- 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0065, 0x0E006E, 0x0E0067, 0x0E007F, 0,
- 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0073, 0x0E0063, 0x0E0074, 0x0E007F, 0,
- 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0077, 0x0E006C, 0x0E0073, 0x0E007F, 0,
- 0 // null-terminating the list
-};
-
-const uc32 UnicodePropertySequences::kEmojiZWJSequences[] = {
- 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0,
- 0x01F441, 0x00FE0F, 0x00200D, 0x01F5E8, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
- 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
- 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0,
- 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F469, 0,
- 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
- 0x01F468, 0,
- 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
- 0x01F469, 0,
- 0x01F469, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F467, 0,
- 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
- 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0,
- 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
- 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FE, 0,
- 0x01F9D1, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0,
- 0x01F9D1, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
- 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
- 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
- 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
- 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
- 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
- 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
- 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
- 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
- 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0,
- 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
- 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
- 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
- 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0,
- 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FF, 0,
- 0x01F468, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x00200D, 0x01F9BD, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BD, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BD, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BD, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BD, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F33E, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F373, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F393, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A4, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A8, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F3EB, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F3ED, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BB, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BC, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F527, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F52C, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F680, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F692, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9AF, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BC, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BD, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F33E, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F373, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F393, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A4, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A8, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F3EB, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F3ED, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BB, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BC, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F527, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F52C, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F680, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F692, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9AF, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BC, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BD, 0,
- 0x0026F9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x0026F9, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x0026F9, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3C4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CB, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CB, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F3CC, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F3CC, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F46F, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F46F, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F471, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F471, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F473, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F473, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F477, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F477, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F481, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F481, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F482, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F482, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
- 0x01F468, 0,
- 0x01F482, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F482, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F482, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F482, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F482, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F482, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F482, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F482, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F482, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F486, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F486, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F487, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F487, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F575, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F575, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F645, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F645, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F646, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F646, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F647, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F647, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64B, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F64E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6A3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B5, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F6B6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F926, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F926, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F937, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F937, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F938, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F938, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F939, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F939, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93C, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93C, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F93E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9B9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CE, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9CF, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D7, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9D9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DB, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DE, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DE, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F9DF, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0x01F9DF, 0x00200D, 0x002642, 0x00FE0F, 0,
- 0x01F468, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x00200D, 0x01F9B3, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B3, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B3, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B3, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B3, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B0, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B1, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B2, 0,
- 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B3, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B0, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B1, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B2, 0,
- 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B3, 0,
- 0x01F3F3, 0x00FE0F, 0x00200D, 0x01F308, 0,
- 0x01F3F4, 0x00200D, 0x002620, 0x00FE0F, 0,
- 0x01F415, 0x00200D, 0x01F9BA, 0,
- 0x01F482, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
- 0 // null-terminating the list
-};
-// clang-format on
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_INTL_SUPPORT
diff --git a/js/src/new-regexp/property-sequences.h b/js/src/new-regexp/property-sequences.h
deleted file mode 100644
index f079da7ac..000000000
--- a/js/src/new-regexp/property-sequences.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2018 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_PROPERTY_SEQUENCES_H_
-#define V8_REGEXP_PROPERTY_SEQUENCES_H_
-
-#ifdef V8_INTL_SUPPORT
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-class UnicodePropertySequences : public AllStatic {
- public:
- static const uc32 kEmojiFlagSequences[];
- static const uc32 kEmojiTagSequences[];
- static const uc32 kEmojiZWJSequences[];
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_INTL_SUPPORT
-
-#endif // V8_REGEXP_PROPERTY_SEQUENCES_H_
diff --git a/js/src/new-regexp/regexp-ast.cc b/js/src/new-regexp/regexp-ast.cc
deleted file mode 100644
index 8de26720f..000000000
--- a/js/src/new-regexp/regexp-ast.cc
+++ /dev/null
@@ -1,342 +0,0 @@
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-ast.h"
-
-namespace v8 {
-namespace internal {
-
-#define MAKE_ACCEPT(Name) \
- void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
- return visitor->Visit##Name(this, data); \
- }
-FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
-#undef MAKE_ACCEPT
-
-#define MAKE_TYPE_CASE(Name) \
- RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
- bool RegExpTree::Is##Name() { return false; }
-FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
-#undef MAKE_TYPE_CASE
-
-#define MAKE_TYPE_CASE(Name) \
- RegExp##Name* RegExp##Name::As##Name() { return this; } \
- bool RegExp##Name::Is##Name() { return true; }
-FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
-#undef MAKE_TYPE_CASE
-
-
-static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
- Interval result = Interval::Empty();
- for (int i = 0; i < children->length(); i++)
- result = result.Union(children->at(i)->CaptureRegisters());
- return result;
-}
-
-
-Interval RegExpAlternative::CaptureRegisters() {
- return ListCaptureRegisters(nodes());
-}
-
-
-Interval RegExpDisjunction::CaptureRegisters() {
- return ListCaptureRegisters(alternatives());
-}
-
-
-Interval RegExpLookaround::CaptureRegisters() {
- return body()->CaptureRegisters();
-}
-
-
-Interval RegExpCapture::CaptureRegisters() {
- Interval self(StartRegister(index()), EndRegister(index()));
- return self.Union(body()->CaptureRegisters());
-}
-
-
-Interval RegExpQuantifier::CaptureRegisters() {
- return body()->CaptureRegisters();
-}
-
-
-bool RegExpAssertion::IsAnchoredAtStart() {
- return assertion_type() == RegExpAssertion::START_OF_INPUT;
-}
-
-
-bool RegExpAssertion::IsAnchoredAtEnd() {
- return assertion_type() == RegExpAssertion::END_OF_INPUT;
-}
-
-
-bool RegExpAlternative::IsAnchoredAtStart() {
- ZoneList<RegExpTree*>* nodes = this->nodes();
- for (int i = 0; i < nodes->length(); i++) {
- RegExpTree* node = nodes->at(i);
- if (node->IsAnchoredAtStart()) {
- return true;
- }
- if (node->max_match() > 0) {
- return false;
- }
- }
- return false;
-}
-
-
-bool RegExpAlternative::IsAnchoredAtEnd() {
- ZoneList<RegExpTree*>* nodes = this->nodes();
- for (int i = nodes->length() - 1; i >= 0; i--) {
- RegExpTree* node = nodes->at(i);
- if (node->IsAnchoredAtEnd()) {
- return true;
- }
- if (node->max_match() > 0) {
- return false;
- }
- }
- return false;
-}
-
-
-bool RegExpDisjunction::IsAnchoredAtStart() {
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- for (int i = 0; i < alternatives->length(); i++) {
- if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
- }
- return true;
-}
-
-
-bool RegExpDisjunction::IsAnchoredAtEnd() {
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- for (int i = 0; i < alternatives->length(); i++) {
- if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
- }
- return true;
-}
-
-
-bool RegExpLookaround::IsAnchoredAtStart() {
- return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
-}
-
-
-bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
-
-
-bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
-
-
-// Convert regular expression trees to a simple sexp representation.
-// This representation should be different from the input grammar
-// in as many cases as possible, to make it more difficult for incorrect
-// parses to look as correct ones which is likely if the input and
-// output formats are alike.
-class RegExpUnparser final : public RegExpVisitor {
- public:
- RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
- void VisitCharacterRange(CharacterRange that);
-#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
- FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
-#undef MAKE_CASE
- private:
- std::ostream& os_;
- Zone* zone_;
-};
-
-
-void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
- os_ << "(|";
- for (int i = 0; i < that->alternatives()->length(); i++) {
- os_ << " ";
- that->alternatives()->at(i)->Accept(this, data);
- }
- os_ << ")";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
- os_ << "(:";
- for (int i = 0; i < that->nodes()->length(); i++) {
- os_ << " ";
- that->nodes()->at(i)->Accept(this, data);
- }
- os_ << ")";
- return nullptr;
-}
-
-
-void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
- os_ << AsUC32(that.from());
- if (!that.IsSingleton()) {
- os_ << "-" << AsUC32(that.to());
- }
-}
-
-
-void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
- void* data) {
- if (that->is_negated()) os_ << "^";
- os_ << "[";
- for (int i = 0; i < that->ranges(zone_)->length(); i++) {
- if (i > 0) os_ << " ";
- VisitCharacterRange(that->ranges(zone_)->at(i));
- }
- os_ << "]";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
- switch (that->assertion_type()) {
- case RegExpAssertion::START_OF_INPUT:
- os_ << "@^i";
- break;
- case RegExpAssertion::END_OF_INPUT:
- os_ << "@$i";
- break;
- case RegExpAssertion::START_OF_LINE:
- os_ << "@^l";
- break;
- case RegExpAssertion::END_OF_LINE:
- os_ << "@$l";
- break;
- case RegExpAssertion::BOUNDARY:
- os_ << "@b";
- break;
- case RegExpAssertion::NON_BOUNDARY:
- os_ << "@B";
- break;
- }
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
- os_ << "'";
- Vector<const uc16> chardata = that->data();
- for (int i = 0; i < chardata.length(); i++) {
- os_ << AsUC16(chardata[i]);
- }
- os_ << "'";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
- if (that->elements()->length() == 1) {
- that->elements()->at(0).tree()->Accept(this, data);
- } else {
- os_ << "(!";
- for (int i = 0; i < that->elements()->length(); i++) {
- os_ << " ";
- that->elements()->at(i).tree()->Accept(this, data);
- }
- os_ << ")";
- }
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
- os_ << "(# " << that->min() << " ";
- if (that->max() == RegExpTree::kInfinity) {
- os_ << "- ";
- } else {
- os_ << that->max() << " ";
- }
- os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
- that->body()->Accept(this, data);
- os_ << ")";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
- os_ << "(^ ";
- that->body()->Accept(this, data);
- os_ << ")";
- return nullptr;
-}
-
-void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
- os_ << "(?: ";
- that->body()->Accept(this, data);
- os_ << ")";
- return nullptr;
-}
-
-void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
- os_ << "(";
- os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
- os_ << (that->is_positive() ? " + " : " - ");
- that->body()->Accept(this, data);
- os_ << ")";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
- void* data) {
- os_ << "(<- " << that->index() << ")";
- return nullptr;
-}
-
-
-void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
- os_ << '%';
- return nullptr;
-}
-
-
-std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
- RegExpUnparser unparser(os, zone);
- Accept(&unparser, nullptr);
- return os;
-}
-
-
-RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
- : alternatives_(alternatives) {
- DCHECK_LT(1, alternatives->length());
- RegExpTree* first_alternative = alternatives->at(0);
- min_match_ = first_alternative->min_match();
- max_match_ = first_alternative->max_match();
- for (int i = 1; i < alternatives->length(); i++) {
- RegExpTree* alternative = alternatives->at(i);
- min_match_ = Min(min_match_, alternative->min_match());
- max_match_ = Max(max_match_, alternative->max_match());
- }
-}
-
-
-static int IncreaseBy(int previous, int increase) {
- if (RegExpTree::kInfinity - previous < increase) {
- return RegExpTree::kInfinity;
- } else {
- return previous + increase;
- }
-}
-
-
-RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
- : nodes_(nodes) {
- DCHECK_LT(1, nodes->length());
- min_match_ = 0;
- max_match_ = 0;
- for (int i = 0; i < nodes->length(); i++) {
- RegExpTree* node = nodes->at(i);
- int node_min_match = node->min_match();
- min_match_ = IncreaseBy(min_match_, node_min_match);
- int node_max_match = node->max_match();
- max_match_ = IncreaseBy(max_match_, node_max_match);
- }
-}
-
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-ast.h b/js/src/new-regexp/regexp-ast.h
deleted file mode 100644
index 32bbcf0bf..000000000
--- a/js/src/new-regexp/regexp-ast.h
+++ /dev/null
@@ -1,615 +0,0 @@
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_AST_H_
-#define V8_REGEXP_REGEXP_AST_H_
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
- VISIT(Disjunction) \
- VISIT(Alternative) \
- VISIT(Assertion) \
- VISIT(CharacterClass) \
- VISIT(Atom) \
- VISIT(Quantifier) \
- VISIT(Capture) \
- VISIT(Group) \
- VISIT(Lookaround) \
- VISIT(BackReference) \
- VISIT(Empty) \
- VISIT(Text)
-
-#define FORWARD_DECLARE(Name) class RegExp##Name;
-FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
-#undef FORWARD_DECLARE
-
-class RegExpCompiler;
-class RegExpNode;
-class RegExpTree;
-
-class RegExpVisitor {
- public:
- virtual ~RegExpVisitor() = default;
-#define MAKE_CASE(Name) \
- virtual void* Visit##Name(RegExp##Name*, void* data) = 0;
- FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
-#undef MAKE_CASE
-};
-
-
-// A simple closed interval.
-class Interval {
- public:
- Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size().
- Interval(int from, int to) : from_(from), to_(to) {}
- Interval Union(Interval that) {
- if (that.from_ == kNone)
- return *this;
- else if (from_ == kNone)
- return that;
- else
- return Interval(Min(from_, that.from_), Max(to_, that.to_));
- }
-
- bool Contains(int value) { return (from_ <= value) && (value <= to_); }
- bool is_empty() { return from_ == kNone; }
- int from() const { return from_; }
- int to() const { return to_; }
- int size() const { return to_ - from_ + 1; }
-
- static Interval Empty() { return Interval(); }
-
- static constexpr int kNone = -1;
-
- private:
- int from_;
- int to_;
-};
-
-
-// Represents code units in the range from from_ to to_, both ends are
-// inclusive.
-class CharacterRange {
- public:
- CharacterRange() : from_(0), to_(0) {}
- // For compatibility with the CHECK_OK macro
- CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT
- V8_EXPORT_PRIVATE static void AddClassEscape(char type,
- ZoneList<CharacterRange>* ranges,
- Zone* zone);
- // Add class escapes. Add case equivalent closure for \w and \W if necessary.
- V8_EXPORT_PRIVATE static void AddClassEscape(
- char type, ZoneList<CharacterRange>* ranges,
- bool add_unicode_case_equivalents, Zone* zone);
- static Vector<const int> GetWordBounds();
- static inline CharacterRange Singleton(uc32 value) {
- return CharacterRange(value, value);
- }
- static inline CharacterRange Range(uc32 from, uc32 to) {
- DCHECK(0 <= from && to <= String::kMaxCodePoint);
- DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to));
- return CharacterRange(from, to);
- }
- static inline CharacterRange Everything() {
- return CharacterRange(0, String::kMaxCodePoint);
- }
- static inline ZoneList<CharacterRange>* List(Zone* zone,
- CharacterRange range) {
- ZoneList<CharacterRange>* list =
- new (zone) ZoneList<CharacterRange>(1, zone);
- list->Add(range, zone);
- return list;
- }
- bool Contains(uc32 i) { return from_ <= i && i <= to_; }
- uc32 from() const { return from_; }
- void set_from(uc32 value) { from_ = value; }
- uc32 to() const { return to_; }
- void set_to(uc32 value) { to_ = value; }
- bool is_valid() { return from_ <= to_; }
- bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; }
- bool IsSingleton() { return (from_ == to_); }
- V8_EXPORT_PRIVATE static void AddCaseEquivalents(
- Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges,
- bool is_one_byte);
- // Whether a range list is in canonical form: Ranges ordered by from value,
- // and ranges non-overlapping and non-adjacent.
- V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges);
- // Convert range list to canonical form. The characters covered by the ranges
- // will still be the same, but no character is in more than one range, and
- // adjacent ranges are merged. The resulting list may be shorter than the
- // original, but cannot be longer.
- static void Canonicalize(ZoneList<CharacterRange>* ranges);
- // Negate the contents of a character range in canonical form.
- static void Negate(ZoneList<CharacterRange>* src,
- ZoneList<CharacterRange>* dst, Zone* zone);
- static const int kStartMarker = (1 << 24);
- static const int kPayloadMask = (1 << 24) - 1;
-
- private:
- CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {}
-
- uc32 from_;
- uc32 to_;
-};
-
-class CharacterSet final {
- public:
- explicit CharacterSet(uc16 standard_set_type)
- : ranges_(nullptr), standard_set_type_(standard_set_type) {}
- explicit CharacterSet(ZoneList<CharacterRange>* ranges)
- : ranges_(ranges), standard_set_type_(0) {}
- ZoneList<CharacterRange>* ranges(Zone* zone);
- uc16 standard_set_type() const { return standard_set_type_; }
- void set_standard_set_type(uc16 special_set_type) {
- standard_set_type_ = special_set_type;
- }
- bool is_standard() { return standard_set_type_ != 0; }
- V8_EXPORT_PRIVATE void Canonicalize();
-
- private:
- ZoneList<CharacterRange>* ranges_;
- // If non-zero, the value represents a standard set (e.g., all whitespace
- // characters) without having to expand the ranges.
- uc16 standard_set_type_;
-};
-
-class TextElement final {
- public:
- enum TextType { ATOM, CHAR_CLASS };
-
- static TextElement Atom(RegExpAtom* atom);
- static TextElement CharClass(RegExpCharacterClass* char_class);
-
- int cp_offset() const { return cp_offset_; }
- void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
- int length() const;
-
- TextType text_type() const { return text_type_; }
-
- RegExpTree* tree() const { return tree_; }
-
- RegExpAtom* atom() const {
- DCHECK(text_type() == ATOM);
- return reinterpret_cast<RegExpAtom*>(tree());
- }
-
- RegExpCharacterClass* char_class() const {
- DCHECK(text_type() == CHAR_CLASS);
- return reinterpret_cast<RegExpCharacterClass*>(tree());
- }
-
- private:
- TextElement(TextType text_type, RegExpTree* tree)
- : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
-
- int cp_offset_;
- TextType text_type_;
- RegExpTree* tree_;
-};
-
-
-class RegExpTree : public ZoneObject {
- public:
- static const int kInfinity = kMaxInt;
- virtual ~RegExpTree() = default;
- virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
- virtual RegExpNode* ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) = 0;
- virtual bool IsTextElement() { return false; }
- virtual bool IsAnchoredAtStart() { return false; }
- virtual bool IsAnchoredAtEnd() { return false; }
- virtual int min_match() = 0;
- virtual int max_match() = 0;
- // Returns the interval of registers used for captures within this
- // expression.
- virtual Interval CaptureRegisters() { return Interval::Empty(); }
- virtual void AppendToText(RegExpText* text, Zone* zone);
- V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os,
- Zone* zone); // NOLINT
-#define MAKE_ASTYPE(Name) \
- virtual RegExp##Name* As##Name(); \
- virtual bool Is##Name();
- FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
-#undef MAKE_ASTYPE
-};
-
-
-class RegExpDisjunction final : public RegExpTree {
- public:
- explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives);
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpDisjunction* AsDisjunction() override;
- Interval CaptureRegisters() override;
- bool IsDisjunction() override;
- bool IsAnchoredAtStart() override;
- bool IsAnchoredAtEnd() override;
- int min_match() override { return min_match_; }
- int max_match() override { return max_match_; }
- ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
-
- private:
- bool SortConsecutiveAtoms(RegExpCompiler* compiler);
- void RationalizeConsecutiveAtoms(RegExpCompiler* compiler);
- void FixSingleCharacterDisjunctions(RegExpCompiler* compiler);
- ZoneList<RegExpTree*>* alternatives_;
- int min_match_;
- int max_match_;
-};
-
-
-class RegExpAlternative final : public RegExpTree {
- public:
- explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes);
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpAlternative* AsAlternative() override;
- Interval CaptureRegisters() override;
- bool IsAlternative() override;
- bool IsAnchoredAtStart() override;
- bool IsAnchoredAtEnd() override;
- int min_match() override { return min_match_; }
- int max_match() override { return max_match_; }
- ZoneList<RegExpTree*>* nodes() { return nodes_; }
-
- private:
- ZoneList<RegExpTree*>* nodes_;
- int min_match_;
- int max_match_;
-};
-
-
-class RegExpAssertion final : public RegExpTree {
- public:
- enum AssertionType {
- START_OF_LINE = 0,
- START_OF_INPUT = 1,
- END_OF_LINE = 2,
- END_OF_INPUT = 3,
- BOUNDARY = 4,
- NON_BOUNDARY = 5,
- LAST_TYPE = NON_BOUNDARY,
- };
- RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
- : assertion_type_(type), flags_(flags) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpAssertion* AsAssertion() override;
- bool IsAssertion() override;
- bool IsAnchoredAtStart() override;
- bool IsAnchoredAtEnd() override;
- int min_match() override { return 0; }
- int max_match() override { return 0; }
- AssertionType assertion_type() const { return assertion_type_; }
- JSRegExp::Flags flags() const { return flags_; }
-
- private:
- const AssertionType assertion_type_;
- const JSRegExp::Flags flags_;
-};
-
-
-class RegExpCharacterClass final : public RegExpTree {
- public:
- // NEGATED: The character class is negated and should match everything but
- // the specified ranges.
- // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split
- // surrogate and should not be unicode-desugared (crbug.com/641091).
- enum Flag {
- NEGATED = 1 << 0,
- CONTAINS_SPLIT_SURROGATE = 1 << 1,
- };
- using CharacterClassFlags = base::Flags<Flag>;
-
- RegExpCharacterClass(
- Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
- CharacterClassFlags character_class_flags = CharacterClassFlags())
- : set_(ranges),
- flags_(flags),
- character_class_flags_(character_class_flags) {
- // Convert the empty set of ranges to the negated Everything() range.
- if (ranges->is_empty()) {
- ranges->Add(CharacterRange::Everything(), zone);
- character_class_flags_ ^= NEGATED;
- }
- }
- RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
- : set_(type),
- flags_(flags),
- character_class_flags_(CharacterClassFlags()) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpCharacterClass* AsCharacterClass() override;
- bool IsCharacterClass() override;
- bool IsTextElement() override { return true; }
- int min_match() override { return 1; }
- // The character class may match two code units for unicode regexps.
- // TODO(yangguo): we should split this class for usage in TextElement, and
- // make max_match() dependent on the character class content.
- int max_match() override { return 2; }
- void AppendToText(RegExpText* text, Zone* zone) override;
- CharacterSet character_set() { return set_; }
- // TODO(lrn): Remove need for complex version if is_standard that
- // recognizes a mangled standard set and just do { return set_.is_special(); }
- bool is_standard(Zone* zone);
- // Returns a value representing the standard character set if is_standard()
- // returns true.
- // Currently used values are:
- // s : unicode whitespace
- // S : unicode non-whitespace
- // w : ASCII word character (digit, letter, underscore)
- // W : non-ASCII word character
- // d : ASCII digit
- // D : non-ASCII digit
- // . : non-newline
- // * : All characters, for advancing unanchored regexp
- uc16 standard_type() const { return set_.standard_set_type(); }
- ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
- bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
- JSRegExp::Flags flags() const { return flags_; }
- bool contains_split_surrogate() const {
- return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
- }
-
- private:
- CharacterSet set_;
- const JSRegExp::Flags flags_;
- CharacterClassFlags character_class_flags_;
-};
-
-
-class RegExpAtom final : public RegExpTree {
- public:
- explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
- : data_(data), flags_(flags) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpAtom* AsAtom() override;
- bool IsAtom() override;
- bool IsTextElement() override { return true; }
- int min_match() override { return data_.length(); }
- int max_match() override { return data_.length(); }
- void AppendToText(RegExpText* text, Zone* zone) override;
- Vector<const uc16> data() { return data_; }
- int length() { return data_.length(); }
- JSRegExp::Flags flags() const { return flags_; }
- bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
-
- private:
- Vector<const uc16> data_;
- const JSRegExp::Flags flags_;
-};
-
-
-class RegExpText final : public RegExpTree {
- public:
- explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpText* AsText() override;
- bool IsText() override;
- bool IsTextElement() override { return true; }
- int min_match() override { return length_; }
- int max_match() override { return length_; }
- void AppendToText(RegExpText* text, Zone* zone) override;
- void AddElement(TextElement elm, Zone* zone) {
- elements_.Add(elm, zone);
- length_ += elm.length();
- }
- ZoneList<TextElement>* elements() { return &elements_; }
-
- private:
- ZoneList<TextElement> elements_;
- int length_;
-};
-
-
-class RegExpQuantifier final : public RegExpTree {
- public:
- enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE };
- RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body)
- : body_(body),
- min_(min),
- max_(max),
- quantifier_type_(type) {
- if (min > 0 && body->min_match() > kInfinity / min) {
- min_match_ = kInfinity;
- } else {
- min_match_ = min * body->min_match();
- }
- if (max > 0 && body->max_match() > kInfinity / max) {
- max_match_ = kInfinity;
- } else {
- max_match_ = max * body->max_match();
- }
- }
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body,
- RegExpCompiler* compiler, RegExpNode* on_success,
- bool not_at_start = false);
- RegExpQuantifier* AsQuantifier() override;
- Interval CaptureRegisters() override;
- bool IsQuantifier() override;
- int min_match() override { return min_match_; }
- int max_match() override { return max_match_; }
- int min() { return min_; }
- int max() { return max_; }
- bool is_possessive() { return quantifier_type_ == POSSESSIVE; }
- bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
- bool is_greedy() { return quantifier_type_ == GREEDY; }
- RegExpTree* body() { return body_; }
-
- private:
- RegExpTree* body_;
- int min_;
- int max_;
- int min_match_;
- int max_match_;
- QuantifierType quantifier_type_;
-};
-
-
-class RegExpCapture final : public RegExpTree {
- public:
- explicit RegExpCapture(int index)
- : body_(nullptr),
- index_(index),
- min_match_(0),
- max_match_(0),
- name_(nullptr) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- static RegExpNode* ToNode(RegExpTree* body, int index,
- RegExpCompiler* compiler, RegExpNode* on_success);
- RegExpCapture* AsCapture() override;
- bool IsAnchoredAtStart() override;
- bool IsAnchoredAtEnd() override;
- Interval CaptureRegisters() override;
- bool IsCapture() override;
- int min_match() override { return min_match_; }
- int max_match() override { return max_match_; }
- RegExpTree* body() { return body_; }
- void set_body(RegExpTree* body) {
- body_ = body;
- min_match_ = body->min_match();
- max_match_ = body->max_match();
- }
- int index() const { return index_; }
- const ZoneVector<uc16>* name() const { return name_; }
- void set_name(const ZoneVector<uc16>* name) { name_ = name; }
- static int StartRegister(int index) { return index * 2; }
- static int EndRegister(int index) { return index * 2 + 1; }
-
- private:
- RegExpTree* body_;
- int index_;
- int min_match_;
- int max_match_;
- const ZoneVector<uc16>* name_;
-};
-
-class RegExpGroup final : public RegExpTree {
- public:
- explicit RegExpGroup(RegExpTree* body)
- : body_(body),
- min_match_(body->min_match()),
- max_match_(body->max_match()) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) override {
- return body_->ToNode(compiler, on_success);
- }
- RegExpGroup* AsGroup() override;
- bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); }
- bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); }
- bool IsGroup() override;
- int min_match() override { return min_match_; }
- int max_match() override { return max_match_; }
- Interval CaptureRegisters() override { return body_->CaptureRegisters(); }
- RegExpTree* body() { return body_; }
-
- private:
- RegExpTree* body_;
- int min_match_;
- int max_match_;
-};
-
-class RegExpLookaround final : public RegExpTree {
- public:
- enum Type { LOOKAHEAD, LOOKBEHIND };
-
- RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
- int capture_from, Type type)
- : body_(body),
- is_positive_(is_positive),
- capture_count_(capture_count),
- capture_from_(capture_from),
- type_(type) {}
-
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpLookaround* AsLookaround() override;
- Interval CaptureRegisters() override;
- bool IsLookaround() override;
- bool IsAnchoredAtStart() override;
- int min_match() override { return 0; }
- int max_match() override { return 0; }
- RegExpTree* body() { return body_; }
- bool is_positive() { return is_positive_; }
- int capture_count() { return capture_count_; }
- int capture_from() { return capture_from_; }
- Type type() { return type_; }
-
- class Builder {
- public:
- Builder(bool is_positive, RegExpNode* on_success,
- int stack_pointer_register, int position_register,
- int capture_register_count = 0, int capture_register_start = 0);
- RegExpNode* on_match_success() { return on_match_success_; }
- RegExpNode* ForMatch(RegExpNode* match);
-
- private:
- bool is_positive_;
- RegExpNode* on_match_success_;
- RegExpNode* on_success_;
- int stack_pointer_register_;
- int position_register_;
- };
-
- private:
- RegExpTree* body_;
- bool is_positive_;
- int capture_count_;
- int capture_from_;
- Type type_;
-};
-
-
-class RegExpBackReference final : public RegExpTree {
- public:
- explicit RegExpBackReference(JSRegExp::Flags flags)
- : capture_(nullptr), name_(nullptr), flags_(flags) {}
- RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
- : capture_(capture), name_(nullptr), flags_(flags) {}
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpBackReference* AsBackReference() override;
- bool IsBackReference() override;
- int min_match() override { return 0; }
- // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
- // recursion, we give up. Ignorance is bliss.
- int max_match() override { return kInfinity; }
- int index() { return capture_->index(); }
- RegExpCapture* capture() { return capture_; }
- void set_capture(RegExpCapture* capture) { capture_ = capture; }
- const ZoneVector<uc16>* name() const { return name_; }
- void set_name(const ZoneVector<uc16>* name) { name_ = name; }
-
- private:
- RegExpCapture* capture_;
- const ZoneVector<uc16>* name_;
- const JSRegExp::Flags flags_;
-};
-
-
-class RegExpEmpty final : public RegExpTree {
- public:
- RegExpEmpty() = default;
- void* Accept(RegExpVisitor* visitor, void* data) override;
- RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
- RegExpEmpty* AsEmpty() override;
- bool IsEmpty() override;
- int min_match() override { return 0; }
- int max_match() override { return 0; }
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_AST_H_
diff --git a/js/src/new-regexp/regexp-bytecode-generator-inl.h b/js/src/new-regexp/regexp-bytecode-generator-inl.h
deleted file mode 100644
index a2d1ac1cb..000000000
--- a/js/src/new-regexp/regexp-bytecode-generator-inl.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_BYTECODE_GENERATOR_INL_H_
-#define V8_REGEXP_REGEXP_BYTECODE_GENERATOR_INL_H_
-
-#include "new-regexp/regexp-bytecode-generator.h"
-
-#include "new-regexp/regexp-bytecodes.h"
-
-namespace v8 {
-namespace internal {
-
-void RegExpBytecodeGenerator::Emit(uint32_t byte, uint32_t twenty_four_bits) {
- uint32_t word = ((twenty_four_bits << BYTECODE_SHIFT) | byte);
- DCHECK(pc_ <= buffer_.length());
- if (pc_ + 3 >= buffer_.length()) {
- Expand();
- }
- *reinterpret_cast<uint32_t*>(buffer_.begin() + pc_) = word;
- pc_ += 4;
-}
-
-void RegExpBytecodeGenerator::Emit16(uint32_t word) {
- DCHECK(pc_ <= buffer_.length());
- if (pc_ + 1 >= buffer_.length()) {
- Expand();
- }
- *reinterpret_cast<uint16_t*>(buffer_.begin() + pc_) = word;
- pc_ += 2;
-}
-
-void RegExpBytecodeGenerator::Emit8(uint32_t word) {
- DCHECK(pc_ <= buffer_.length());
- if (pc_ == buffer_.length()) {
- Expand();
- }
- *reinterpret_cast<unsigned char*>(buffer_.begin() + pc_) = word;
- pc_ += 1;
-}
-
-void RegExpBytecodeGenerator::Emit32(uint32_t word) {
- DCHECK(pc_ <= buffer_.length());
- if (pc_ + 3 >= buffer_.length()) {
- Expand();
- }
- *reinterpret_cast<uint32_t*>(buffer_.begin() + pc_) = word;
- pc_ += 4;
-}
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_BYTECODE_GENERATOR_INL_H_
diff --git a/js/src/new-regexp/regexp-bytecode-generator.cc b/js/src/new-regexp/regexp-bytecode-generator.cc
deleted file mode 100644
index 2670322d3..000000000
--- a/js/src/new-regexp/regexp-bytecode-generator.cc
+++ /dev/null
@@ -1,395 +0,0 @@
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-bytecode-generator.h"
-
-#include "new-regexp/regexp-bytecode-generator-inl.h"
-#include "new-regexp/regexp-bytecode-peephole.h"
-#include "new-regexp/regexp-bytecodes.h"
-#include "new-regexp/regexp-macro-assembler.h"
-
-namespace v8 {
-namespace internal {
-
-RegExpBytecodeGenerator::RegExpBytecodeGenerator(Isolate* isolate, Zone* zone)
- : RegExpMacroAssembler(isolate, zone),
- buffer_(Vector<byte>::New(1024)),
- pc_(0),
- advance_current_end_(kInvalidPC),
- jump_edges_(zone),
- isolate_(isolate) {}
-
-RegExpBytecodeGenerator::~RegExpBytecodeGenerator() {
- if (backtrack_.is_linked()) backtrack_.Unuse();
- buffer_.Dispose();
-}
-
-RegExpBytecodeGenerator::IrregexpImplementation
-RegExpBytecodeGenerator::Implementation() {
- return kBytecodeImplementation;
-}
-
-void RegExpBytecodeGenerator::Bind(Label* l) {
- advance_current_end_ = kInvalidPC;
- DCHECK(!l->is_bound());
- if (l->is_linked()) {
- int pos = l->pos();
- while (pos != 0) {
- int fixup = pos;
- pos = *reinterpret_cast<int32_t*>(buffer_.begin() + fixup);
- *reinterpret_cast<uint32_t*>(buffer_.begin() + fixup) = pc_;
- jump_edges_.emplace(fixup, pc_);
- }
- }
- l->bind_to(pc_);
-}
-
-void RegExpBytecodeGenerator::EmitOrLink(Label* l) {
- if (l == nullptr) l = &backtrack_;
- int pos = 0;
- if (l->is_bound()) {
- pos = l->pos();
- jump_edges_.emplace(pc_, pos);
- } else {
- if (l->is_linked()) {
- pos = l->pos();
- }
- l->link_to(pc_);
- }
- Emit32(pos);
-}
-
-void RegExpBytecodeGenerator::PopRegister(int register_index) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_POP_REGISTER, register_index);
-}
-
-void RegExpBytecodeGenerator::PushRegister(int register_index,
- StackCheckFlag check_stack_limit) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_PUSH_REGISTER, register_index);
-}
-
-void RegExpBytecodeGenerator::WriteCurrentPositionToRegister(int register_index,
- int cp_offset) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_SET_REGISTER_TO_CP, register_index);
- Emit32(cp_offset); // Current position offset.
-}
-
-void RegExpBytecodeGenerator::ClearRegisters(int reg_from, int reg_to) {
- DCHECK(reg_from <= reg_to);
- for (int reg = reg_from; reg <= reg_to; reg++) {
- SetRegister(reg, -1);
- }
-}
-
-void RegExpBytecodeGenerator::ReadCurrentPositionFromRegister(
- int register_index) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_SET_CP_TO_REGISTER, register_index);
-}
-
-void RegExpBytecodeGenerator::WriteStackPointerToRegister(int register_index) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_SET_REGISTER_TO_SP, register_index);
-}
-
-void RegExpBytecodeGenerator::ReadStackPointerFromRegister(int register_index) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_SET_SP_TO_REGISTER, register_index);
-}
-
-void RegExpBytecodeGenerator::SetCurrentPositionFromEnd(int by) {
- DCHECK(is_uint24(by));
- Emit(BC_SET_CURRENT_POSITION_FROM_END, by);
-}
-
-void RegExpBytecodeGenerator::SetRegister(int register_index, int to) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_SET_REGISTER, register_index);
- Emit32(to);
-}
-
-void RegExpBytecodeGenerator::AdvanceRegister(int register_index, int by) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_ADVANCE_REGISTER, register_index);
- Emit32(by);
-}
-
-void RegExpBytecodeGenerator::PopCurrentPosition() { Emit(BC_POP_CP, 0); }
-
-void RegExpBytecodeGenerator::PushCurrentPosition() { Emit(BC_PUSH_CP, 0); }
-
-void RegExpBytecodeGenerator::Backtrack() { Emit(BC_POP_BT, 0); }
-
-void RegExpBytecodeGenerator::GoTo(Label* l) {
- if (advance_current_end_ == pc_) {
- // Combine advance current and goto.
- pc_ = advance_current_start_;
- Emit(BC_ADVANCE_CP_AND_GOTO, advance_current_offset_);
- EmitOrLink(l);
- advance_current_end_ = kInvalidPC;
- } else {
- // Regular goto.
- Emit(BC_GOTO, 0);
- EmitOrLink(l);
- }
-}
-
-void RegExpBytecodeGenerator::PushBacktrack(Label* l) {
- Emit(BC_PUSH_BT, 0);
- EmitOrLink(l);
-}
-
-bool RegExpBytecodeGenerator::Succeed() {
- Emit(BC_SUCCEED, 0);
- return false; // Restart matching for global regexp not supported.
-}
-
-void RegExpBytecodeGenerator::Fail() { Emit(BC_FAIL, 0); }
-
-void RegExpBytecodeGenerator::AdvanceCurrentPosition(int by) {
- DCHECK_LE(kMinCPOffset, by);
- DCHECK_GE(kMaxCPOffset, by);
- advance_current_start_ = pc_;
- advance_current_offset_ = by;
- Emit(BC_ADVANCE_CP, by);
- advance_current_end_ = pc_;
-}
-
-void RegExpBytecodeGenerator::CheckGreedyLoop(
- Label* on_tos_equals_current_position) {
- Emit(BC_CHECK_GREEDY, 0);
- EmitOrLink(on_tos_equals_current_position);
-}
-
-void RegExpBytecodeGenerator::LoadCurrentCharacterImpl(int cp_offset,
- Label* on_failure,
- bool check_bounds,
- int characters,
- int eats_at_least) {
- DCHECK_GE(eats_at_least, characters);
- if (eats_at_least > characters && check_bounds) {
- DCHECK(is_uint24(cp_offset + eats_at_least));
- Emit(BC_CHECK_CURRENT_POSITION, cp_offset + eats_at_least);
- EmitOrLink(on_failure);
- check_bounds = false; // Load below doesn't need to check.
- }
-
- DCHECK_LE(kMinCPOffset, cp_offset);
- DCHECK_GE(kMaxCPOffset, cp_offset);
- int bytecode;
- if (check_bounds) {
- if (characters == 4) {
- bytecode = BC_LOAD_4_CURRENT_CHARS;
- } else if (characters == 2) {
- bytecode = BC_LOAD_2_CURRENT_CHARS;
- } else {
- DCHECK_EQ(1, characters);
- bytecode = BC_LOAD_CURRENT_CHAR;
- }
- } else {
- if (characters == 4) {
- bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
- } else if (characters == 2) {
- bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
- } else {
- DCHECK_EQ(1, characters);
- bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
- }
- }
- Emit(bytecode, cp_offset);
- if (check_bounds) EmitOrLink(on_failure);
-}
-
-void RegExpBytecodeGenerator::CheckCharacterLT(uc16 limit, Label* on_less) {
- Emit(BC_CHECK_LT, limit);
- EmitOrLink(on_less);
-}
-
-void RegExpBytecodeGenerator::CheckCharacterGT(uc16 limit, Label* on_greater) {
- Emit(BC_CHECK_GT, limit);
- EmitOrLink(on_greater);
-}
-
-void RegExpBytecodeGenerator::CheckCharacter(uint32_t c, Label* on_equal) {
- if (c > MAX_FIRST_ARG) {
- Emit(BC_CHECK_4_CHARS, 0);
- Emit32(c);
- } else {
- Emit(BC_CHECK_CHAR, c);
- }
- EmitOrLink(on_equal);
-}
-
-void RegExpBytecodeGenerator::CheckAtStart(int cp_offset, Label* on_at_start) {
- Emit(BC_CHECK_AT_START, cp_offset);
- EmitOrLink(on_at_start);
-}
-
-void RegExpBytecodeGenerator::CheckNotAtStart(int cp_offset,
- Label* on_not_at_start) {
- Emit(BC_CHECK_NOT_AT_START, cp_offset);
- EmitOrLink(on_not_at_start);
-}
-
-void RegExpBytecodeGenerator::CheckNotCharacter(uint32_t c,
- Label* on_not_equal) {
- if (c > MAX_FIRST_ARG) {
- Emit(BC_CHECK_NOT_4_CHARS, 0);
- Emit32(c);
- } else {
- Emit(BC_CHECK_NOT_CHAR, c);
- }
- EmitOrLink(on_not_equal);
-}
-
-void RegExpBytecodeGenerator::CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
- Label* on_equal) {
- if (c > MAX_FIRST_ARG) {
- Emit(BC_AND_CHECK_4_CHARS, 0);
- Emit32(c);
- } else {
- Emit(BC_AND_CHECK_CHAR, c);
- }
- Emit32(mask);
- EmitOrLink(on_equal);
-}
-
-void RegExpBytecodeGenerator::CheckNotCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_not_equal) {
- if (c > MAX_FIRST_ARG) {
- Emit(BC_AND_CHECK_NOT_4_CHARS, 0);
- Emit32(c);
- } else {
- Emit(BC_AND_CHECK_NOT_CHAR, c);
- }
- Emit32(mask);
- EmitOrLink(on_not_equal);
-}
-
-void RegExpBytecodeGenerator::CheckNotCharacterAfterMinusAnd(
- uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
- Emit(BC_MINUS_AND_CHECK_NOT_CHAR, c);
- Emit16(minus);
- Emit16(mask);
- EmitOrLink(on_not_equal);
-}
-
-void RegExpBytecodeGenerator::CheckCharacterInRange(uc16 from, uc16 to,
- Label* on_in_range) {
- Emit(BC_CHECK_CHAR_IN_RANGE, 0);
- Emit16(from);
- Emit16(to);
- EmitOrLink(on_in_range);
-}
-
-void RegExpBytecodeGenerator::CheckCharacterNotInRange(uc16 from, uc16 to,
- Label* on_not_in_range) {
- Emit(BC_CHECK_CHAR_NOT_IN_RANGE, 0);
- Emit16(from);
- Emit16(to);
- EmitOrLink(on_not_in_range);
-}
-
-void RegExpBytecodeGenerator::CheckBitInTable(Handle<ByteArray> table,
- Label* on_bit_set) {
- Emit(BC_CHECK_BIT_IN_TABLE, 0);
- EmitOrLink(on_bit_set);
- for (int i = 0; i < kTableSize; i += kBitsPerByte) {
- int byte = 0;
- for (int j = 0; j < kBitsPerByte; j++) {
- if (table->get(i + j) != 0) byte |= 1 << j;
- }
- Emit8(byte);
- }
-}
-
-void RegExpBytecodeGenerator::CheckNotBackReference(int start_reg,
- bool read_backward,
- Label* on_not_equal) {
- DCHECK_LE(0, start_reg);
- DCHECK_GE(kMaxRegister, start_reg);
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
- start_reg);
- EmitOrLink(on_not_equal);
-}
-
-void RegExpBytecodeGenerator::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_not_equal) {
- DCHECK_LE(0, start_reg);
- DCHECK_GE(kMaxRegister, start_reg);
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
- : BC_CHECK_NOT_BACK_REF_NO_CASE,
- start_reg);
- EmitOrLink(on_not_equal);
-}
-
-void RegExpBytecodeGenerator::IfRegisterLT(int register_index, int comparand,
- Label* on_less_than) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_CHECK_REGISTER_LT, register_index);
- Emit32(comparand);
- EmitOrLink(on_less_than);
-}
-
-void RegExpBytecodeGenerator::IfRegisterGE(int register_index, int comparand,
- Label* on_greater_or_equal) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_CHECK_REGISTER_GE, register_index);
- Emit32(comparand);
- EmitOrLink(on_greater_or_equal);
-}
-
-void RegExpBytecodeGenerator::IfRegisterEqPos(int register_index,
- Label* on_eq) {
- DCHECK_LE(0, register_index);
- DCHECK_GE(kMaxRegister, register_index);
- Emit(BC_CHECK_REGISTER_EQ_POS, register_index);
- EmitOrLink(on_eq);
-}
-
-Handle<HeapObject> RegExpBytecodeGenerator::GetCode(Handle<String> source) {
- Bind(&backtrack_);
- Emit(BC_POP_BT, 0);
-
- Handle<ByteArray> array;
- if (FLAG_regexp_peephole_optimization) {
- array = RegExpBytecodePeepholeOptimization::OptimizeBytecode(
- isolate_, zone(), source, buffer_.begin(), length(), jump_edges_);
- } else {
- array = isolate_->factory()->NewByteArray(length());
- Copy(array->GetDataStartAddress());
- }
-
- return array;
-}
-
-int RegExpBytecodeGenerator::length() { return pc_; }
-
-void RegExpBytecodeGenerator::Copy(byte* a) {
- MemCopy(a, buffer_.begin(), length());
-}
-
-void RegExpBytecodeGenerator::Expand() {
- Vector<byte> old_buffer = buffer_;
- buffer_ = Vector<byte>::New(old_buffer.length() * 2);
- MemCopy(buffer_.begin(), old_buffer.begin(), old_buffer.length());
- old_buffer.Dispose();
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-bytecode-generator.h b/js/src/new-regexp/regexp-bytecode-generator.h
deleted file mode 100644
index 274fd3953..000000000
--- a/js/src/new-regexp/regexp-bytecode-generator.h
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
-#define V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
-
-#include "new-regexp/regexp-macro-assembler.h"
-
-namespace v8 {
-namespace internal {
-
-// An assembler/generator for the Irregexp byte code.
-class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
- public:
- // Create an assembler. Instructions and relocation information are emitted
- // into a buffer, with the instructions starting from the beginning and the
- // relocation information starting from the end of the buffer. See CodeDesc
- // for a detailed comment on the layout (globals.h).
- //
- // The assembler allocates and grows its own buffer, and buffer_size
- // determines the initial buffer size. The buffer is owned by the assembler
- // and deallocated upon destruction of the assembler.
- RegExpBytecodeGenerator(Isolate* isolate, Zone* zone);
- virtual ~RegExpBytecodeGenerator();
- // The byte-code interpreter checks on each push anyway.
- virtual int stack_limit_slack() { return 1; }
- virtual bool CanReadUnaligned() { return false; }
- virtual void Bind(Label* label);
- virtual void AdvanceCurrentPosition(int by); // Signed cp change.
- virtual void PopCurrentPosition();
- virtual void PushCurrentPosition();
- virtual void Backtrack();
- virtual void GoTo(Label* label);
- virtual void PushBacktrack(Label* label);
- virtual bool Succeed();
- virtual void Fail();
- virtual void PopRegister(int register_index);
- virtual void PushRegister(int register_index,
- StackCheckFlag check_stack_limit);
- virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
- virtual void SetCurrentPositionFromEnd(int by);
- virtual void SetRegister(int register_index, int to);
- virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
- virtual void ClearRegisters(int reg_from, int reg_to);
- virtual void ReadCurrentPositionFromRegister(int reg);
- virtual void WriteStackPointerToRegister(int reg);
- virtual void ReadStackPointerFromRegister(int reg);
- virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
- bool check_bounds, int characters,
- int eats_at_least);
- virtual void CheckCharacter(unsigned c, Label* on_equal);
- virtual void CheckCharacterAfterAnd(unsigned c, unsigned mask,
- Label* on_equal);
- virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
- virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckAtStart(int cp_offset, Label* on_at_start);
- virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
- virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
- virtual void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
- Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask,
- Label* on_not_equal);
- virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
- virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
- Label* on_not_in_range);
- virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
- virtual void CheckNotBackReference(int start_reg, bool read_backward,
- Label* on_no_match);
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
- Label* on_no_match);
- virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
- virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
- virtual void IfRegisterEqPos(int register_index, Label* if_eq);
-
- virtual IrregexpImplementation Implementation();
- virtual Handle<HeapObject> GetCode(Handle<String> source);
-
- private:
- void Expand();
- // Code and bitmap emission.
- inline void EmitOrLink(Label* label);
- inline void Emit32(uint32_t x);
- inline void Emit16(uint32_t x);
- inline void Emit8(uint32_t x);
- inline void Emit(uint32_t bc, uint32_t arg);
- // Bytecode buffer.
- int length();
- void Copy(byte* a);
-
- // The buffer into which code and relocation info are generated.
- Vector<byte> buffer_;
- // The program counter.
- int pc_;
- Label backtrack_;
-
- int advance_current_start_;
- int advance_current_offset_;
- int advance_current_end_;
-
- // Stores jump edges emitted for the bytecode (used by
- // RegExpBytecodePeepholeOptimization).
- // Key: jump source (offset in buffer_ where jump destination is stored).
- // Value: jump destination (offset in buffer_ to jump to).
- ZoneUnorderedMap<int, int> jump_edges_;
-
- Isolate* isolate_;
-
- static const int kInvalidPC = -1;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodeGenerator);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
diff --git a/js/src/new-regexp/regexp-bytecode-peephole.cc b/js/src/new-regexp/regexp-bytecode-peephole.cc
deleted file mode 100644
index f105a5094..000000000
--- a/js/src/new-regexp/regexp-bytecode-peephole.cc
+++ /dev/null
@@ -1,1028 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-bytecode-peephole.h"
-
-#include "new-regexp/regexp-bytecodes.h"
-
-namespace v8 {
-namespace internal {
-
-namespace {
-
-// Location of one argument: its byte offset and its length in bytes.
-struct BytecodeArgument {
-  int offset;
-  int length;
-
-  BytecodeArgument(int arg_offset, int arg_length)
-      : offset(arg_offset), length(arg_length) {}
-};
-
-// A BytecodeArgument together with the byte length the argument takes in the
-// replacement bytecode.
-struct BytecodeArgumentMapping : BytecodeArgument {
-  int new_length;
-
-  BytecodeArgumentMapping(int arg_offset, int arg_length, int mapped_length)
-      : BytecodeArgument(arg_offset, arg_length), new_length(mapped_length) {}
-};
-
-// A condition attached to a sequence node. The argument described by the
-// BytecodeArgument base must either equal an address (kCheckAddress) or equal
-// the value of another argument in the sequence (kCheckValue).
-struct BytecodeArgumentCheck : BytecodeArgument {
-  enum CheckType { kCheckAddress = 0, kCheckValue };
-  CheckType type;
-  // kCheckAddress: offset, relative to the sequence start, the argument must
-  // point to. kCheckValue: offset of the other argument to compare against.
-  int check_offset;
-  // Byte length of the other argument; only meaningful for kCheckValue.
-  int check_length;
-
-  // Creates an address check. check_length is not used by this kind of check
-  // but is zero-initialized so the object never carries an indeterminate value.
-  BytecodeArgumentCheck(int offset, int length, int check_offset)
-      : BytecodeArgument(offset, length),
-        type(kCheckAddress),
-        check_offset(check_offset),
-        check_length(0) {}
-  // Creates a value check against the argument at check_offset with the given
-  // check_length.
-  BytecodeArgumentCheck(int offset, int length, int check_offset,
-                        int check_length)
-      : BytecodeArgument(offset, length),
-        type(kCheckValue),
-        check_offset(check_offset),
-        check_length(check_length) {}
-};
-
-// Trie node for storing bytecode sequences we want to optimize; children are keyed by the next bytecode.
-class BytecodeSequenceNode {
- public:
- // Dummy bytecode used when we need to store/return a bytecode but it's not a
- // valid bytecode in the current context.
- static constexpr int kDummyBytecode = -1;
-
- BytecodeSequenceNode(int bytecode, Zone* zone);
- // Adds a new node as child of the current node if it isn't a child already.
- BytecodeSequenceNode& FollowedBy(int bytecode);
- // Marks the end of a sequence and sets optimized bytecode to replace all
- // bytecodes of the sequence with.
- BytecodeSequenceNode& ReplaceWith(int bytecode);
- // Maps arguments of bytecodes in the sequence to the optimized bytecode.
- // Order of invocation determines order of arguments in the optimized
- // bytecode.
- // Invoking this method is only allowed on nodes that mark the end of a valid
- // sequence (i.e. after ReplaceWith()).
- // bytecode_index_in_sequence: Zero-based index of the referred bytecode
- // within the sequence (e.g. the bytecode passed to CreateSequence() has
- // index 0).
- // argument_offset: Zero-based offset to the argument within the bytecode
- // (e.g. the first argument that's not packed with the bytecode has offset 4).
- // argument_byte_length: Length of the argument.
- // new_argument_byte_length: Length of the argument in the new bytecode
- // (= argument_byte_length if omitted or passed as 0).
- BytecodeSequenceNode& MapArgument(int bytecode_index_in_sequence,
- int argument_offset,
- int argument_byte_length,
- int new_argument_byte_length = 0);
- // Adds a check to the sequence node making it only a valid sequence when the
- // argument of the current bytecode at the specified offset matches the offset
- // to check against.
- // argument_offset: Zero-based offset to the argument within the bytecode
- // (e.g. the first argument that's not packed with the bytecode has offset 4).
- // argument_byte_length: Length of the argument.
- // check_byte_offset: Zero-based offset relative to the beginning of the
- // sequence that needs to match the value given by argument_offset. (e.g.
- // check_byte_offset 0 matches the address of the first bytecode in the
- // sequence).
- BytecodeSequenceNode& IfArgumentEqualsOffset(int argument_offset,
- int argument_byte_length,
- int check_byte_offset);
- // Adds a check to the sequence node making it only a valid sequence when the
- // argument of the current bytecode at the specified offset matches the
- // argument of another bytecode in the sequence.
- // This is similar to IfArgumentEqualsOffset, except that this method matches
- // the values of both arguments.
- BytecodeSequenceNode& IfArgumentEqualsValueAtOffset(
- int argument_offset, int argument_byte_length,
- int other_bytecode_index_in_sequence, int other_argument_offset,
- int other_argument_byte_length);
- // Marks an argument as unused.
- // All arguments that are not mapped explicitly have to be marked as unused.
- // bytecode_index_in_sequence: Zero-based index of the referred bytecode
- // within the sequence (e.g. the bytecode passed to CreateSequence() has
- // index 0).
- // argument_offset: Zero-based offset to the argument within the bytecode
- // (e.g. the first argument that's not packed with the bytecode has offset 4).
- // argument_byte_length: Length of the argument.
- BytecodeSequenceNode& IgnoreArgument(int bytecode_index_in_sequence,
- int argument_offset,
- int argument_byte_length);
- // Checks if the current node is valid for the sequence, i.e. all conditions
- // set by IfArgumentEqualsOffset and IfArgumentEqualsValueAtOffset are
- // fulfilled by this node for the actual bytecode sequence starting at pc.
- bool CheckArguments(const byte* bytecode, int pc);
- // Returns whether this node marks the end of a valid sequence (i.e. can be
- // replaced with an optimized bytecode).
- bool IsSequence() const;
- // Returns the length of the sequence in bytes.
- int SequenceLength() const;
- // Returns the optimized bytecode for the node or kDummyBytecode if it is not
- // the end of a valid sequence.
- int OptimizedBytecode() const;
- // Returns the child of the current node matching the given bytecode or
- // nullptr if no such child is found.
- BytecodeSequenceNode* Find(int bytecode) const;
- // Returns number of arguments mapped to the current node.
- // Invoking this method is only allowed on nodes that mark the end of a valid
- // sequence (i.e. if IsSequence())
- size_t ArgumentSize() const;
- // Returns the argument-mapping of the argument at index.
- // Invoking this method is only allowed on nodes that mark the end of a valid
- // sequence (i.e. if IsSequence())
- BytecodeArgumentMapping ArgumentMapping(size_t index) const;
- // Returns an iterator to the beginning of the ignored arguments.
- // Invoking this method is only allowed on nodes that mark the end of a valid
- // sequence (i.e. if IsSequence())
- ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredBegin() const;
- // Returns an iterator to the end of the ignored arguments.
- // Invoking this method is only allowed on nodes that mark the end of a valid
- // sequence (i.e. if IsSequence())
- ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredEnd() const;
- // Returns whether the list of ignored arguments has been allocated.
- bool HasIgnoredArguments() const;
-
- private:
- // Returns a node in the sequence specified by its index within the sequence.
- BytecodeSequenceNode& GetNodeByIndexInSequence(int index_in_sequence);
- Zone* zone() const;
-
- int bytecode_;  // Bytecode matched by this node (kDummyBytecode for the trie root).
- int bytecode_replacement_;  // kDummyBytecode until ReplaceWith() is called.
- int index_in_sequence_;  // Zero-based position of this node within its sequence.
- int start_offset_;  // Byte offset of this node's bytecode from the sequence start.
- BytecodeSequenceNode* parent_;  // Preceding node; nullptr for the first node of a sequence.
- ZoneUnorderedMap<int, BytecodeSequenceNode*> children_;
- ZoneVector<BytecodeArgumentMapping>* argument_mapping_;
- ZoneLinkedList<BytecodeArgumentCheck>* argument_check_;
- ZoneLinkedList<BytecodeArgument>* argument_ignored_;
-
- Zone* zone_;
-};
-
-class RegExpBytecodePeephole {  // Replaces known bytecode sequences with single optimized bytecodes.
- public:
- RegExpBytecodePeephole(Zone* zone, size_t buffer_size,
- const ZoneUnorderedMap<int, int>& jump_edges);
-
- // Parses bytecode and fills the internal buffer with the potentially
- // optimized bytecode. Returns true when optimizations were performed, false
- // otherwise.
- bool OptimizeBytecode(const byte* bytecode, int length);
- // Copies the internal bytecode buffer to another buffer. The caller is
- // responsible for allocating/freeing the memory.
- void CopyOptimizedBytecode(byte* to_address) const;
- int Length() const;  // Returns pc(); presumably the size of the emitted bytecode - confirm.
-
- private:
- // Sets up all sequences that are going to be used.
- void DefineStandardSequences();
- // Starts a new bytecode sequence.
- BytecodeSequenceNode& CreateSequence(int bytecode);
- // Checks for optimization candidates at pc and emits optimized bytecode to
- // the internal buffer. Returns the length of replaced bytecodes in bytes, or
- // 0 if no sequence matched.
- int TryOptimizeSequence(const byte* bytecode, int start_pc);
- // Emits optimized bytecode to the internal buffer. start_pc points to the
- // start of the sequence in bytecode and last_node is the last
- // BytecodeSequenceNode of the matching sequence found.
- void EmitOptimization(int start_pc, const byte* bytecode,
- const BytecodeSequenceNode& last_node);
- // Adds a relative jump source fixup at pos.
- // Jump source fixups are used to find offsets in the new bytecode that
- // contain jump sources.
- void AddJumpSourceFixup(int fixup, int pos);
- // Adds a relative jump destination fixup at pos.
- // Jump destination fixups are used to find offsets in the new bytecode that
- // can be jumped to.
- void AddJumpDestinationFixup(int fixup, int pos);
- // Sets an absolute jump destination fixup at pos.
- void SetJumpDestinationFixup(int fixup, int pos);
- // Prepares the internal structures used to fix up jumps.
- void PrepareJumpStructures(const ZoneUnorderedMap<int, int>& jump_edges);
- // Updates all jump targets in the new bytecode.
- void FixJumps();
- // Updates a single jump.
- void FixJump(int jump_source, int jump_destination);
- void AddSentinelFixups(int pos);
- template <typename T>
- void EmitValue(T value);
- template <typename T>
- void OverwriteValue(int offset, T value);
- void CopyRangeToOutput(const byte* orig_bytecode, int start, int length);
- void SetRange(byte value, int count);
- void EmitArgument(int start_pc, const byte* bytecode,
- BytecodeArgumentMapping arg);
- int pc() const;
- Zone* zone() const;
-
- ZoneVector<byte> optimized_bytecode_buffer_;
- BytecodeSequenceNode* sequences_;
- // Jumps used in old bytecode.
- // Key: Jump source (offset where destination is stored in old bytecode)
- // Value: Destination
- ZoneMap<int, int> jump_edges_;
- // Jumps used in new bytecode.
- // Key: Jump source (offset where destination is stored in new bytecode)
- // Value: Destination
- ZoneMap<int, int> jump_edges_mapped_;
- // Number of times a jump destination is used within the bytecode.
- // Key: Jump destination (offset in old bytecode).
- // Value: Number of times jump destination is used.
- ZoneMap<int, int> jump_usage_counts_;
- // Maps offsets in old bytecode to fixups of sources (delta to new bytecode).
- // Key: Offset in old bytecode from where the fixup is valid.
- // Value: Delta to map jump source from old bytecode to new bytecode in bytes.
- ZoneMap<int, int> jump_source_fixups_;
- // Maps offsets in old bytecode to fixups of destinations (delta to new
- // bytecode).
- // Key: Offset in old bytecode from where the fixup is valid.
- // Value: Delta to map jump destinations from old bytecode to new bytecode in
- // bytes.
- ZoneMap<int, int> jump_destination_fixups_;
-
- Zone* zone_;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodePeephole);
-};
-
-// Reads a value of type T located pos bytes into buffer. The address is
-// asserted (DCHECK) to be aligned for T.
-template <typename T>
-T GetValue(const byte* buffer, int pos) {
-  const byte* location = buffer + pos;
-  DCHECK(IsAligned(reinterpret_cast<Address>(location), alignof(T)));
-  const T* typed_location = reinterpret_cast<const T*>(location);
-  return *typed_location;
-}
-
-// Reads the argument of the given byte length (1, 2 or 4) found at offset in
-// bytecode and widens it to int32_t. Any other length is UNREACHABLE.
-int32_t GetArgumentValue(const byte* bytecode, int offset, int length) {
-  if (length == 1) return GetValue<byte>(bytecode, offset);
-  if (length == 2) return GetValue<int16_t>(bytecode, offset);
-  if (length == 4) return GetValue<int32_t>(bytecode, offset);
-  UNREACHABLE();
-}
-
-// Creates a parentless node for bytecode with no replacement set. The three
-// argument lists are allocated in zone and start out empty.
-BytecodeSequenceNode::BytecodeSequenceNode(int bytecode, Zone* zone)
-    : bytecode_(bytecode),
-      bytecode_replacement_(kDummyBytecode),
-      index_in_sequence_(0),
-      start_offset_(0),
-      parent_(nullptr),
-      children_(zone),
-      argument_mapping_(
-          new (zone->New(sizeof(ZoneVector<BytecodeArgumentMapping>)))
-              ZoneVector<BytecodeArgumentMapping>(zone)),
-      argument_check_(
-          new (zone->New(sizeof(ZoneLinkedList<BytecodeArgumentCheck>)))
-              ZoneLinkedList<BytecodeArgumentCheck>(zone)),
-      argument_ignored_(
-          new (zone->New(sizeof(ZoneLinkedList<BytecodeArgument>)))
-              ZoneLinkedList<BytecodeArgument>(zone)),
-      zone_(zone) {}
-
-// Returns the child node for bytecode, creating and registering it first when
-// this node has no such child yet.
-BytecodeSequenceNode& BytecodeSequenceNode::FollowedBy(int bytecode) {
-  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
-
-  auto existing = children_.find(bytecode);
-  if (existing != children_.end()) return *existing->second;
-
-  BytecodeSequenceNode* child = new (zone()->New(sizeof(BytecodeSequenceNode)))
-      BytecodeSequenceNode(bytecode, zone());
-  // Children of the dummy root start a sequence: they keep offset 0, index 0
-  // and no parent. Every other child continues the sequence of this node.
-  const bool starts_new_sequence = bytecode_ == kDummyBytecode;
-  if (!starts_new_sequence) {
-    child->start_offset_ = start_offset_ + RegExpBytecodeLength(bytecode_);
-    child->index_in_sequence_ = index_in_sequence_ + 1;
-    child->parent_ = this;
-  }
-  children_[bytecode] = child;
-  return *child;
-}
-
-// Records bytecode as the replacement for the sequence ending at this node,
-// which makes IsSequence() true. Returns this node for chaining.
-BytecodeSequenceNode& BytecodeSequenceNode::ReplaceWith(int bytecode) {
-  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
-  bytecode_replacement_ = bytecode;
-  return *this;
-}
-
-// Appends an argument mapping for the bytecode at bytecode_index_in_sequence.
-// The stored offset is relative to the start of the sequence. A
-// new_argument_byte_length of 0 means "same length as the original argument".
-BytecodeSequenceNode& BytecodeSequenceNode::MapArgument(
-    int bytecode_index_in_sequence, int argument_offset,
-    int argument_byte_length, int new_argument_byte_length) {
-  DCHECK(IsSequence());
-  DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);
-
-  BytecodeSequenceNode& source_node =
-      GetNodeByIndexInSequence(bytecode_index_in_sequence);
-  DCHECK_LT(argument_offset, RegExpBytecodeLength(source_node.bytecode_));
-
-  const int offset_in_sequence = source_node.start_offset_ + argument_offset;
-  const int mapped_length = new_argument_byte_length == 0
-                                ? argument_byte_length
-                                : new_argument_byte_length;
-
-  argument_mapping_->push_back(BytecodeArgumentMapping(
-      offset_in_sequence, argument_byte_length, mapped_length));
-
-  return *this;
-}
-
-// Adds an address check to this node: the argument at argument_offset of this
-// node's bytecode has to equal the address check_byte_offset bytes past the
-// start of the sequence.
-BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsOffset(
-    int argument_offset, int argument_byte_length, int check_byte_offset) {
-  DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
-  DCHECK(argument_byte_length == 1 || argument_byte_length == 2 ||
-         argument_byte_length == 4);
-
-  const int offset_in_sequence = start_offset_ + argument_offset;
-  argument_check_->push_back(BytecodeArgumentCheck(
-      offset_in_sequence, argument_byte_length, check_byte_offset));
-
-  return *this;
-}
-
-// Adds a value check to this node: the argument at argument_offset of this
-// node's bytecode has to hold the same value as the argument at
-// other_argument_offset of the bytecode at other_bytecode_index_in_sequence.
-BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsValueAtOffset(
-    int argument_offset, int argument_byte_length,
-    int other_bytecode_index_in_sequence, int other_argument_offset,
-    int other_argument_byte_length) {
-  DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
-  DCHECK_LE(other_bytecode_index_in_sequence, index_in_sequence_);
-  DCHECK_EQ(argument_byte_length, other_argument_byte_length);
-
-  BytecodeSequenceNode& other_node =
-      GetNodeByIndexInSequence(other_bytecode_index_in_sequence);
-  DCHECK_LT(other_argument_offset, RegExpBytecodeLength(other_node.bytecode_));
-
-  // Both offsets are stored relative to the start of the sequence.
-  const int own_offset = start_offset_ + argument_offset;
-  const int other_offset = other_node.start_offset_ + other_argument_offset;
-
-  argument_check_->push_back(BytecodeArgumentCheck(
-      own_offset, argument_byte_length, other_offset,
-      other_argument_byte_length));
-
-  return *this;
-}
-
-// Records the given argument of the bytecode at bytecode_index_in_sequence as
-// ignored. The stored offset is relative to the start of the sequence.
-BytecodeSequenceNode& BytecodeSequenceNode::IgnoreArgument(
-    int bytecode_index_in_sequence, int argument_offset,
-    int argument_byte_length) {
-  DCHECK(IsSequence());
-  DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);
-
-  BytecodeSequenceNode& source_node =
-      GetNodeByIndexInSequence(bytecode_index_in_sequence);
-  DCHECK_LT(argument_offset, RegExpBytecodeLength(source_node.bytecode_));
-
-  const int offset_in_sequence = source_node.start_offset_ + argument_offset;
-  argument_ignored_->push_back(
-      BytecodeArgument(offset_in_sequence, argument_byte_length));
-
-  return *this;
-}
-
-// Evaluates every check registered on this node against bytecode, where pc is
-// the position of the first bytecode of the candidate sequence. Returns false
-// at the first failing check and true when all checks pass (or none exist).
-bool BytecodeSequenceNode::CheckArguments(const byte* bytecode, int pc) {
-  for (const BytecodeArgumentCheck& check : *argument_check_) {
-    const int32_t actual =
-        GetArgumentValue(bytecode, pc + check.offset, check.length);
-    if (check.type == BytecodeArgumentCheck::kCheckAddress) {
-      // The argument must point at a fixed position relative to the sequence.
-      if (actual != pc + check.check_offset) return false;
-    } else if (check.type == BytecodeArgumentCheck::kCheckValue) {
-      // The argument must hold the same value as another argument.
-      const int32_t expected = GetArgumentValue(
-          bytecode, pc + check.check_offset, check.check_length);
-      if (actual != expected) return false;
-    } else {
-      UNREACHABLE();
-    }
-  }
-  return true;
-}
-
-// A node ends a replaceable sequence once a replacement bytecode has been set.
-bool BytecodeSequenceNode::IsSequence() const {
-  const bool has_replacement = bytecode_replacement_ != kDummyBytecode;
-  return has_replacement;
-}
-
-// Length in bytes of the sequence from its first bytecode up to and including
-// this node's bytecode.
-int BytecodeSequenceNode::SequenceLength() const {
-  const int own_length = RegExpBytecodeLength(bytecode_);
-  return start_offset_ + own_length;
-}
-
-// Returns the replacement bytecode (kDummyBytecode if none was set).
-int BytecodeSequenceNode::OptimizedBytecode() const {
-  const int replacement = bytecode_replacement_;
-  return replacement;
-}
-
-// Looks up the child registered for bytecode; nullptr when there is none.
-BytecodeSequenceNode* BytecodeSequenceNode::Find(int bytecode) const {
-  const auto child_entry = children_.find(bytecode);
-  return child_entry != children_.end() ? child_entry->second : nullptr;
-}
-
-// Number of argument mappings registered on this (sequence-ending) node.
-size_t BytecodeSequenceNode::ArgumentSize() const {
-  DCHECK(IsSequence());
-  const size_t mapping_count = argument_mapping_->size();
-  return mapping_count;
-}
-
-// Returns a copy of the index-th argument mapping of this sequence-ending
-// node. index must be smaller than ArgumentSize().
-BytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping(
-    size_t index) const {
-  DCHECK(IsSequence());
-  DCHECK(argument_mapping_ != nullptr);
-  DCHECK_LT(index, argument_mapping_->size());
-
-  const ZoneVector<BytecodeArgumentMapping>& mappings = *argument_mapping_;
-  return mappings.at(index);
-}
-
-// Iterator to the first ignored argument of this sequence-ending node.
-ZoneLinkedList<BytecodeArgument>::iterator
-BytecodeSequenceNode::ArgumentIgnoredBegin() const {
-  DCHECK(IsSequence());
-  DCHECK(argument_ignored_ != nullptr);
-  ZoneLinkedList<BytecodeArgument>& ignored = *argument_ignored_;
-  return ignored.begin();
-}
-
-// Past-the-end iterator of the ignored arguments of this sequence-ending node.
-ZoneLinkedList<BytecodeArgument>::iterator
-BytecodeSequenceNode::ArgumentIgnoredEnd() const {
-  DCHECK(IsSequence());
-  DCHECK(argument_ignored_ != nullptr);
-  ZoneLinkedList<BytecodeArgument>& ignored = *argument_ignored_;
-  return ignored.end();
-}
-
-// Reports whether the ignored-argument list exists. Note that this tests the
-// list pointer, not whether the list contains any entries.
-bool BytecodeSequenceNode::HasIgnoredArguments() const {
-  const bool list_allocated = argument_ignored_ != nullptr;
-  return list_allocated;
-}
-
-// Walks up the parent chain from this node until reaching the node whose
-// position in the sequence is index_in_sequence, and returns it.
-BytecodeSequenceNode& BytecodeSequenceNode::GetNodeByIndexInSequence(
-    int index_in_sequence) {
-  BytecodeSequenceNode* node = this;
-  DCHECK_LE(index_in_sequence, node->index_in_sequence_);
-
-  while (index_in_sequence < node->index_in_sequence_) {
-    DCHECK(node->parent_ != nullptr);
-    node = node->parent_;
-    DCHECK_LE(index_in_sequence, node->index_in_sequence_);
-  }
-  return *node;
-}
-
-// Zone this node was constructed with.
-Zone* BytecodeSequenceNode::zone() const {
-  return zone_;
-}
-
-// Sets up the optimizer for a bytecode of buffer_size bytes with the given
-// jump edges: reserves the output buffer, builds the jump bookkeeping,
-// registers the known sequences and installs sentinel fixups.
-RegExpBytecodePeephole::RegExpBytecodePeephole(
-    Zone* zone, size_t buffer_size,
-    const ZoneUnorderedMap<int, int>& jump_edges)
-    : optimized_bytecode_buffer_(zone),
-      sequences_(new (zone->New(sizeof(BytecodeSequenceNode)))
-                     BytecodeSequenceNode(BytecodeSequenceNode::kDummyBytecode,
-                                          zone)),
-      jump_edges_(zone),
-      jump_edges_mapped_(zone),
-      jump_usage_counts_(zone),
-      jump_source_fixups_(zone),
-      jump_destination_fixups_(zone),
-      zone_(zone) {
-  optimized_bytecode_buffer_.reserve(buffer_size);
-  PrepareJumpStructures(jump_edges);
-  DefineStandardSequences();
-
-  // Fixups are deltas that translate offsets of jump sources/destinations in
-  // the old bytecode to the new bytecode; all jump targets are fixed once the
-  // new bytecode is fully emitted. Sentinels before the beginning (position
-  // -1) and at the end of the old bytecode spare the fixup loops from checking
-  // for the end of the iterator.
-  AddSentinelFixups(-1);
-  DCHECK_LE(buffer_size, std::numeric_limits<int>::max());
-  const int end_of_old_bytecode = static_cast<int>(buffer_size);
-  AddSentinelFixups(end_of_old_bytecode);
-}
-
-void RegExpBytecodePeephole::DefineStandardSequences() {
- // Registers the replaceable sequences. Commonly used sequences can be found by creating regexp
- // bytecode traces (--trace-regexp-bytecodes) and using v8/tools/regexp-sequences.py.
- CreateSequence(BC_LOAD_CURRENT_CHAR)
- .FollowedBy(BC_CHECK_BIT_IN_TABLE)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_BIT_IN_TABLE)
- .MapArgument(0, 1, 3) // load offset
- .MapArgument(2, 1, 3, 4) // advance by
- .MapArgument(1, 8, 16) // bit table
- .MapArgument(1, 4, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(2, 4, 4); // loop jump
-
- CreateSequence(BC_CHECK_CURRENT_POSITION)
- .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
- .FollowedBy(BC_CHECK_CHAR)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_CHAR_POS_CHECKED)
- .MapArgument(1, 1, 3) // load offset
- .MapArgument(3, 1, 3, 2) // advance_by
- .MapArgument(2, 1, 3, 2) // c
- .MapArgument(0, 1, 3, 4) // eats at least
- .MapArgument(2, 4, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(3, 4, 4); // loop jump
-
- CreateSequence(BC_CHECK_CURRENT_POSITION)
- .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
- .FollowedBy(BC_AND_CHECK_CHAR)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_CHAR_AND)
- .MapArgument(1, 1, 3) // load offset
- .MapArgument(3, 1, 3, 2) // advance_by
- .MapArgument(2, 1, 3, 2) // c
- .MapArgument(2, 4, 4) // mask
- .MapArgument(0, 1, 3, 4) // eats at least
- .MapArgument(2, 8, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(3, 4, 4); // loop jump
-
- // TODO(pthier): It might make sense for short sequences like this one to only
- // optimize them if the resulting optimization is not longer than the current
- // one. This could be the case if there are jumps inside the sequence and we
- // have to replicate parts of the sequence. A method to mark such sequences
- // might be useful.
- CreateSequence(BC_LOAD_CURRENT_CHAR)
- .FollowedBy(BC_CHECK_CHAR)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_CHAR)
- .MapArgument(0, 1, 3) // load offset
- .MapArgument(2, 1, 3, 2) // advance by
- .MapArgument(1, 1, 3, 2) // character
- .MapArgument(1, 4, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(2, 4, 4); // loop jump
-
- CreateSequence(BC_LOAD_CURRENT_CHAR)
- .FollowedBy(BC_CHECK_CHAR)
- .FollowedBy(BC_CHECK_CHAR)
- // Sequence is only valid if the jump targets of both CHECK_CHAR bytecodes
- // are equal.
- .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_CHAR_OR_CHAR)
- .MapArgument(0, 1, 3) // load offset
- .MapArgument(3, 1, 3, 4) // advance by
- .MapArgument(1, 1, 3, 2) // character 1
- .MapArgument(2, 1, 3, 2) // character 2
- .MapArgument(1, 4, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(2, 4, 4) // goto when match 2
- .IgnoreArgument(3, 4, 4); // loop jump
-
- CreateSequence(BC_LOAD_CURRENT_CHAR)
- .FollowedBy(BC_CHECK_GT)
- // Sequence is only valid if the jump target of CHECK_GT is the first
- // bytecode AFTER the whole sequence.
- .IfArgumentEqualsOffset(4, 4, 56)
- .FollowedBy(BC_CHECK_BIT_IN_TABLE)
- // Sequence is only valid if the jump target of CHECK_BIT_IN_TABLE is
- // the ADVANCE_CP_AND_GOTO bytecode at the end of the sequence.
- .IfArgumentEqualsOffset(4, 4, 48)
- .FollowedBy(BC_GOTO)
- // Sequence is only valid if the jump target of GOTO is the same as the
- // jump target of CHECK_GT (i.e. both jump to the first bytecode AFTER the
- // whole sequence).
- .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
- .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
- // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
- // first bytecode in this sequence.
- .IfArgumentEqualsOffset(4, 4, 0)
- .ReplaceWith(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE)
- .MapArgument(0, 1, 3) // load offset
- .MapArgument(4, 1, 3, 2) // advance by
- .MapArgument(1, 1, 3, 2) // character
- .MapArgument(2, 8, 16) // bit table
- .MapArgument(1, 4, 4) // goto when match
- .MapArgument(0, 4, 4) // goto on failure
- .IgnoreArgument(2, 4, 4) // indirect loop jump
- .IgnoreArgument(3, 4, 4) // jump out of loop
- .IgnoreArgument(4, 4, 4); // loop jump
-}
-
-// Walks over the old bytecode once. At each position it either emits an
-// optimized replacement for a matching sequence or copies the single bytecode
-// unchanged. Jumps are fixed afterwards, but only if something was replaced.
-// Returns whether any optimization was performed.
-bool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode,
-                                              int length) {
-  bool did_optimize = false;
-
-  for (int old_pc = 0; old_pc < length;) {
-    const int replaced_len = TryOptimizeSequence(bytecode, old_pc);
-    if (replaced_len <= 0) {
-      // No sequence starts here: pass the bytecode through as is.
-      const int bc = bytecode[old_pc];
-      const int bc_len = RegExpBytecodeLength(bc);
-      CopyRangeToOutput(bytecode, old_pc, bc_len);
-      old_pc += bc_len;
-      continue;
-    }
-    old_pc += replaced_len;
-    did_optimize = true;
-  }
-
-  if (did_optimize) FixJumps();
-
-  return did_optimize;
-}
-
-// Copies Length() bytes from the start of the internal buffer to to_address.
-void RegExpBytecodePeephole::CopyOptimizedBytecode(byte* to_address) const {
-  const byte* first_byte = &(*optimized_bytecode_buffer_.begin());
-  MemCopy(to_address, first_byte, Length());
-}
-
-// Forwards to pc().
-int RegExpBytecodePeephole::Length() const {
-  return pc();
-}
-
-// Starts (or re-enters) the sequence beginning with bytecode by adding it as a
-// child of the trie root and returning that child.
-BytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) {
-  DCHECK(sequences_ != nullptr);
-  DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
-
-  BytecodeSequenceNode& first_node = sequences_->FollowedBy(bytecode);
-  return first_node;
-}
-
-// Follows the trie from start_pc for as long as bytecodes and argument checks
-// match, remembering the last node that ends a defined sequence. Emits the
-// optimization for that longest match and returns its length in bytes, or
-// returns 0 when nothing matched.
-int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode,
-                                                int start_pc) {
-  BytecodeSequenceNode* longest_match = nullptr;
-  int current_pc = start_pc;
-
-  for (BytecodeSequenceNode* node = sequences_->Find(bytecode[current_pc]);
-       node != nullptr && node->CheckArguments(bytecode, start_pc);
-       node = node->Find(bytecode[current_pc])) {
-    if (node->IsSequence()) longest_match = node;
-    current_pc += RegExpBytecodeLength(bytecode[current_pc]);
-  }
-
-  if (longest_match == nullptr) return 0;
-
-  EmitOptimization(start_pc, bytecode, *longest_match);
-  return longest_match->SequenceLength();
-}
-
-// Emits the optimized replacement for the sequence that starts at start_pc in
-// the old bytecode and ends at last_node, and records the jump bookkeeping
-// (mapped jump edges, usage counts, source/destination fixups) that FixJumps()
-// consumes later. Bytecodes of the old sequence that are still jump targets
-// after the mapping are preserved by copying them behind the optimized
-// bytecode.
-void RegExpBytecodePeephole::EmitOptimization(
-    int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node) {
-#ifdef DEBUG
-  int optimized_start_pc = pc();
-#endif
-  // Jump sources that are mapped or marked as unused will be deleted at the end
-  // of this method. We don't delete them immediately as we might need the
-  // information when we have to preserve bytecodes at the end.
-  // TODO(pthier): Replace with a stack-allocated data structure.
-  ZoneLinkedList<int> delete_jumps = ZoneLinkedList<int>(zone());
-
-  uint32_t bc = last_node.OptimizedBytecode();
-  EmitValue(bc);
-
-  for (size_t arg = 0; arg < last_node.ArgumentSize(); arg++) {
-    BytecodeArgumentMapping arg_map = last_node.ArgumentMapping(arg);
-    int arg_pos = start_pc + arg_map.offset;
-    // If we map any jump source we mark the old source for deletion and insert
-    // a new jump.
-    auto jump_edge_iter = jump_edges_.find(arg_pos);
-    if (jump_edge_iter != jump_edges_.end()) {
-      int jump_source = jump_edge_iter->first;
-      int jump_destination = jump_edge_iter->second;
-      // Add new jump edge at current position.
-      jump_edges_mapped_.emplace(Length(), jump_destination);
-      // Mark old jump edge for deletion.
-      delete_jumps.push_back(jump_source);
-      // Decrement usage count of jump destination.
-      auto jump_count_iter = jump_usage_counts_.find(jump_destination);
-      DCHECK(jump_count_iter != jump_usage_counts_.end());
-      int& usage_count = jump_count_iter->second;
-      --usage_count;
-    }
-    // TODO(pthier): DCHECK that mapped arguments are never sources of jumps
-    // to destinations inside the sequence.
-    EmitArgument(start_pc, bytecode, arg_map);
-  }
-  DCHECK_EQ(pc(), optimized_start_pc +
-                      RegExpBytecodeLength(last_node.OptimizedBytecode()));
-
-  // Remove jumps from arguments we ignore.
-  if (last_node.HasIgnoredArguments()) {
-    for (auto ignored_arg = last_node.ArgumentIgnoredBegin();
-         ignored_arg != last_node.ArgumentIgnoredEnd(); ++ignored_arg) {
-      auto jump_edge_iter = jump_edges_.find(start_pc + ignored_arg->offset);
-      if (jump_edge_iter != jump_edges_.end()) {
-        int jump_source = jump_edge_iter->first;
-        int jump_destination = jump_edge_iter->second;
-        // Mark old jump edge for deletion.
-        delete_jumps.push_back(jump_source);
-        // Decrement usage count of jump destination.
-        auto jump_count_iter = jump_usage_counts_.find(jump_destination);
-        DCHECK(jump_count_iter != jump_usage_counts_.end());
-        int& usage_count = jump_count_iter->second;
-        --usage_count;
-      }
-    }
-  }
-
-  // First position in the old bytecode behind the replaced sequence.
-  const int sequence_end = start_pc + last_node.SequenceLength();
-  int fixup_length = RegExpBytecodeLength(bc) - last_node.SequenceLength();
-
-  // Check if there are any jumps inside the old sequence.
-  // If so we have to keep the bytecodes that are jumped to around.
-  // Jump destinations only jumped to from inside the sequence (usage count 0)
-  // will be ignored. The iterator is compared against end() before every
-  // dereference, as there may be no jump destination behind start_pc at all.
-  auto jump_destination_candidate = jump_usage_counts_.upper_bound(start_pc);
-  while (jump_destination_candidate != jump_usage_counts_.end() &&
-         jump_destination_candidate->second == 0) {
-    ++jump_destination_candidate;
-  }
-
-  int preserve_from = sequence_end;
-  if (jump_destination_candidate != jump_usage_counts_.end() &&
-      jump_destination_candidate->first < sequence_end) {
-    preserve_from = jump_destination_candidate->first;
-    // Check if any jump in the sequence we are preserving has a jump
-    // destination inside the optimized sequence before the current position we
-    // want to preserve. If so we have to preserve all bytecodes starting at
-    // this jump destination.
-    for (auto jump_iter = jump_edges_.lower_bound(preserve_from);
-         jump_iter != jump_edges_.end() &&
-         jump_iter->first /* jump source */ < sequence_end;
-         ++jump_iter) {
-      int jump_destination = jump_iter->second;
-      if (jump_destination > start_pc && jump_destination < preserve_from) {
-        preserve_from = jump_destination;
-      }
-    }
-
-    // We preserve everything to the end of the sequence. This is conservative
-    // since it would be enough to preserve all bytecodes up to an unconditional
-    // jump.
-    int preserve_length = sequence_end - preserve_from;
-    fixup_length += preserve_length;
-    // Jumps after the start of the preserved sequence need fixup.
-    AddJumpSourceFixup(fixup_length, sequence_end - preserve_length);
-    // All jump targets after the start of the optimized sequence need to be
-    // fixed relative to the length of the optimized sequence including
-    // bytecodes we preserved.
-    AddJumpDestinationFixup(fixup_length, start_pc + 1);
-    // Jumps to the sequence we preserved need absolute fixup as they could
-    // occur before or after the sequence.
-    SetJumpDestinationFixup(pc() - preserve_from, preserve_from);
-    CopyRangeToOutput(bytecode, preserve_from, preserve_length);
-  } else {
-    AddJumpDestinationFixup(fixup_length, start_pc + 1);
-    // Jumps after the end of the old sequence need fixup.
-    AddJumpSourceFixup(fixup_length, sequence_end);
-  }
-
-  // Delete jumps we definitely don't need anymore. Jump sources inside the
-  // preserved range are kept because those bytecodes were copied to the output.
-  for (int del : delete_jumps) {
-    if (del < preserve_from) {
-      jump_edges_.erase(del);
-    }
-  }
-}
-
-// Stores a source fixup at pos whose value is the fixup of the closest entry
-// below pos plus the given delta. Relies on entries existing on both sides of
-// pos (asserted via DCHECK).
-void RegExpBytecodePeephole::AddJumpSourceFixup(int fixup, int pos) {
-  auto entry = jump_source_fixups_.lower_bound(pos);
-  DCHECK(entry != jump_source_fixups_.end());
-  DCHECK(entry != jump_source_fixups_.begin());
-
-  --entry;  // Step back to the last fixup located before pos.
-  const int accumulated_fixup = entry->second;
-  jump_source_fixups_[pos] = accumulated_fixup + fixup;
-}
-
-// Stores a destination fixup at pos whose value is the fixup of the closest
-// entry below pos plus the given delta. Relies on entries existing on both
-// sides of pos (asserted via DCHECK).
-void RegExpBytecodePeephole::AddJumpDestinationFixup(int fixup, int pos) {
-  auto entry = jump_destination_fixups_.lower_bound(pos);
-  DCHECK(entry != jump_destination_fixups_.end());
-  DCHECK(entry != jump_destination_fixups_.begin());
-
-  --entry;  // Step back to the last fixup located before pos.
-  const int accumulated_fixup = entry->second;
-  jump_destination_fixups_[pos] = accumulated_fixup + fixup;
-}
-
-void RegExpBytecodePeephole::SetJumpDestinationFixup(int fixup, int pos) {
- auto previous_fixup = jump_destination_fixups_.lower_bound(pos);
- DCHECK(previous_fixup != jump_destination_fixups_.end());
- DCHECK(previous_fixup != jump_destination_fixups_.begin());
-
- int previous_fixup_value = (--previous_fixup)->second;
- jump_destination_fixups_.emplace(pos, fixup);
- jump_destination_fixups_.emplace(pos + 1, previous_fixup_value);
-}
-
-void RegExpBytecodePeephole::PrepareJumpStructures(
- const ZoneUnorderedMap<int, int>& jump_edges) {
- for (auto jump_edge : jump_edges) {
- int jump_source = jump_edge.first;
- int jump_destination = jump_edge.second;
-
- jump_edges_.emplace(jump_source, jump_destination);
- jump_usage_counts_[jump_destination]++;
- }
-}
-
-void RegExpBytecodePeephole::FixJumps() {
- int position_fixup = 0;
- // Next position where fixup changes.
- auto next_source_fixup = jump_source_fixups_.lower_bound(0);
- int next_source_fixup_offset = next_source_fixup->first;
- int next_source_fixup_value = next_source_fixup->second;
-
- for (auto jump_edge : jump_edges_) {
- int jump_source = jump_edge.first;
- int jump_destination = jump_edge.second;
- while (jump_source >= next_source_fixup_offset) {
- position_fixup = next_source_fixup_value;
- ++next_source_fixup;
- next_source_fixup_offset = next_source_fixup->first;
- next_source_fixup_value = next_source_fixup->second;
- }
- jump_source += position_fixup;
-
- FixJump(jump_source, jump_destination);
- }
-
- // Mapped jump edges don't need source fixups, as the position already is an
- // offset in the new bytecode.
- for (auto jump_edge : jump_edges_mapped_) {
- int jump_source = jump_edge.first;
- int jump_destination = jump_edge.second;
-
- FixJump(jump_source, jump_destination);
- }
-}
-
-void RegExpBytecodePeephole::FixJump(int jump_source, int jump_destination) {
- int fixed_jump_destination =
- jump_destination +
- (--jump_destination_fixups_.upper_bound(jump_destination))->second;
- DCHECK_LT(fixed_jump_destination, Length());
-#ifdef DEBUG
- // TODO(pthier): This check could be better if we track the bytecodes
- // actually used and check if we jump to one of them.
- byte jump_bc = optimized_bytecode_buffer_[fixed_jump_destination];
- DCHECK_GT(jump_bc, 0);
- DCHECK_LT(jump_bc, kRegExpBytecodeCount);
-#endif
-
- if (jump_destination != fixed_jump_destination) {
- OverwriteValue<uint32_t>(jump_source, fixed_jump_destination);
- }
-}
-
-void RegExpBytecodePeephole::AddSentinelFixups(int pos) {
- jump_source_fixups_.emplace(pos, 0);
- jump_destination_fixups_.emplace(pos, 0);
-}
-
-template <typename T>
-void RegExpBytecodePeephole::EmitValue(T value) {
- DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
- optimized_bytecode_buffer_.end());
- byte* value_byte_iter = reinterpret_cast<byte*>(&value);
- optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
- value_byte_iter,
- value_byte_iter + sizeof(T));
-}
-
-template <typename T>
-void RegExpBytecodePeephole::OverwriteValue(int offset, T value) {
- byte* value_byte_iter = reinterpret_cast<byte*>(&value);
- byte* value_byte_iter_end = value_byte_iter + sizeof(T);
- while (value_byte_iter < value_byte_iter_end) {
- optimized_bytecode_buffer_[offset++] = *value_byte_iter++;
- }
-}
-
-void RegExpBytecodePeephole::CopyRangeToOutput(const byte* orig_bytecode,
- int start, int length) {
- DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
- optimized_bytecode_buffer_.end());
- optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
- orig_bytecode + start,
- orig_bytecode + start + length);
-}
-
-void RegExpBytecodePeephole::SetRange(byte value, int count) {
- DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
- optimized_bytecode_buffer_.end());
- optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), count,
- value);
-}
-
-void RegExpBytecodePeephole::EmitArgument(int start_pc, const byte* bytecode,
- BytecodeArgumentMapping arg) {
- int arg_pos = start_pc + arg.offset;
- switch (arg.length) {
- case 1:
- DCHECK_EQ(arg.new_length, arg.length);
- EmitValue(GetValue<byte>(bytecode, arg_pos));
- break;
- case 2:
- DCHECK_EQ(arg.new_length, arg.length);
- EmitValue(GetValue<uint16_t>(bytecode, arg_pos));
- break;
- case 3: {
- // Length 3 only occurs in 'packed' arguments where the lowermost byte is
- // the current bytecode, and the remaining 3 bytes are the packed value.
- //
- // We load 4 bytes from position - 1 and shift out the bytecode.
-#ifdef V8_TARGET_BIG_ENDIAN
- UNIMPLEMENTED();
- int32_t val = 0;
-#else
- int32_t val = GetValue<int32_t>(bytecode, arg_pos - 1) >> kBitsPerByte;
-#endif // V8_TARGET_BIG_ENDIAN
-
- switch (arg.new_length) {
- case 2:
- EmitValue<uint16_t>(val);
- break;
- case 3: {
- // Pack with previously emitted value.
- auto prev_val =
- GetValue<int32_t>(&(*optimized_bytecode_buffer_.begin()),
- Length() - sizeof(uint32_t));
-#ifdef V8_TARGET_BIG_ENDIAN
- UNIMPLEMENTED();
- USE(prev_val);
-#else
- DCHECK_EQ(prev_val & 0xFFFFFF00, 0);
- OverwriteValue<uint32_t>(
- pc() - sizeof(uint32_t),
- (static_cast<uint32_t>(val) << 8) | (prev_val & 0xFF));
-#endif // V8_TARGET_BIG_ENDIAN
- break;
- }
- case 4:
- EmitValue<uint32_t>(val);
- break;
- }
- break;
- }
- case 4:
- DCHECK_EQ(arg.new_length, arg.length);
- EmitValue(GetValue<uint32_t>(bytecode, arg_pos));
- break;
- case 8:
- DCHECK_EQ(arg.new_length, arg.length);
- EmitValue(GetValue<uint64_t>(bytecode, arg_pos));
- break;
- default:
- CopyRangeToOutput(bytecode, arg_pos, Min(arg.length, arg.new_length));
- if (arg.length < arg.new_length) {
- SetRange(0x00, arg.new_length - arg.length);
- }
- break;
- }
-}
-
-int RegExpBytecodePeephole::pc() const {
- DCHECK_LE(optimized_bytecode_buffer_.size(), std::numeric_limits<int>::max());
- return static_cast<int>(optimized_bytecode_buffer_.size());
-}
-
-Zone* RegExpBytecodePeephole::zone() const { return zone_; }
-
-} // namespace
-
-// static
-Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode(
- Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
- int length, const ZoneUnorderedMap<int, int>& jump_edges) {
- RegExpBytecodePeephole peephole(zone, length, jump_edges);
- bool did_optimize = peephole.OptimizeBytecode(bytecode, length);
- Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length());
- peephole.CopyOptimizedBytecode(array->GetDataStartAddress());
-
- if (did_optimize && FLAG_trace_regexp_peephole_optimization) {
- PrintF("Original Bytecode:\n");
- RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get());
- PrintF("Optimized Bytecode:\n");
- RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(),
- source->ToCString().get());
- }
-
- return array;
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-bytecode-peephole.h b/js/src/new-regexp/regexp-bytecode-peephole.h
deleted file mode 100644
index 781f0c914..000000000
--- a/js/src/new-regexp/regexp-bytecode-peephole.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
-#define V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-class ByteArray;
-
-// Peephole optimization for regexp interpreter bytecode.
-// Pre-defined bytecode sequences occuring in the bytecode generated by the
-// RegExpBytecodeGenerator can be optimized into a single bytecode.
-class RegExpBytecodePeepholeOptimization : public AllStatic {
- public:
- // Performs peephole optimization on the given bytecode and returns the
- // optimized bytecode.
- static Handle<ByteArray> OptimizeBytecode(
- Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
- int length, const ZoneUnorderedMap<int, int>& jump_edges);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_BYTECODE_PEEPHOLE_H_
diff --git a/js/src/new-regexp/regexp-bytecodes.cc b/js/src/new-regexp/regexp-bytecodes.cc
deleted file mode 100644
index 679a7c06a..000000000
--- a/js/src/new-regexp/regexp-bytecodes.cc
+++ /dev/null
@@ -1,45 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-bytecodes.h"
-
-#include <cctype>
-
-
-namespace v8 {
-namespace internal {
-
-void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc) {
- PrintF("%s", RegExpBytecodeName(*pc));
-
- // Args and the bytecode as hex.
- for (int i = 0; i < RegExpBytecodeLength(*pc); i++) {
- PrintF(", %02x", pc[i]);
- }
- PrintF(" ");
-
- // Args as ascii.
- for (int i = 1; i < RegExpBytecodeLength(*pc); i++) {
- unsigned char b = pc[i];
- PrintF("%c", std::isprint(b) ? b : '.');
- }
- PrintF("\n");
-}
-
-void RegExpBytecodeDisassemble(const byte* code_base, int length,
- const char* pattern) {
- PrintF("[generated bytecode for regexp pattern: '%s']\n", pattern);
-
- ptrdiff_t offset = 0;
-
- while (offset < length) {
- const byte* const pc = code_base + offset;
- PrintF("%p %4" V8PRIxPTRDIFF " ", pc, offset);
- RegExpBytecodeDisassembleSingle(code_base, pc);
- offset += RegExpBytecodeLength(*pc);
- }
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-bytecodes.h b/js/src/new-regexp/regexp-bytecodes.h
deleted file mode 100644
index e5ab7cf66..000000000
--- a/js/src/new-regexp/regexp-bytecodes.h
+++ /dev/null
@@ -1,251 +0,0 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_BYTECODES_H_
-#define V8_REGEXP_REGEXP_BYTECODES_H_
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-// Maximum number of bytecodes that will be used (next power of 2 of actually
-// defined bytecodes).
-// All slots between the last actually defined bytecode and maximum id will be
-// filled with BREAKs, indicating an invalid operation. This way using
-// BYTECODE_MASK guarantees no OOB access to the dispatch table.
-constexpr int kRegExpPaddedBytecodeCount = 1 << 6;
-constexpr int BYTECODE_MASK = kRegExpPaddedBytecodeCount - 1;
-// The first argument is packed in with the byte code in one word, but so it
-// has 24 bits, but it can be positive and negative so only use 23 bits for
-// positive values.
-const unsigned int MAX_FIRST_ARG = 0x7fffffu;
-const int BYTECODE_SHIFT = 8;
-STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
-
-// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
-// name or at least by position.
-#define BYTECODE_ITERATOR(V) \
- V(BREAK, 0, 4) /* bc8 */ \
- V(PUSH_CP, 1, 4) /* bc8 pad24 */ \
- V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \
- V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \
- V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \
- V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \
- V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \
- V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \
- V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \
- V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \
- V(POP_CP, 10, 4) /* bc8 pad24 */ \
- V(POP_BT, 11, 4) /* bc8 pad24 */ \
- V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \
- V(FAIL, 13, 4) /* bc8 pad24 */ \
- V(SUCCEED, 14, 4) /* bc8 pad24 */ \
- V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \
- /* Jump to another bytecode given its offset. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: 0x00 (unused) Padding */ \
- /* 0x20 - 0x3F: Address of bytecode to jump to */ \
- V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \
- /* Check if offset is in range and load character at given offset. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: Offset from current position */ \
- /* 0x20 - 0x3F: Address of bytecode when load is out of range */ \
- V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \
- /* Load character at given offset without range checks. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: Offset from current position */ \
- V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \
- V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \
- V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \
- V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \
- V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \
- V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \
- /* Check if current character is equal to a given character */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x19 (fixed) Bytecode */ \
- /* 0x08 - 0x0F: 0x00 (unused) Padding */ \
- /* 0x10 - 0x1F: Character to check */ \
- /* 0x20 - 0x3F: Address of bytecode when matched */ \
- V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \
- V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \
- V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \
- V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
- /* Checks if the current character combined with mask (bitwise and) */ \
- /* matches a character (e.g. used when two characters in a disjunction */ \
- /* differ by only a single bit */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x1c (fixed) Bytecode */ \
- /* 0x08 - 0x0F: 0x00 (unused) Padding */ \
- /* 0x10 - 0x1F: Character to match against (after mask aplied) */ \
- /* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \
- /* 0x40 - 0x5F: Address of bytecode when matched */ \
- V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
- V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \
- V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \
- V(MINUS_AND_CHECK_NOT_CHAR, 31, 12) /* bc8 pad8 uc16 uc16 uc16 addr32 */ \
- V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
- V(CHECK_CHAR_NOT_IN_RANGE, 33, 12) /* bc8 pad24 uc16 uc16 addr32 */ \
- /* Checks if the current character matches any of the characters encoded */ \
- /* in a bit table. Similar to/inspired by boyer moore string search */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x22 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: 0x00 (unused) Padding */ \
- /* 0x20 - 0x3F: Address of bytecode when bit is set */ \
- /* 0x40 - 0xBF: Bit table */ \
- V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \
- V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
- V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
- V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) /* UNUSED */ \
- V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) /* UNUSED */ \
- V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
- V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \
- V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \
- V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \
- V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \
- V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \
- /* Checks if the current position matches top of backtrack stack */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: 0x00 (unused) Padding */ \
- /* 0x20 - 0x3F: Address of bytecode when current matches tos */ \
- V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \
- /* Advance character pointer by given offset and jump to another bytecode.*/ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: Number of characters to advance */ \
- /* 0x20 - 0x3F: Address of bytecode to jump to */ \
- V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \
- V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \
- /* Checks if current position + given offset is in range. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \
- /* 0x08 - 0x1F: Offset from current position */ \
- /* 0x20 - 0x3F: Address of bytecode when position is out of range */ \
- V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \
- /* Combination of: */ \
- /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x35 (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x3F Number of characters to advance */ \
- /* 0x40 - 0xBF Bit Table */ \
- /* 0xC0 - 0xDF Address of bytecode when character is matched */ \
- /* 0xE0 - 0xFF Address of bytecode when no match */ \
- V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32) \
- /* Combination of: */ \
- /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \
- /* and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x36 (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x2F Number of characters to advance */ \
- /* 0x30 - 0x3F Character to match against (after mask applied) */ \
- /* 0x40 - 0x5F: Bitmask bitwise and combined with current character */ \
- /* 0x60 - 0x7F Minimum number of characters this pattern consumes */ \
- /* 0x80 - 0x9F Address of bytecode when character is matched */ \
- /* 0xA0 - 0xBF Address of bytecode when no match */ \
- V(SKIP_UNTIL_CHAR_AND, 54, 24) \
- /* Combination of: */ \
- /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x37 (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x2F Number of characters to advance */ \
- /* 0x30 - 0x3F Character to match */ \
- /* 0x40 - 0x5F Address of bytecode when character is matched */ \
- /* 0x60 - 0x7F Address of bytecode when no match */ \
- V(SKIP_UNTIL_CHAR, 55, 16) \
- /* Combination of: */ \
- /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \
- /* and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x38 (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x2F Number of characters to advance */ \
- /* 0x30 - 0x3F Character to match */ \
- /* 0x40 - 0x5F Minimum number of characters this pattern consumes */ \
- /* 0x60 - 0x7F Address of bytecode when character is matched */ \
- /* 0x80 - 0x9F Address of bytecode when no match */ \
- V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20) \
- /* Combination of: */ \
- /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x39 (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x3F Number of characters to advance */ \
- /* 0x40 - 0x4F Character to match */ \
- /* 0x50 - 0x5F Other Character to match */ \
- /* 0x60 - 0x7F Address of bytecode when either character is matched */ \
- /* 0x80 - 0x9F Address of bytecode when no match */ \
- V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20) \
- /* Combination of: */ \
- /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and */ \
- /* and ADVANCE_CP_AND_GOTO */ \
- /* Emitted by RegExpBytecodePeepholeOptimization. */ \
- /* Bit Layout: */ \
- /* 0x00 - 0x07 0x3A (fixed) Bytecode */ \
- /* 0x08 - 0x1F Load character offset from current position */ \
- /* 0x20 - 0x2F Number of characters to advance */ \
- /* 0x30 - 0x3F Character to check if it is less than current char */ \
- /* 0x40 - 0xBF Bit Table */ \
- /* 0xC0 - 0xDF Address of bytecode when character is matched */ \
- /* 0xE0 - 0xFF Address of bytecode when no match */ \
- V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32)
-
-#define COUNT(...) +1
-static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT);
-#undef COUNT
-
-// Just making sure we assigned values above properly. They should be
-// contiguous, strictly increasing, and start at 0.
-// TODO(jgruber): Do not explicitly assign values, instead generate them
-// implicitly from the list order.
-STATIC_ASSERT(kRegExpBytecodeCount == 59);
-
-#define DECLARE_BYTECODES(name, code, length) \
- static constexpr int BC_##name = code;
-BYTECODE_ITERATOR(DECLARE_BYTECODES)
-#undef DECLARE_BYTECODES
-
-static constexpr int kRegExpBytecodeLengths[] = {
-#define DECLARE_BYTECODE_LENGTH(name, code, length) length,
- BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH)
-#undef DECLARE_BYTECODE_LENGTH
-};
-
-inline constexpr int RegExpBytecodeLength(int bytecode) {
- return kRegExpBytecodeLengths[bytecode];
-}
-
-static const char* const kRegExpBytecodeNames[] = {
-#define DECLARE_BYTECODE_NAME(name, ...) #name,
- BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME)
-#undef DECLARE_BYTECODE_NAME
-};
-
-inline const char* RegExpBytecodeName(int bytecode) {
- return kRegExpBytecodeNames[bytecode];
-}
-
-void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc);
-void RegExpBytecodeDisassemble(const byte* code_base, int length,
- const char* pattern);
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_BYTECODES_H_
diff --git a/js/src/new-regexp/regexp-compiler-tonode.cc b/js/src/new-regexp/regexp-compiler-tonode.cc
deleted file mode 100644
index 7de167eef..000000000
--- a/js/src/new-regexp/regexp-compiler-tonode.cc
+++ /dev/null
@@ -1,1589 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-compiler.h"
-
-#include "new-regexp/regexp.h"
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/special-case.h"
-#endif // V8_INTL_SUPPORT
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/locid.h"
-#include "unicode/uniset.h"
-#include "unicode/utypes.h"
-#endif // V8_INTL_SUPPORT
-
-namespace v8 {
-namespace internal {
-
-using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
-
-// -------------------------------------------------------------------
-// Tree to graph conversion
-
-RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- ZoneList<TextElement>* elms =
- new (compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
- elms->Add(TextElement::Atom(this), compiler->zone());
- return new (compiler->zone())
- TextNode(elms, compiler->read_backward(), on_success);
-}
-
-RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- return new (compiler->zone())
- TextNode(elements(), compiler->read_backward(), on_success);
-}
-
-static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
- const int* special_class, int length) {
- length--; // Remove final marker.
- DCHECK_EQ(kRangeEndMarker, special_class[length]);
- DCHECK_NE(0, ranges->length());
- DCHECK_NE(0, length);
- DCHECK_NE(0, special_class[0]);
- if (ranges->length() != (length >> 1) + 1) {
- return false;
- }
- CharacterRange range = ranges->at(0);
- if (range.from() != 0) {
- return false;
- }
- for (int i = 0; i < length; i += 2) {
- if (special_class[i] != (range.to() + 1)) {
- return false;
- }
- range = ranges->at((i >> 1) + 1);
- if (special_class[i + 1] != range.from()) {
- return false;
- }
- }
- if (range.to() != String::kMaxCodePoint) {
- return false;
- }
- return true;
-}
-
-static bool CompareRanges(ZoneList<CharacterRange>* ranges,
- const int* special_class, int length) {
- length--; // Remove final marker.
- DCHECK_EQ(kRangeEndMarker, special_class[length]);
- if (ranges->length() * 2 != length) {
- return false;
- }
- for (int i = 0; i < length; i += 2) {
- CharacterRange range = ranges->at(i >> 1);
- if (range.from() != special_class[i] ||
- range.to() != special_class[i + 1] - 1) {
- return false;
- }
- }
- return true;
-}
-
-bool RegExpCharacterClass::is_standard(Zone* zone) {
- // TODO(lrn): Remove need for this function, by not throwing away information
- // along the way.
- if (is_negated()) {
- return false;
- }
- if (set_.is_standard()) {
- return true;
- }
- if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
- set_.set_standard_set_type('s');
- return true;
- }
- if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
- set_.set_standard_set_type('S');
- return true;
- }
- if (CompareInverseRanges(set_.ranges(zone), kLineTerminatorRanges,
- kLineTerminatorRangeCount)) {
- set_.set_standard_set_type('.');
- return true;
- }
- if (CompareRanges(set_.ranges(zone), kLineTerminatorRanges,
- kLineTerminatorRangeCount)) {
- set_.set_standard_set_type('n');
- return true;
- }
- if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
- set_.set_standard_set_type('w');
- return true;
- }
- if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
- set_.set_standard_set_type('W');
- return true;
- }
- return false;
-}
-
-UnicodeRangeSplitter::UnicodeRangeSplitter(ZoneList<CharacterRange>* base) {
- // The unicode range splitter categorizes given character ranges into:
- // - Code points from the BMP representable by one code unit.
- // - Code points outside the BMP that need to be split into surrogate pairs.
- // - Lone lead surrogates.
- // - Lone trail surrogates.
- // Lone surrogates are valid code points, even though no actual characters.
- // They require special matching to make sure we do not split surrogate pairs.
-
- for (int i = 0; i < base->length(); i++) AddRange(base->at(i));
-}
-
-void UnicodeRangeSplitter::AddRange(CharacterRange range) {
- static constexpr uc32 kBmp1Start = 0;
- static constexpr uc32 kBmp1End = kLeadSurrogateStart - 1;
- static constexpr uc32 kBmp2Start = kTrailSurrogateEnd + 1;
- static constexpr uc32 kBmp2End = kNonBmpStart - 1;
-
- // Ends are all inclusive.
- STATIC_ASSERT(kBmp1Start == 0);
- STATIC_ASSERT(kBmp1Start < kBmp1End);
- STATIC_ASSERT(kBmp1End + 1 == kLeadSurrogateStart);
- STATIC_ASSERT(kLeadSurrogateStart < kLeadSurrogateEnd);
- STATIC_ASSERT(kLeadSurrogateEnd + 1 == kTrailSurrogateStart);
- STATIC_ASSERT(kTrailSurrogateStart < kTrailSurrogateEnd);
- STATIC_ASSERT(kTrailSurrogateEnd + 1 == kBmp2Start);
- STATIC_ASSERT(kBmp2Start < kBmp2End);
- STATIC_ASSERT(kBmp2End + 1 == kNonBmpStart);
- STATIC_ASSERT(kNonBmpStart < kNonBmpEnd);
-
- static constexpr uc32 kStarts[] = {
- kBmp1Start, kLeadSurrogateStart, kTrailSurrogateStart,
- kBmp2Start, kNonBmpStart,
- };
-
- static constexpr uc32 kEnds[] = {
- kBmp1End, kLeadSurrogateEnd, kTrailSurrogateEnd, kBmp2End, kNonBmpEnd,
- };
-
- CharacterRangeVector* const kTargets[] = {
- &bmp_, &lead_surrogates_, &trail_surrogates_, &bmp_, &non_bmp_,
- };
-
- static constexpr int kCount = arraysize(kStarts);
- STATIC_ASSERT(kCount == arraysize(kEnds));
- STATIC_ASSERT(kCount == arraysize(kTargets));
-
- for (int i = 0; i < kCount; i++) {
- if (kStarts[i] > range.to()) break;
- const uc32 from = std::max(kStarts[i], range.from());
- const uc32 to = std::min(kEnds[i], range.to());
- if (from > to) continue;
- kTargets[i]->emplace_back(CharacterRange::Range(from, to));
- }
-}
-
-namespace {
-
-// Translates between new and old V8-isms (SmallVector, ZoneList).
-ZoneList<CharacterRange>* ToCanonicalZoneList(
- const UnicodeRangeSplitter::CharacterRangeVector* v, Zone* zone) {
- if (v->empty()) return nullptr;
-
- ZoneList<CharacterRange>* result =
- new (zone) ZoneList<CharacterRange>(static_cast<int>(v->size()), zone);
- for (size_t i = 0; i < v->size(); i++) {
- result->Add(v->at(i), zone);
- }
-
- CharacterRange::Canonicalize(result);
- return result;
-}
-
-void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
- ZoneList<CharacterRange>* bmp =
- ToCanonicalZoneList(splitter->bmp(), compiler->zone());
- if (bmp == nullptr) return;
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
- compiler->zone(), bmp, compiler->read_backward(), on_success,
- default_flags)));
-}
-
-void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- ZoneList<CharacterRange>* non_bmp =
- ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
- if (non_bmp == nullptr) return;
- DCHECK(!compiler->one_byte());
- Zone* zone = compiler->zone();
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- CharacterRange::Canonicalize(non_bmp);
- for (int i = 0; i < non_bmp->length(); i++) {
- // Match surrogate pair.
- // E.g. [\u10005-\u11005] becomes
- // \ud800[\udc05-\udfff]|
- // [\ud801-\ud803][\udc00-\udfff]|
- // \ud804[\udc00-\udc05]
- uc32 from = non_bmp->at(i).from();
- uc32 to = non_bmp->at(i).to();
- uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
- uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
- uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
- uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
- if (from_l == to_l) {
- // The lead surrogate is the same.
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- zone, CharacterRange::Singleton(from_l),
- CharacterRange::Range(from_t, to_t), compiler->read_backward(),
- on_success, default_flags)));
- } else {
- if (from_t != kTrailSurrogateStart) {
- // Add [from_l][from_t-\udfff]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- zone, CharacterRange::Singleton(from_l),
- CharacterRange::Range(from_t, kTrailSurrogateEnd),
- compiler->read_backward(), on_success, default_flags)));
- from_l++;
- }
- if (to_t != kTrailSurrogateEnd) {
- // Add [to_l][\udc00-to_t]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- zone, CharacterRange::Singleton(to_l),
- CharacterRange::Range(kTrailSurrogateStart, to_t),
- compiler->read_backward(), on_success, default_flags)));
- to_l--;
- }
- if (from_l <= to_l) {
- // Add [from_l-to_l][\udc00-\udfff]
- result->AddAlternative(
- GuardedAlternative(TextNode::CreateForSurrogatePair(
- zone, CharacterRange::Range(from_l, to_l),
- CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
- compiler->read_backward(), on_success, default_flags)));
- }
- }
- }
-}
-
-RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
- RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
- ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
- JSRegExp::Flags flags) {
- Zone* zone = compiler->zone();
- RegExpNode* match_node = TextNode::CreateForCharacterRanges(
- zone, match, read_backward, on_success, flags);
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpLookaround::Builder lookaround(false, match_node, stack_register,
- position_register);
- RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
- return lookaround.ForMatch(negative_match);
-}
-/* Builds a match followed by a negative lookaround in the same direction.
- * The returned text node matches `match` in the `read_backward` direction
- * and then enters a negative lookaround (Builder is_positive == false)
- * whose body matches `lookahead` in that same direction. The lookaround is
- * constructed with `on_success` as its continuation.
- */
-RegExpNode* MatchAndNegativeLookaroundInReadDirection(
- RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
- ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
- bool read_backward, JSRegExp::Flags flags) {
- Zone* zone = compiler->zone();
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpLookaround::Builder lookaround(false, on_success, stack_register,
- position_register);
- RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
- zone, lookahead, read_backward, lookaround.on_match_success(), flags);
- return TextNode::CreateForCharacterRanges(
- zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
-}
-/* Adds to `result` one alternative that matches a lead surrogate taken from
- * splitter->lead_surrogates() only when no trail surrogate follows it.
- * Returns without adding anything when ToCanonicalZoneList() yields nullptr
- * for that set. All text nodes are built with default-constructed
- * JSRegExp::Flags. The helper used depends on compiler->read_backward().
- */
-void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- ZoneList<CharacterRange>* lead_surrogates =
- ToCanonicalZoneList(splitter->lead_surrogates(), compiler->zone());
- if (lead_surrogates == nullptr) return;
- Zone* zone = compiler->zone();
- // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
- ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
-
- RegExpNode* match;
- if (compiler->read_backward()) {
- // Reading backward. Assert that reading forward, there is no trail
- // surrogate, and then backward match the lead surrogate.
- match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, trail_surrogates, lead_surrogates, on_success, true,
- default_flags);
- } else {
- // Reading forward. Forward match the lead surrogate and assert that
- // no trail surrogate follows.
- match = MatchAndNegativeLookaroundInReadDirection(
- compiler, lead_surrogates, trail_surrogates, on_success, false,
- default_flags);
- }
- result->AddAlternative(GuardedAlternative(match));
-}
-/* Adds to `result` one alternative that matches a trail surrogate taken
- * from splitter->trail_surrogates() only when no lead surrogate precedes
- * it. Returns without adding anything when ToCanonicalZoneList() yields
- * nullptr for that set. All text nodes are built with default-constructed
- * JSRegExp::Flags. The helper used depends on compiler->read_backward().
- */
-void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
- RegExpNode* on_success,
- UnicodeRangeSplitter* splitter) {
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- ZoneList<CharacterRange>* trail_surrogates =
- ToCanonicalZoneList(splitter->trail_surrogates(), compiler->zone());
- if (trail_surrogates == nullptr) return;
- Zone* zone = compiler->zone();
- // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
- ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
-
- RegExpNode* match;
- if (compiler->read_backward()) {
- // Reading backward. Backward match the trail surrogate and assert that no
- // lead surrogate precedes it.
- match = MatchAndNegativeLookaroundInReadDirection(
- compiler, trail_surrogates, lead_surrogates, on_success, true,
- default_flags);
- } else {
- // Reading forward. Assert that reading backward, there is no lead
- // surrogate, and then forward match the trail surrogate.
- match = NegativeLookaroundAgainstReadDirectionAndMatch(
- compiler, lead_surrogates, trail_surrogates, on_success, false,
- default_flags);
- }
- result->AddAlternative(GuardedAlternative(match));
-}
-/* Returns a forward-reading text node that matches any single code unit in
- * the range [0, String::kMaxUtf16CodeUnit] and then continues to
- * `on_success`. The node is built with default-constructed flags. The
- * compiler must not be reading backward (checked with DCHECK).
- */
-RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
- DCHECK(!compiler->read_backward());
- Zone* zone = compiler->zone();
- // Advance any character. If the character happens to be a lead surrogate and
- // we advanced into the middle of a surrogate pair, it will work out, as
- // nothing will match from there. We will have to advance again, consuming
- // the associated trail surrogate.
- ZoneList<CharacterRange>* range = CharacterRange::List(
- zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
- default_flags);
-}
-/* Replaces the contents of `ranges` with its case-insensitive closure.
- * Only active when V8_INTL_SUPPORT is defined; otherwise the body is empty
- * and `ranges` is left untouched. With ICU, the ranges are copied into an
- * icu::UnicodeSet, closed over with USET_CASE_INSENSITIVE, stripped of
- * multi-character strings, written back into the cleared list and
- * canonicalized. `ranges` must already be canonical (DCHECK). A single
- * range for which IsEverything(kNonBmpEnd) holds is returned unchanged.
- */
-void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
-#ifdef V8_INTL_SUPPORT
- DCHECK(CharacterRange::IsCanonical(ranges));
-
- // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
- // See also https://crbug.com/v8/6727.
- // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
- // which we use frequently internally. But large ranges can also easily be
- // created by the user. We might want to have a more general caching mechanism
- // for such ranges.
- if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd)) return;
-
- // Use ICU to compute the case fold closure over the ranges.
- icu::UnicodeSet set;
- for (int i = 0; i < ranges->length(); i++) {
- set.add(ranges->at(i).from(), ranges->at(i).to());
- }
- ranges->Clear();
- set.closeOver(USET_CASE_INSENSITIVE);
- // Full case mappings map single characters to multiple characters.
- // Those are represented as strings in the set. Remove them so that
- // we end up with only simple and common case mappings.
- set.removeAllStrings();
- for (int i = 0; i < set.getRangeCount(); i++) {
- ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
- zone);
- }
- // No errors and everything we collected has been a range.
- CharacterRange::Canonicalize(ranges);
-#endif // V8_INTL_SUPPORT
-}
-
-} // namespace
-/* Converts this character class into a node graph that ends at `on_success`.
- * Steps visible here:
- *  1. Canonicalize set_ and fetch its ranges.
- *  2. If NeedsUnicodeCaseEquivalents(flags_), close the ranges over unicode
- *     case equivalents.
- *  3. For unicode classes, when the compiler is not in one-byte mode and the
- *     class does not contain a split surrogate: materialize negation with
- *     CharacterRange::Negate, then return either a TextNode over a class
- *     with an empty range list (named fail below), the node from
- *     UnanchoredAdvance when standard_type() is the star character, or a
- *     ChoiceNode filled by the four Add... helpers.
- *  4. Otherwise return a plain TextNode over this class.
- */
-RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- set_.Canonicalize();
- Zone* zone = compiler->zone();
- ZoneList<CharacterRange>* ranges = this->ranges(zone);
- if (NeedsUnicodeCaseEquivalents(flags_)) {
- AddUnicodeCaseEquivalents(ranges, zone);
- }
- if (IsUnicode(flags_) && !compiler->one_byte() &&
- !contains_split_surrogate()) {
- if (is_negated()) {
- ZoneList<CharacterRange>* negated =
- new (zone) ZoneList<CharacterRange>(2, zone);
- CharacterRange::Negate(ranges, negated, zone);
- ranges = negated;
- }
- if (ranges->length() == 0) {
- JSRegExp::Flags default_flags;
- RegExpCharacterClass* fail =
- new (zone) RegExpCharacterClass(zone, ranges, default_flags);
- return new (zone) TextNode(fail, compiler->read_backward(), on_success);
- }
- if (standard_type() == '*') {
- return UnanchoredAdvance(compiler, on_success);
- } else {
- ChoiceNode* result = new (zone) ChoiceNode(2, zone);
- UnicodeRangeSplitter splitter(ranges);
- AddBmpCharacters(compiler, result, on_success, &splitter);
- AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
- AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
- AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
- return result;
- }
- } else {
- return new (zone) TextNode(this, compiler->read_backward(), on_success);
- }
-}
-/* Three-way comparison of the first code unit, data().at(0), of two atoms.
- * Both arguments are converted with AsAtom(). Returns -1, 0 or 1.
- * Passed to StableSort in RegExpDisjunction::SortConsecutiveAtoms for the
- * case-sensitive path.
- */
-int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
- RegExpAtom* atom1 = (*a)->AsAtom();
- RegExpAtom* atom2 = (*b)->AsAtom();
- uc16 character1 = atom1->data().at(0);
- uc16 character2 = atom2->data().at(0);
- if (character1 < character2) return -1;
- if (character1 > character2) return 1;
- return 0;
-}
-
-#ifdef V8_INTL_SUPPORT
-
-// Case-insensitive comparison of the first code unit of two atoms (ICU build only). Returns the result of icu::UnicodeString::caseCompare.
-int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
- RegExpAtom* atom1 = (*a)->AsAtom();
- RegExpAtom* atom2 = (*b)->AsAtom();
- icu::UnicodeString character1(atom1->data().at(0));
- return character1.caseCompare(atom2->data().at(0), U_FOLD_CASE_DEFAULT);  // default case folding
-}
-
-#else
-/* Returns the canonical form of `c` according to `canonicalize`.
- * The mapping is queried with canonicalize->get(c, 0, chars). When it
- * reports exactly one output character that character is returned,
- * otherwise `c` itself is returned. More than one output is not expected
- * (DCHECK_LE(length, 1)).
- */
-static unibrow::uchar Canonical(
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
- unibrow::uchar c) {
- unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
- int length = canonicalize->get(c, '\0', chars);
- DCHECK_LE(length, 1);
- unibrow::uchar canonical = c;
- if (length == 1) canonical = chars[0];
- return canonical;
-}
-/* Case-independent comparison of the first code unit of two atoms (build
- * without V8_INTL_SUPPORT). Returns 0 when the raw code units are equal.
- * Otherwise, if either code unit is at least the letter a, both are mapped
- * through Canonical() first. The result is the signed difference of the
- * two (possibly canonicalized) values.
- */
-int CompareFirstCharCaseIndependent(
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
- RegExpTree* const* a, RegExpTree* const* b) {
- RegExpAtom* atom1 = (*a)->AsAtom();
- RegExpAtom* atom2 = (*b)->AsAtom();
- unibrow::uchar character1 = atom1->data().at(0);
- unibrow::uchar character2 = atom2->data().at(0);
- if (character1 == character2) return 0;
- if (character1 >= 'a' || character2 >= 'a') {  // canonicalization is skipped when both values are below this bound
- character1 = Canonical(canonicalize, character1);
- character2 = Canonical(canonicalize, character2);
- }
- return static_cast<int>(character1) - static_cast<int>(character2);
-}
-#endif // V8_INTL_SUPPORT
-
-// We can stable sort runs of atoms, since the order does not matter if they
-// start with different characters. A run is a maximal sequence of adjacent atom alternatives that share the same flags; each run is sorted in place by its first character.
-// Returns true if any consecutive atoms were found, i.e. at least one run has more than one atom.
-bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
- bool found_consecutive_atoms = false;
- for (int i = 0; i < length; i++) {
- while (i < length) {  // skip alternatives that are not atoms
- RegExpTree* alternative = alternatives->at(i);
- if (alternative->IsAtom()) break;
- i++;
- }
- // i is length or it is the index of an atom.
- if (i == length) break;
- int first_atom = i;
- JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
- i++;
- while (i < length) {  // extend the run while atoms with identical flags follow
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- if (alternative->AsAtom()->flags() != flags) break;
- i++;
- }
- // Sort atoms to get ones with common prefixes together.
- // This step is more tricky if we are in a case-independent regexp,
- // because it would change /is|I/ to /I|is/, and order matters when
- // the regexp parts don't match only disjoint starting points. To fix
- // this we have a version of CompareFirstChar that uses case-
- // independent character classes for comparison.
- DCHECK_LT(first_atom, alternatives->length());
- DCHECK_LE(i, alternatives->length());
- DCHECK_LE(first_atom, i);
- if (IgnoreCase(flags)) {
-#ifdef V8_INTL_SUPPORT
- alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
- i - first_atom);
-#else
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
- compiler->isolate()->regexp_macro_assembler_canonicalize();
- auto compare_closure = [canonicalize](RegExpTree* const* a,
- RegExpTree* const* b) {
- return CompareFirstCharCaseIndependent(canonicalize, a, b);
- };
- alternatives->StableSort(compare_closure, first_atom, i - first_atom);
-#endif // V8_INTL_SUPPORT
- } else {
- alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
- }
- if (i - first_atom > 1) found_consecutive_atoms = true;
- }
- return found_consecutive_atoms;
-}
-
-// Optimizes ab|ac|az to a(?:b|c|z). Rewrites the alternatives list in place: runs of three or more adjacent atoms with identical flags and a matching first character are replaced by one RegExpAlternative (common prefix atom + disjunction of suffixes); everything else is copied through in order.
-void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
- Zone* zone = compiler->zone();
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
-
- int write_posn = 0;  // next slot to write in the compacted list
- int i = 0;
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- RegExpAtom* const atom = alternative->AsAtom();
- JSRegExp::Flags flags = atom->flags();
-#ifdef V8_INTL_SUPPORT
- icu::UnicodeString common_prefix(atom->data().at(0));
-#else
- unibrow::uchar common_prefix = atom->data().at(0);
-#endif // V8_INTL_SUPPORT
- int first_with_prefix = i;
- int prefix_length = atom->length();  // upper bound; shrinks to the shortest atom in the run
- i++;
- while (i < length) {
- alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- RegExpAtom* const atom = alternative->AsAtom();
- if (atom->flags() != flags) break;
-#ifdef V8_INTL_SUPPORT
- icu::UnicodeString new_prefix(atom->data().at(0));
- if (new_prefix != common_prefix) {
- if (!IgnoreCase(flags)) break;
- if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
- break;
- }
-#else
- unibrow::uchar new_prefix = atom->data().at(0);
- if (new_prefix != common_prefix) {
- if (!IgnoreCase(flags)) break;
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
- compiler->isolate()->regexp_macro_assembler_canonicalize();
- new_prefix = Canonical(canonicalize, new_prefix);
- common_prefix = Canonical(canonicalize, common_prefix);
- if (new_prefix != common_prefix) break;
- }
-#endif // V8_INTL_SUPPORT
- prefix_length = Min(prefix_length, atom->length());
- i++;
- }
- if (i > first_with_prefix + 2) {  // only runs of at least three atoms are rewritten
- // Found worthwhile run of alternatives with common prefix of at least one
- // character. The sorting function above did not sort on more than one
- // character for reasons of correctness, but there may still be a longer
- // common prefix if the terms were similar or presorted in the input.
- // Find out how long the common prefix is.
- int run_length = i - first_with_prefix;
- RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
- for (int j = 1; j < run_length && prefix_length > 1; j++) {
- RegExpAtom* old_atom =
- alternatives->at(j + first_with_prefix)->AsAtom();
- for (int k = 1; k < prefix_length; k++) {  // positions after the first are compared as exact code units
- if (atom->data().at(k) != old_atom->data().at(k)) {
- prefix_length = k;
- break;
- }
- }
- }
- RegExpAtom* prefix = new (zone)
- RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
- ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
- pair->Add(prefix, zone);
- ZoneList<RegExpTree*>* suffixes =
- new (zone) ZoneList<RegExpTree*>(run_length, zone);
- for (int j = 0; j < run_length; j++) {
- RegExpAtom* old_atom =
- alternatives->at(j + first_with_prefix)->AsAtom();
- int len = old_atom->length();
- if (len == prefix_length) {
- suffixes->Add(new (zone) RegExpEmpty(), zone);  // the atom equals the prefix, so its suffix is empty
- } else {
- RegExpTree* suffix = new (zone) RegExpAtom(
- old_atom->data().SubVector(prefix_length, old_atom->length()),
- flags);
- suffixes->Add(suffix, zone);
- }
- }
- pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
- alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
- } else {
- // Just copy any non-worthwhile alternatives.
- for (int j = first_with_prefix; j < i; j++) {
- alternatives->at(write_posn++) = alternatives->at(j);
- }
- }
- }
- alternatives->Rewind(write_posn); // Trim end of array.
-}
-
-// Optimizes b|c|z to [bcz]. Rewrites the alternatives list in place: runs of two or more adjacent single-character atoms with identical flags become one RegExpCharacterClass; all other alternatives are copied through in order.
-void RegExpDisjunction::FixSingleCharacterDisjunctions(
- RegExpCompiler* compiler) {
- Zone* zone = compiler->zone();
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
- int length = alternatives->length();
-
- int write_posn = 0;  // next slot to write in the compacted list
- int i = 0;
- while (i < length) {
- RegExpTree* alternative = alternatives->at(i);
- if (!alternative->IsAtom()) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- RegExpAtom* const atom = alternative->AsAtom();
- if (atom->length() != 1) {
- alternatives->at(write_posn++) = alternatives->at(i);
- i++;
- continue;
- }
- JSRegExp::Flags flags = atom->flags();
- DCHECK_IMPLIES(IsUnicode(flags),
- !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
- bool contains_trail_surrogate =
- unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
- int first_in_run = i;
- i++;
- // Find a run of single-character atom alternatives that have identical
- // flags (case independence and unicode-ness).
- while (i < length) {
- alternative = alternatives->at(i);
- if (!alternative->IsAtom()) break;
- RegExpAtom* const atom = alternative->AsAtom();
- if (atom->length() != 1) break;
- if (atom->flags() != flags) break;
- DCHECK_IMPLIES(IsUnicode(flags),
- !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
- contains_trail_surrogate |=
- unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
- i++;
- }
- if (i > first_in_run + 1) {
- // Found non-trivial run of single-character alternatives.
- int run_length = i - first_in_run;
- ZoneList<CharacterRange>* ranges =
- new (zone) ZoneList<CharacterRange>(2, zone);
- for (int j = 0; j < run_length; j++) {
- RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
- DCHECK_EQ(old_atom->length(), 1);
- ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
- }
- RegExpCharacterClass::CharacterClassFlags character_class_flags;
- if (IsUnicode(flags) && contains_trail_surrogate) {  // mark the class so that a trail surrogate in it is recorded as a split surrogate
- character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
- }
- alternatives->at(write_posn++) = new (zone)
- RegExpCharacterClass(zone, ranges, flags, character_class_flags);
- } else {
- // Just copy any trivial alternatives.
- for (int j = first_in_run; j < i; j++) {
- alternatives->at(write_posn++) = alternatives->at(j);
- }
- }
- }
- alternatives->Rewind(write_posn); // Trim end of array.
-}
-/* Converts this disjunction into a ChoiceNode with one guarded alternative
- * per remaining alternative, each continuing to `on_success`.
- * When there are more than two alternatives, the list is first optimized
- * in place: atoms are sorted, common prefixes are factored out if any
- * consecutive atoms were found, and single-character runs are merged. If
- * that leaves exactly one alternative, its node is returned directly.
- */
-RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- ZoneList<RegExpTree*>* alternatives = this->alternatives();
-
- if (alternatives->length() > 2) {
- bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
- if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
- FixSingleCharacterDisjunctions(compiler);
- if (alternatives->length() == 1) {
- return alternatives->at(0)->ToNode(compiler, on_success);
- }
- }
-
- int length = alternatives->length();  // re-read: the passes above may have shortened the list
-
- ChoiceNode* result =
- new (compiler->zone()) ChoiceNode(length, compiler->zone());
- for (int i = 0; i < length; i++) {
- GuardedAlternative alternative(
- alternatives->at(i)->ToNode(compiler, on_success));
- result->AddAlternative(alternative);
- }
- return result;
-}
-/* Forwards to the ToNode overload that takes explicit quantifier
- * parameters, passing this quantifier's min(), max(), is_greedy() and
- * body(). That overload is not part of this excerpt.
- */
-RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
-}
-
-namespace {
-// Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
-// \B to (?<=\w)(?=\w)|(?<=\W)(?=\W). Requires NeedsUnicodeCaseEquivalents(flags) (DCHECK). Returns a two-way ChoiceNode; the word range is built with AddClassEscape using unicode case equivalents.
-RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
- RegExpNode* on_success,
- RegExpAssertion::AssertionType type,
- JSRegExp::Flags flags) {
- DCHECK(NeedsUnicodeCaseEquivalents(flags));
- Zone* zone = compiler->zone();
- ZoneList<CharacterRange>* word_range =
- new (zone) ZoneList<CharacterRange>(2, zone);
- CharacterRange::AddClassEscape('w', word_range, true, zone);
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- ChoiceNode* result = new (zone) ChoiceNode(2, zone);
- // Add two choices. The (non-)boundary could start with a word or
- // a non-word-character.
- for (int i = 0; i < 2; i++) {
- bool lookbehind_for_word = i == 0;
- bool lookahead_for_word =
- (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;  // for BOUNDARY the two sides differ, otherwise they agree
- // Look to the left.
- RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
- stack_register, position_register);
- RegExpNode* backward = TextNode::CreateForCharacterRanges(
- zone, word_range, true, lookbehind.on_match_success(), flags);
- // Look to the right.
- RegExpLookaround::Builder lookahead(lookahead_for_word,
- lookbehind.ForMatch(backward),
- stack_register, position_register);
- RegExpNode* forward = TextNode::CreateForCharacterRanges(
- zone, word_range, false, lookahead.on_match_success(), flags);
- result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
- }
- return result;
-}
-} // anonymous namespace
-/* Converts this assertion into a node that continues to `on_success`.
- * START_OF_LINE, START_OF_INPUT and END_OF_INPUT map directly onto
- * AssertionNode factories. BOUNDARY and NON_BOUNDARY use
- * BoundaryAssertionAsLookaround when NeedsUnicodeCaseEquivalents(flags_)
- * holds and the plain AssertionNode factories otherwise. END_OF_LINE is
- * compiled as a two-way choice between a newline submatch and
- * end-of-input, using two freshly allocated registers.
- */
-RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- NodeInfo info;  // NOTE(review): not referenced again in this function
- Zone* zone = compiler->zone();
-
- switch (assertion_type()) {
- case START_OF_LINE:
- return AssertionNode::AfterNewline(on_success);
- case START_OF_INPUT:
- return AssertionNode::AtStart(on_success);
- case BOUNDARY:
- return NeedsUnicodeCaseEquivalents(flags_)
- ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
- flags_)
- : AssertionNode::AtBoundary(on_success);
- case NON_BOUNDARY:
- return NeedsUnicodeCaseEquivalents(flags_)
- ? BoundaryAssertionAsLookaround(compiler, on_success,
- NON_BOUNDARY, flags_)
- : AssertionNode::AtNonBoundary(on_success);
- case END_OF_INPUT:
- return AssertionNode::AtEnd(on_success);
- case END_OF_LINE: {
- // Compile $ in multiline regexps as an alternation with a positive
- // lookahead in one side and an end-of-input on the other side.
- // We need two registers for the lookahead.
- int stack_pointer_register = compiler->AllocateRegister();
- int position_register = compiler->AllocateRegister();
- // The ChoiceNode to distinguish between a newline and end-of-input.
- ChoiceNode* result = new (zone) ChoiceNode(2, zone);
- // Create a newline atom.
- ZoneList<CharacterRange>* newline_ranges =
- new (zone) ZoneList<CharacterRange>(3, zone);
- CharacterRange::AddClassEscape('n', newline_ranges, false, zone);  // NOTE(review): newline_ranges is filled here but not referenced afterwards in this function
- JSRegExp::Flags default_flags = JSRegExp::Flags();
- RegExpCharacterClass* newline_atom =
- new (zone) RegExpCharacterClass('n', default_flags);
- TextNode* newline_matcher =
- new (zone) TextNode(newline_atom, false,
- ActionNode::PositiveSubmatchSuccess(
- stack_pointer_register, position_register,
- 0, // No captures inside.
- -1, // Ignored if no captures.
- on_success));
- // Wrap the newline matcher in a submatch; this is the lookahead side.
- RegExpNode* end_of_line = ActionNode::BeginSubmatch(
- stack_pointer_register, position_register, newline_matcher);
- // Add the two alternatives to the ChoiceNode.
- GuardedAlternative eol_alternative(end_of_line);
- result->AddAlternative(eol_alternative);
- GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
- result->AddAlternative(end_alternative);
- return result;
- }
- default:
- UNREACHABLE();
- }
- return on_success;
-}
-/* Creates a BackReferenceNode for the capture with this index().
- * The node receives the start and end registers of that capture
- * (RegExpCapture::StartRegister / EndRegister), this back reference's
- * flags_, the compiler's current read direction and `on_success`.
- */
-RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- return new (compiler->zone())
- BackReferenceNode(RegExpCapture::StartRegister(index()),
- RegExpCapture::EndRegister(index()), flags_,
- compiler->read_backward(), on_success);
-}
-/* An empty tree adds no node of its own: `on_success` is returned as is
- * and `compiler` is not used.
- */
-RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- return on_success;
-}
-/* Stores the lookaround parameters and creates on_match_success_, the node
- * that the lookaround body must continue to when it matches.
- * Positive lookaround: ActionNode::PositiveSubmatchSuccess, which is given
- * `on_success` as its continuation.
- * Negative lookaround: a NegativeSubmatchSuccess node allocated in the zone
- * of `on_success`; it is not given `on_success`.
- * The capture register arguments are only forwarded to those nodes and are
- * not stored in this object.
- */
-RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
- int stack_pointer_register,
- int position_register,
- int capture_register_count,
- int capture_register_start)
- : is_positive_(is_positive),
- on_success_(on_success),
- stack_pointer_register_(stack_pointer_register),
- position_register_(position_register) {
- if (is_positive_) {
- on_match_success_ = ActionNode::PositiveSubmatchSuccess(
- stack_pointer_register, position_register, capture_register_count,
- capture_register_start, on_success_);
- } else {
- Zone* zone = on_success_->zone();
- on_match_success_ = new (zone) NegativeSubmatchSuccess(
- stack_pointer_register, position_register, capture_register_count,
- capture_register_start, zone);
- }
-}
-/* Wraps the lookaround body `match` and returns the entry node.
- * `match` is expected to have been built with on_match_success() as its
- * continuation. Positive case: BeginSubmatch followed by `match`.
- * Negative case: BeginSubmatch followed by a NegativeLookaroundChoiceNode
- * whose first alternative is `match` and whose second is on_success_.
- */
-RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
- if (is_positive_) {
- return ActionNode::BeginSubmatch(stack_pointer_register_,
- position_register_, match);
- } else {
- Zone* zone = on_success_->zone();
- // We use a ChoiceNode to represent the negative lookaround. The first
- // alternative is the negative match. On success, the end node backtracks.
- // On failure, the second alternative is tried and leads to success.
- // NegativeLookaroundChoiceNode is a special ChoiceNode that ignores the
- // first exit when calculating quick checks.
- ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
- GuardedAlternative(match), GuardedAlternative(on_success_), zone);
- return ActionNode::BeginSubmatch(stack_pointer_register_,
- position_register_, choice_node);
- }
-}
-/* Converts this lookaround into nodes that continue to `on_success`.
- * Two registers are allocated for the submatch. The capture register
- * window handed to the Builder starts at register
- * 2 + capture_from_ * 2 and spans capture_count_ * 2 registers.
- * While the body is converted, the compiler's read direction is set to
- * backward for LOOKBEHIND and forward otherwise; the previous direction is
- * restored before returning.
- */
-RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- int stack_pointer_register = compiler->AllocateRegister();
- int position_register = compiler->AllocateRegister();
-
- const int registers_per_capture = 2;
- const int register_of_first_capture = 2;
- int register_count = capture_count_ * registers_per_capture;
- int register_start =
- register_of_first_capture + capture_from_ * registers_per_capture;
-
- RegExpNode* result;
- bool was_reading_backward = compiler->read_backward();
- compiler->set_read_backward(type() == LOOKBEHIND);
- Builder builder(is_positive(), on_success, stack_pointer_register,
- position_register, register_count, register_start);
- RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
- result = builder.ForMatch(match);
- compiler->set_read_backward(was_reading_backward);
- return result;
-}
-/* Member entry point: forwards this capture's body() and index() to the
- * four-argument ToNode overload.
- */
-RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- return ToNode(body(), index(), compiler, on_success);
-}
-/* Wraps `body` between two StorePosition actions for capture `index`.
- * The returned node stores the current position into the start register,
- * runs the body, stores the position into the end register and continues
- * to `on_success`. When the compiler reads backward the two registers are
- * swapped. `body` must not be null (DCHECK_NOT_NULL).
- */
-RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index,
- RegExpCompiler* compiler,
- RegExpNode* on_success) {
- DCHECK_NOT_NULL(body);
- int start_reg = RegExpCapture::StartRegister(index);
- int end_reg = RegExpCapture::EndRegister(index);
- if (compiler->read_backward()) std::swap(start_reg, end_reg);
- RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
- RegExpNode* body_node = body->ToNode(compiler, store_end);
- return ActionNode::StorePosition(start_reg, true, body_node);
-}
-
-namespace {
-/* Rewrites runs of adjacent assertions inside a term list, in place.
- * MaybeRewrite() scans `terms` for maximal runs of two or more assertion
- * terms and hands each run to Rewrite(). Rewrite() replaces repeated
- * assertion types with RegExpEmpty (only while all flags seen so far in
- * the run match those of the first term) and replaces the whole run with a
- * never-matching class when both BOUNDARY and NON_BOUNDARY occur in it.
- * New nodes are allocated in `zone`.
- */
-class AssertionSequenceRewriter final {
- public:
- // TODO(jgruber): Consider moving this to a separate AST tree rewriter pass
- // instead of sprinkling rewrites into the AST->Node conversion process.
- static void MaybeRewrite(ZoneList<RegExpTree*>* terms, Zone* zone) {
- AssertionSequenceRewriter rewriter(terms, zone);
-
- static constexpr int kNoIndex = -1;
- int from = kNoIndex;  // start index of the current run, or kNoIndex outside a run
-
- for (int i = 0; i < terms->length(); i++) {
- RegExpTree* t = terms->at(i);
- if (from == kNoIndex && t->IsAssertion()) {
- from = i; // Start a sequence.
- } else if (from != kNoIndex && !t->IsAssertion()) {
- // Terminate and process the sequence.
- if (i - from > 1) rewriter.Rewrite(from, i);
- from = kNoIndex;
- }
- }
-
- if (from != kNoIndex && terms->length() - from > 1) {  // a run that reaches the end of the list
- rewriter.Rewrite(from, terms->length());
- }
- }
-
- // All assertions are zero width. A consecutive sequence of assertions is
- // order-independent. There are two ways we can optimize here:
- // 1. fold all identical assertions.
- // 2. if any assertion combinations are known to fail (e.g. \b\B), the entire
- // sequence fails.
- void Rewrite(int from, int to) {
- DCHECK_GT(to, from + 1);
-
- // Bitfield of all seen assertions.
- uint32_t seen_assertions = 0;
- STATIC_ASSERT(RegExpAssertion::LAST_TYPE < kUInt32Size * kBitsPerByte);
-
- // Flags must match for folding.
- JSRegExp::Flags flags = terms_->at(from)->AsAssertion()->flags();
- bool saw_mismatched_flags = false;
-
- for (int i = from; i < to; i++) {
- RegExpAssertion* t = terms_->at(i)->AsAssertion();
- if (t->flags() != flags) saw_mismatched_flags = true;  // sticky: folding stays off for the rest of the run
- const uint32_t bit = 1 << t->assertion_type();
-
- if ((seen_assertions & bit) && !saw_mismatched_flags) {
- // Fold duplicates.
- terms_->Set(i, new (zone_) RegExpEmpty());
- }
-
- seen_assertions |= bit;
- }
-
- // Collapse failures.
- const uint32_t always_fails_mask =
- 1 << RegExpAssertion::BOUNDARY | 1 << RegExpAssertion::NON_BOUNDARY;
- if ((seen_assertions & always_fails_mask) == always_fails_mask) {
- ReplaceSequenceWithFailure(from, to);
- }
- }
-
- void ReplaceSequenceWithFailure(int from, int to) {
- // Replace the entire sequence with a single node that always fails.
- // TODO(jgruber): Consider adding an explicit Fail kind. Until then, the
- // negated '*' (everything) range serves the purpose.
- // NOTE(review): the class built below has an empty range list and no negation is visible here; confirm the wording above.
- ZoneList<CharacterRange>* ranges =
- new (zone_) ZoneList<CharacterRange>(0, zone_);
- RegExpCharacterClass* cc =
- new (zone_) RegExpCharacterClass(zone_, ranges, JSRegExp::Flags());
- terms_->Set(from, cc);
-
- // Zero out the rest.
- RegExpEmpty* empty = new (zone_) RegExpEmpty();
- for (int i = from + 1; i < to; i++) terms_->Set(i, empty);  // one RegExpEmpty instance is shared by all remaining slots
- }
-
- private:
- AssertionSequenceRewriter(ZoneList<RegExpTree*>* terms, Zone* zone)
- : zone_(zone), terms_(terms) {}
-
- Zone* zone_;
- ZoneList<RegExpTree*>* terms_;
-};
-
-} // namespace
-/* Converts the sequence of child terms into a chain of nodes ending at
- * `on_success`. Runs of assertions are first simplified in place by
- * AssertionSequenceRewriter. The chain is built by folding each child over
- * the node built so far: children are visited last to first when reading
- * forward and first to last when reading backward.
- */
-RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
- RegExpNode* on_success) {
- ZoneList<RegExpTree*>* children = nodes();
-
- AssertionSequenceRewriter::MaybeRewrite(children, compiler->zone());
-
- RegExpNode* current = on_success;
- if (compiler->read_backward()) {
- for (int i = 0; i < children->length(); i++) {
- current = children->at(i)->ToNode(compiler, current);
- }
- } else {
- for (int i = children->length() - 1; i >= 0; i--) {
- current = children->at(i)->ToNode(compiler, current);
- }
- }
- return current;
-}
-/* Appends the ranges described by the table `elmv` to `ranges`.
- * `elmv` holds `elmc` ints: pairs of (first, one past last) followed by a
- * final kRangeEndMarker entry (checked with DCHECK_EQ). Each pair is added
- * as the inclusive range [elmv[i], elmv[i + 1] - 1].
- */
-static void AddClass(const int* elmv, int elmc,
- ZoneList<CharacterRange>* ranges, Zone* zone) {
- elmc--;  // drop the trailing end marker from the count
- DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
- for (int i = 0; i < elmc; i += 2) {
- DCHECK(elmv[i] < elmv[i + 1]);
- ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone);
- }
-}
-/* Appends the complement of the ranges described by the table `elmv` to
- * `ranges`, up to String::kMaxCodePoint. The table layout is the same as
- * for AddClass: (first, one past last) pairs plus a final kRangeEndMarker.
- * The gaps before, between and after the table entries are added as
- * inclusive ranges. The table must not start at 0 and must not end at
- * String::kMaxCodePoint (DCHECK_NE).
- */
-static void AddClassNegated(const int* elmv, int elmc,
- ZoneList<CharacterRange>* ranges, Zone* zone) {
- elmc--;  // drop the trailing end marker from the count
- DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
- DCHECK_NE(0x0000, elmv[0]);
- DCHECK_NE(String::kMaxCodePoint, elmv[elmc - 1]);
- uc16 last = 0x0000;  // NOTE(review): uc16 while elmv holds int; presumably every table bound fits in 16 bits, confirm
- for (int i = 0; i < elmc; i += 2) {
- DCHECK(last <= elmv[i] - 1);
- DCHECK(elmv[i] < elmv[i + 1]);
- ranges->Add(CharacterRange::Range(last, elmv[i] - 1), zone);
- last = elmv[i + 1];
- }
- ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
-}
-/* Appends the ranges for the class escape `type` to `ranges`.
- * When `add_unicode_case_equivalents` is true and `type` is w or W, the
- * word ranges are first closed over unicode case equivalents and, for W,
- * negated afterwards; the result is appended to `ranges`. In every other
- * case this forwards to the three-argument overload.
- */
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
- bool add_unicode_case_equivalents,
- Zone* zone) {
- if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
- // See #sec-runtime-semantics-wordcharacters-abstract-operation
- // In case of unicode and ignore_case, we need to create the closure over
- // case equivalent characters before negating.
- ZoneList<CharacterRange>* new_ranges =
- new (zone) ZoneList<CharacterRange>(2, zone);
- AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);
- AddUnicodeCaseEquivalents(new_ranges, zone);
- if (type == 'W') {
- ZoneList<CharacterRange>* negated =
- new (zone) ZoneList<CharacterRange>(2, zone);
- CharacterRange::Negate(new_ranges, negated, zone);
- new_ranges = negated;
- }
- ranges->AddAll(*new_ranges, zone);
- return;
- }
- AddClassEscape(type, ranges, zone);
-}
-/* Appends the ranges for the class escape `type` to `ranges`.
- * Lower-case s, w and d add the space, word and digit tables; upper-case
- * S, W and D add their complements. The dot adds the complement of the
- * line terminator table, n adds the line terminator table itself and the
- * star adds CharacterRange::Everything(). Any other `type` reaches
- * UNREACHABLE().
- */
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
- Zone* zone) {
- switch (type) {
- case 's':
- AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
- break;
- case 'S':
- AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
- break;
- case 'w':
- AddClass(kWordRanges, kWordRangeCount, ranges, zone);
- break;
- case 'W':
- AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
- break;
- case 'd':
- AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
- break;
- case 'D':
- AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
- break;
- case '.':
- AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges,
- zone);
- break;
- // This is not a character range as defined by the spec but a
- // convenient shorthand for a character class that matches any
- // character.
- case '*':
- ranges->Add(CharacterRange::Everything(), zone);
- break;
- // This is the set of characters matched by the $ and ^ symbols
- // in multiline mode.
- case 'n':
- AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges, zone);
- break;
- default:
- UNREACHABLE();
- }
-}
-/* Returns a view over the kWordRanges table with its last entry left out
- * (length kWordRangeCount - 1).
- */
-Vector<const int> CharacterRange::GetWordBounds() {
- return Vector<const int>(kWordRanges, kWordRangeCount - 1);
-}
-
-// static
-void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
- ZoneList<CharacterRange>* ranges,
- bool is_one_byte) {
- CharacterRange::Canonicalize(ranges);
- int range_count = ranges->length();
-#ifdef V8_INTL_SUPPORT
- icu::UnicodeSet others;
- for (int i = 0; i < range_count; i++) {
- CharacterRange range = ranges->at(i);
- uc32 from = range.from();
- if (from > String::kMaxUtf16CodeUnit) continue;
- uc32 to = Min(range.to(), String::kMaxUtf16CodeUnit);
- // Nothing to be done for surrogates.
- if (from >= kLeadSurrogateStart && to <= kTrailSurrogateEnd) continue;
- if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
- if (from > String::kMaxOneByteCharCode) continue;
- if (to > String::kMaxOneByteCharCode) to = String::kMaxOneByteCharCode;
- }
- others.add(from, to);
- }
-
- // Compute the set of additional characters that should be added,
- // using UnicodeSet::closeOver. ECMA 262 defines slightly different
- // case-folding rules than Unicode, so some characters that are
- // added by closeOver do not match anything other than themselves in
- // JS. For example, 'ſ' (U+017F LATIN SMALL LETTER LONG S) is the
- // same case-insensitive character as 's' or 'S' according to
- // Unicode, but does not match any other character in JS. To handle
- // this case, we add such characters to the IgnoreSet and filter
- // them out. We filter twice: once before calling closeOver (to
- // prevent 'ſ' from adding 's'), and once after calling closeOver
- // (to prevent 's' from adding 'ſ'). See regexp/special-case.h for
- // more information.
- icu::UnicodeSet already_added(others);
- others.removeAll(RegExpCaseFolding::IgnoreSet());
- others.closeOver(USET_CASE_INSENSITIVE);
- others.removeAll(RegExpCaseFolding::IgnoreSet());
- others.removeAll(already_added);
-
- // Add others to the ranges
- for (int32_t i = 0; i < others.getRangeCount(); i++) {
- UChar32 from = others.getRangeStart(i);
- UChar32 to = others.getRangeEnd(i);
- if (from == to) {
- ranges->Add(CharacterRange::Singleton(from), zone);
- } else {
- ranges->Add(CharacterRange::Range(from, to), zone);
- }
- }
-#else
- for (int i = 0; i < range_count; i++) {
- CharacterRange range = ranges->at(i);
- uc32 bottom = range.from();
- if (bottom > String::kMaxUtf16CodeUnit) continue;
- uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
- // Nothing to be done for surrogates.
- if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
- if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
- if (bottom > String::kMaxOneByteCharCode) continue;
- if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
- }
- unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- if (top == bottom) {
- // If this is a singleton we just expand the one character.
- int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
- for (int i = 0; i < length; i++) {
- uc32 chr = chars[i];
- if (chr != bottom) {
- ranges->Add(CharacterRange::Singleton(chars[i]), zone);
- }
- }
- } else {
- // If this is a range we expand the characters block by block, expanding
- // contiguous subranges (blocks) one at a time. The approach is as
- // follows. For a given start character we look up the remainder of the
- // block that contains it (represented by the end point), for instance we
- // find 'z' if the character is 'c'. A block is characterized by the
- // property that all characters uncanonicalize in the same way, except
- // that each entry in the result is incremented by the distance from the
- // first element. So a-z is a block because 'a' uncanonicalizes to ['a',
- // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. Once
- // we've found the end point we look up its uncanonicalization and
- // produce a range for each element. For instance for [c-f] we look up
- // ['z', 'Z'] and produce [c-f] and [C-F]. We then only add a range if
- // it is not already contained in the input, so [c-f] will be skipped but
- // [C-F] will be added. If this range is not completely contained in a
- // block we do this for all the blocks covered by the range (handling
- // characters that is not in a block as a "singleton block").
- unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int pos = bottom;
- while (pos <= top) {
- int length =
- isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
- uc32 block_end;
- if (length == 0) {
- block_end = pos;
- } else {
- DCHECK_EQ(1, length);
- block_end = equivalents[0];
- }
- int end = (block_end > top) ? top : block_end;
- length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
- equivalents);
- for (int i = 0; i < length; i++) {
- uc32 c = equivalents[i];
- uc32 range_from = c - (block_end - pos);
- uc32 range_to = c - (block_end - end);
- if (!(bottom <= range_from && range_to <= top)) {
- ranges->Add(CharacterRange::Range(range_from, range_to), zone);
- }
- }
- pos = end + 1;
- }
- }
- }
-#endif // V8_INTL_SUPPORT
-}
-
-bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
- DCHECK_NOT_NULL(ranges);
- int n = ranges->length();
- if (n <= 1) return true;
- int max = ranges->at(0).to();
- for (int i = 1; i < n; i++) {
- CharacterRange next_range = ranges->at(i);
- if (next_range.from() <= max + 1) return false;
- max = next_range.to();
- }
- return true;
-}
-
-ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
- if (ranges_ == nullptr) {
- ranges_ = new (zone) ZoneList<CharacterRange>(2, zone);
- CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
- }
- return ranges_;
-}
-
-// Move a number of elements in a zonelist to another position
-// in the same list. Handles overlapping source and target areas.
-static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
- int count) {
- // Ranges are potentially overlapping.
- if (from < to) {
- for (int i = count - 1; i >= 0; i--) {
- list->at(to + i) = list->at(from + i);
- }
- } else {
- for (int i = 0; i < count; i++) {
- list->at(to + i) = list->at(from + i);
- }
- }
-}
-
-static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
- CharacterRange insert) {
- // Inserts a range into list[0..count[, which must be sorted
- // by from value and non-overlapping and non-adjacent, using at most
- // list[0..count] for the result. Returns the number of resulting
- // canonicalized ranges. Inserting a range may collapse existing ranges into
- // fewer ranges, so the return value can be anything in the range 1..count+1.
- uc32 from = insert.from();
- uc32 to = insert.to();
- int start_pos = 0;
- int end_pos = count;
- for (int i = count - 1; i >= 0; i--) {
- CharacterRange current = list->at(i);
- if (current.from() > to + 1) {
- end_pos = i;
- } else if (current.to() + 1 < from) {
- start_pos = i + 1;
- break;
- }
- }
-
- // Inserted range overlaps, or is adjacent to, ranges at positions
- // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
- // not affected by the insertion.
- // If start_pos == end_pos, the range must be inserted before start_pos.
- // if start_pos < end_pos, the entire range from start_pos to end_pos
- // must be merged with the insert range.
-
- if (start_pos == end_pos) {
- // Insert between existing ranges at position start_pos.
- if (start_pos < count) {
- MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
- }
- list->at(start_pos) = insert;
- return count + 1;
- }
- if (start_pos + 1 == end_pos) {
- // Replace single existing range at position start_pos.
- CharacterRange to_replace = list->at(start_pos);
- int new_from = Min(to_replace.from(), from);
- int new_to = Max(to_replace.to(), to);
- list->at(start_pos) = CharacterRange::Range(new_from, new_to);
- return count;
- }
- // Replace a number of existing ranges from start_pos to end_pos - 1.
- // Move the remaining ranges down.
-
- int new_from = Min(list->at(start_pos).from(), from);
- int new_to = Max(list->at(end_pos - 1).to(), to);
- if (end_pos < count) {
- MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
- }
- list->at(start_pos) = CharacterRange::Range(new_from, new_to);
- return count - (end_pos - start_pos) + 1;
-}
-
-void CharacterSet::Canonicalize() {
- // Special/default classes are always considered canonical. The result
- // of calling ranges() will be sorted.
- if (ranges_ == nullptr) return;
- CharacterRange::Canonicalize(ranges_);
-}
-
-void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
- if (character_ranges->length() <= 1) return;
- // Check whether ranges are already canonical (increasing, non-overlapping,
- // non-adjacent).
- int n = character_ranges->length();
- int max = character_ranges->at(0).to();
- int i = 1;
- while (i < n) {
- CharacterRange current = character_ranges->at(i);
- if (current.from() <= max + 1) {
- break;
- }
- max = current.to();
- i++;
- }
- // Canonical until the i'th range. If that's all of them, we are done.
- if (i == n) return;
-
- // The ranges at index i and forward are not canonicalized. Make them so by
- // doing the equivalent of insertion sort (inserting each into the previous
- // list, in order).
- // Notice that inserting a range can reduce the number of ranges in the
- // result due to combining of adjacent and overlapping ranges.
- int read = i; // Range to insert.
- int num_canonical = i; // Length of canonicalized part of list.
- do {
- num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
- character_ranges->at(read));
- read++;
- } while (read < n);
- character_ranges->Rewind(num_canonical);
-
- DCHECK(CharacterRange::IsCanonical(character_ranges));
-}
-
-void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
- ZoneList<CharacterRange>* negated_ranges,
- Zone* zone) {
- DCHECK(CharacterRange::IsCanonical(ranges));
- DCHECK_EQ(0, negated_ranges->length());
- int range_count = ranges->length();
- uc32 from = 0;
- int i = 0;
- if (range_count > 0 && ranges->at(0).from() == 0) {
- from = ranges->at(0).to() + 1;
- i = 1;
- }
- while (i < range_count) {
- CharacterRange range = ranges->at(i);
- negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
- from = range.to() + 1;
- i++;
- }
- if (from < String::kMaxCodePoint) {
- negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
- zone);
- }
-}
-
-// Scoped object to keep track of how much we unroll quantifier loops in the
-// regexp graph generator.
-class RegExpExpansionLimiter {
- public:
- static const int kMaxExpansionFactor = 6;
- RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
- : compiler_(compiler),
- saved_expansion_factor_(compiler->current_expansion_factor()),
- ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
- DCHECK_LT(0, factor);
- if (ok_to_expand_) {
- if (factor > kMaxExpansionFactor) {
- // Avoid integer overflow of the current expansion factor.
- ok_to_expand_ = false;
- compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
- } else {
- int new_factor = saved_expansion_factor_ * factor;
- ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
- compiler->set_current_expansion_factor(new_factor);
- }
- }
- }
-
- ~RegExpExpansionLimiter() {
- compiler_->set_current_expansion_factor(saved_expansion_factor_);
- }
-
- bool ok_to_expand() { return ok_to_expand_; }
-
- private:
- RegExpCompiler* compiler_;
- int saved_expansion_factor_;
- bool ok_to_expand_;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
-};
-
-RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
- RegExpTree* body, RegExpCompiler* compiler,
- RegExpNode* on_success,
- bool not_at_start) {
- // x{f, t} becomes this:
- //
- // (r++)<-.
- // | `
- // | (x)
- // v ^
- // (r=0)-->(?)---/ [if r < t]
- // |
- // [if r >= f] \----> ...
- //
-
- // 15.10.2.5 RepeatMatcher algorithm.
- // The parser has already eliminated the case where max is 0. In the case
- // where max_match is zero the parser has removed the quantifier if min was
- // > 0 and removed the atom if min was 0. See AddQuantifierToAtom.
-
- // If we know that we cannot match zero length then things are a little
- // simpler since we don't need to make the special zero length match check
- // from step 2.1. If the min and max are small we can unroll a little in
- // this case.
- static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
- static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
- if (max == 0) return on_success; // This can happen due to recursion.
- bool body_can_be_empty = (body->min_match() == 0);
- int body_start_reg = RegExpCompiler::kNoRegister;
- Interval capture_registers = body->CaptureRegisters();
- bool needs_capture_clearing = !capture_registers.is_empty();
- Zone* zone = compiler->zone();
-
- if (body_can_be_empty) {
- body_start_reg = compiler->AllocateRegister();
- } else if (compiler->optimize() && !needs_capture_clearing) {
- // Only unroll if there are no captures and the body can't be
- // empty.
- {
- RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
- if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
- int new_max = (max == kInfinity) ? max : max - min;
- // Recurse once to get the loop or optional matches after the fixed
- // ones.
- RegExpNode* answer =
- ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
- // Unroll the forced matches from 0 to min. This can cause chains of
- // TextNodes (which the parser does not generate). These should be
- // combined if it turns out they hinder good code generation.
- for (int i = 0; i < min; i++) {
- answer = body->ToNode(compiler, answer);
- }
- return answer;
- }
- }
- if (max <= kMaxUnrolledMaxMatches && min == 0) {
- DCHECK_LT(0, max); // Due to the 'if' above.
- RegExpExpansionLimiter limiter(compiler, max);
- if (limiter.ok_to_expand()) {
- // Unroll the optional matches up to max.
- RegExpNode* answer = on_success;
- for (int i = 0; i < max; i++) {
- ChoiceNode* alternation = new (zone) ChoiceNode(2, zone);
- if (is_greedy) {
- alternation->AddAlternative(
- GuardedAlternative(body->ToNode(compiler, answer)));
- alternation->AddAlternative(GuardedAlternative(on_success));
- } else {
- alternation->AddAlternative(GuardedAlternative(on_success));
- alternation->AddAlternative(
- GuardedAlternative(body->ToNode(compiler, answer)));
- }
- answer = alternation;
- if (not_at_start && !compiler->read_backward()) {
- alternation->set_not_at_start();
- }
- }
- return answer;
- }
- }
- }
- bool has_min = min > 0;
- bool has_max = max < RegExpTree::kInfinity;
- bool needs_counter = has_min || has_max;
- int reg_ctr = needs_counter ? compiler->AllocateRegister()
- : RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = new (zone) LoopChoiceNode(
- body->min_match() == 0, compiler->read_backward(), min, zone);
- if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
- RegExpNode* loop_return =
- needs_counter ? static_cast<RegExpNode*>(
- ActionNode::IncrementRegister(reg_ctr, center))
- : static_cast<RegExpNode*>(center);
- if (body_can_be_empty) {
- // If the body can be empty we need to check if it was and then
- // backtrack.
- loop_return =
- ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return);
- }
- RegExpNode* body_node = body->ToNode(compiler, loop_return);
- if (body_can_be_empty) {
- // If the body can be empty we need to store the start position
- // so we can bail out if it was empty.
- body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
- }
- if (needs_capture_clearing) {
- // Before entering the body of this loop we need to clear captures.
- body_node = ActionNode::ClearCaptures(capture_registers, body_node);
- }
- GuardedAlternative body_alt(body_node);
- if (has_max) {
- Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max);
- body_alt.AddGuard(body_guard, zone);
- }
- GuardedAlternative rest_alt(on_success);
- if (has_min) {
- Guard* rest_guard = new (compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
- rest_alt.AddGuard(rest_guard, zone);
- }
- if (is_greedy) {
- center->AddLoopAlternative(body_alt);
- center->AddContinueAlternative(rest_alt);
- } else {
- center->AddContinueAlternative(rest_alt);
- center->AddLoopAlternative(body_alt);
- }
- if (needs_counter) {
- return ActionNode::SetRegisterForLoop(reg_ctr, 0, center);
- } else {
- return center;
- }
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-compiler.cc b/js/src/new-regexp/regexp-compiler.cc
deleted file mode 100644
index 98771354c..000000000
--- a/js/src/new-regexp/regexp-compiler.cc
+++ /dev/null
@@ -1,3831 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-compiler.h"
-
-#include "new-regexp/regexp-macro-assembler-arch.h"
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/special-case.h"
-#endif // V8_INTL_SUPPORT
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/locid.h"
-#include "unicode/uniset.h"
-#include "unicode/utypes.h"
-#endif // V8_INTL_SUPPORT
-
-namespace v8 {
-namespace internal {
-
-using namespace regexp_compiler_constants; // NOLINT(build/namespaces)
-
-// -------------------------------------------------------------------
-// Implementation of the Irregexp regular expression engine.
-//
-// The Irregexp regular expression engine is intended to be a complete
-// implementation of ECMAScript regular expressions. It generates either
-// bytecodes or native code.
-
-// The Irregexp regexp engine is structured in three steps.
-// 1) The parser generates an abstract syntax tree. See ast.cc.
-// 2) From the AST a node network is created. The nodes are all
-// subclasses of RegExpNode. The nodes represent states when
-// executing a regular expression. Several optimizations are
-// performed on the node network.
-// 3) From the nodes we generate either byte codes or native code
-// that can actually execute the regular expression (perform
-// the search). The code generation step is described in more
-// detail below.
-
-// Code generation.
-//
-// The nodes are divided into four main categories.
-// * Choice nodes
-// These represent places where the regular expression can
-// match in more than one way. For example on entry to an
-// alternation (foo|bar) or a repetition (*, +, ? or {}).
-// * Action nodes
-// These represent places where some action should be
-// performed. Examples include recording the current position
-// in the input string to a register (in order to implement
-// captures) or other actions on register for example in order
-// to implement the counters needed for {} repetitions.
-// * Matching nodes
-// These attempt to match some element part of the input string.
-// Examples of elements include character classes, plain strings
-// or back references.
-// * End nodes
-// These are used to implement the actions required on finding
-// a successful match or failing to find a match.
-//
-// The code generated (whether as byte codes or native code) maintains
-// some state as it runs. This consists of the following elements:
-//
-// * The capture registers. Used for string captures.
-// * Other registers. Used for counters etc.
-// * The current position.
-// * The stack of backtracking information. Used when a matching node
-// fails to find a match and needs to try an alternative.
-//
-// Conceptual regular expression execution model:
-//
-// There is a simple conceptual model of regular expression execution
-// which will be presented first. The actual code generated is a more
-// efficient simulation of the simple conceptual model:
-//
-// * Choice nodes are implemented as follows:
-// For each choice except the last {
-// push current position
-// push backtrack code location
-// <generate code to test for choice>
-// backtrack code location:
-// pop current position
-// }
-// <generate code to test for last choice>
-//
-// * Actions nodes are generated as follows
-// <push affected registers on backtrack stack>
-// <generate code to perform action>
-// push backtrack code location
-// <generate code to test for following nodes>
-// backtrack code location:
-// <pop affected registers to restore their state>
-// <pop backtrack location from stack and go to it>
-//
-// * Matching nodes are generated as follows:
-// if input string matches at current position
-// update current position
-// <generate code to test for following nodes>
-// else
-// <pop backtrack location from stack and go to it>
-//
-// Thus it can be seen that the current position is saved and restored
-// by the choice nodes, whereas the registers are saved and restored by
-// by the action nodes that manipulate them.
-//
-// The other interesting aspect of this model is that nodes are generated
-// at the point where they are needed by a recursive call to Emit(). If
-// the node has already been code generated then the Emit() call will
-// generate a jump to the previously generated code instead. In order to
-// limit recursion it is possible for the Emit() function to put the node
-// on a work list for later generation and instead generate a jump. The
-// destination of the jump is resolved later when the code is generated.
-//
-// Actual regular expression code generation.
-//
-// Code generation is actually more complicated than the above. In order
-// to improve the efficiency of the generated code some optimizations are
-// performed
-//
-// * Choice nodes have 1-character lookahead.
-// A choice node looks at the following character and eliminates some of
-// the choices immediately based on that character. This is not yet
-// implemented.
-// * Simple greedy loops store reduced backtracking information.
-// A quantifier like /.*foo/m will greedily match the whole input. It will
-// then need to backtrack to a point where it can match "foo". The naive
-// implementation of this would push each character position onto the
-// backtracking stack, then pop them off one by one. This would use space
-// proportional to the length of the input string. However since the "."
-// can only match in one way and always has a constant length (in this case
-// of 1) it suffices to store the current position on the top of the stack
-// once. Matching now becomes merely incrementing the current position and
-// backtracking becomes decrementing the current position and checking the
-// result against the stored current position. This is faster and saves
-// space.
-// * The current state is virtualized.
-// This is used to defer expensive operations until it is clear that they
-// are needed and to generate code for a node more than once, allowing
-// specialized an efficient versions of the code to be created. This is
-// explained in the section below.
-//
-// Execution state virtualization.
-//
-// Instead of emitting code, nodes that manipulate the state can record their
-// manipulation in an object called the Trace. The Trace object can record a
-// current position offset, an optional backtrack code location on the top of
-// the virtualized backtrack stack and some register changes. When a node is
-// to be emitted it can flush the Trace or update it. Flushing the Trace
-// will emit code to bring the actual state into line with the virtual state.
-// Avoiding flushing the state can postpone some work (e.g. updates of capture
-// registers). Postponing work can save time when executing the regular
-// expression since it may be found that the work never has to be done as a
-// failure to match can occur. In addition it is much faster to jump to a
-// known backtrack code location than it is to pop an unknown backtrack
-// location from the stack and jump there.
-//
-// The virtual state found in the Trace affects code generation. For example
-// the virtual state contains the difference between the actual current
-// position and the virtual current position, and matching code needs to use
-// this offset to attempt a match in the correct location of the input
-// string. Therefore code generated for a non-trivial trace is specialized
-// to that trace. The code generator therefore has the ability to generate
-// code for each node several times. In order to limit the size of the
-// generated code there is an arbitrary limit on how many specialized sets of
-// code may be generated for a given node. If the limit is reached, the
-// trace is flushed and a generic version of the code for a node is emitted.
-// This is subsequently used for that node. The code emitted for non-generic
-// trace is not recorded in the node and so it cannot currently be reused in
-// the event that code generation is requested for an identical trace.
-
-void RegExpTree::AppendToText(RegExpText* text, Zone* zone) { UNREACHABLE(); }
-
-void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
- text->AddElement(TextElement::Atom(this), zone);
-}
-
-void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
- text->AddElement(TextElement::CharClass(this), zone);
-}
-
-void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
- for (int i = 0; i < elements()->length(); i++)
- text->AddElement(elements()->at(i), zone);
-}
-
-TextElement TextElement::Atom(RegExpAtom* atom) {
- return TextElement(ATOM, atom);
-}
-
-TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
- return TextElement(CHAR_CLASS, char_class);
-}
-
-int TextElement::length() const {
- switch (text_type()) {
- case ATOM:
- return atom()->length();
-
- case CHAR_CLASS:
- return 1;
- }
- UNREACHABLE();
-}
-
-class RecursionCheck {
- public:
- explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
- compiler->IncrementRecursionDepth();
- }
- ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
-
- private:
- RegExpCompiler* compiler_;
-};
-
-// Attempts to compile the regexp using an Irregexp code generator. Returns
-// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- bool one_byte)
- : next_register_(2 * (capture_count + 1)),
- unicode_lookaround_stack_register_(kNoRegister),
- unicode_lookaround_position_register_(kNoRegister),
- work_list_(nullptr),
- recursion_depth_(0),
- one_byte_(one_byte),
- reg_exp_too_big_(false),
- limiting_recursion_(false),
- optimize_(FLAG_regexp_optimization),
- read_backward_(false),
- current_expansion_factor_(1),
- frequency_collator_(),
- isolate_(isolate),
- zone_(zone) {
- accept_ = new (zone) EndNode(EndNode::ACCEPT, zone);
- DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1);
-}
-
-RegExpCompiler::CompilationResult RegExpCompiler::Assemble(
- Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
- int capture_count, Handle<String> pattern) {
- macro_assembler_ = macro_assembler;
-
- ZoneVector<RegExpNode*> work_list(zone());
- work_list_ = &work_list;
- Label fail;
- macro_assembler_->PushBacktrack(&fail);
- Trace new_trace;
- start->Emit(this, &new_trace);
- macro_assembler_->BindJumpTarget(&fail);
- macro_assembler_->Fail();
- while (!work_list.empty()) {
- RegExpNode* node = work_list.back();
- work_list.pop_back();
- node->set_on_work_list(false);
- if (!node->label()->is_bound()) node->Emit(this, &new_trace);
- }
- if (reg_exp_too_big_) {
- macro_assembler_->AbortedCodeGeneration();
- return CompilationResult::RegExpTooBig();
- }
-
- Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
- isolate->IncreaseTotalRegexpCodeGenerated(code);
- work_list_ = nullptr;
-
- return {*code, next_register_};
-}
-
-bool Trace::DeferredAction::Mentions(int that) {
- if (action_type() == ActionNode::CLEAR_CAPTURES) {
- Interval range = static_cast<DeferredClearCaptures*>(this)->range();
- return range.Contains(that);
- } else {
- return reg() == that;
- }
-}
-
-bool Trace::mentions_reg(int reg) {
- for (DeferredAction* action = actions_; action != nullptr;
- action = action->next()) {
- if (action->Mentions(reg)) return true;
- }
- return false;
-}
-
-bool Trace::GetStoredPosition(int reg, int* cp_offset) {
- DCHECK_EQ(0, *cp_offset);
- for (DeferredAction* action = actions_; action != nullptr;
- action = action->next()) {
- if (action->Mentions(reg)) {
- if (action->action_type() == ActionNode::STORE_POSITION) {
- *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
- return true;
- } else {
- return false;
- }
- }
- }
- return false;
-}
-
-// A (dynamically-sized) set of unsigned integers that behaves especially well
-// on small integers (< kFirstLimit). May do zone-allocation.
-class DynamicBitSet : public ZoneObject {
- public:
- V8_EXPORT_PRIVATE bool Get(unsigned value) const {
- if (value < kFirstLimit) {
- return (first_ & (1 << value)) != 0;
- } else if (remaining_ == nullptr) {
- return false;
- } else {
- return remaining_->Contains(value);
- }
- }
-
- // Destructively set a value in this set.
- void Set(unsigned value, Zone* zone) {
- if (value < kFirstLimit) {
- first_ |= (1 << value);
- } else {
- if (remaining_ == nullptr)
- remaining_ = new (zone) ZoneList<unsigned>(1, zone);
- if (remaining_->is_empty() || !remaining_->Contains(value))
- remaining_->Add(value, zone);
- }
- }
-
- private:
- static constexpr unsigned kFirstLimit = 32;
-
- uint32_t first_ = 0;
- ZoneList<unsigned>* remaining_ = nullptr;
-};
-
-int Trace::FindAffectedRegisters(DynamicBitSet* affected_registers,
- Zone* zone) {
- int max_register = RegExpCompiler::kNoRegister;
- for (DeferredAction* action = actions_; action != nullptr;
- action = action->next()) {
- if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
- Interval range = static_cast<DeferredClearCaptures*>(action)->range();
- for (int i = range.from(); i <= range.to(); i++)
- affected_registers->Set(i, zone);
- if (range.to() > max_register) max_register = range.to();
- } else {
- affected_registers->Set(action->reg(), zone);
- if (action->reg() > max_register) max_register = action->reg();
- }
- }
- return max_register;
-}
-
-void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
- int max_register,
- const DynamicBitSet& registers_to_pop,
- const DynamicBitSet& registers_to_clear) {
- for (int reg = max_register; reg >= 0; reg--) {
- if (registers_to_pop.Get(reg)) {
- assembler->PopRegister(reg);
- } else if (registers_to_clear.Get(reg)) {
- int clear_to = reg;
- while (reg > 0 && registers_to_clear.Get(reg - 1)) {
- reg--;
- }
- assembler->ClearRegisters(reg, clear_to);
- }
- }
-}
-
-void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
- int max_register,
- const DynamicBitSet& affected_registers,
- DynamicBitSet* registers_to_pop,
- DynamicBitSet* registers_to_clear,
- Zone* zone) {
- // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
- const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
-
- // Count pushes performed to force a stack limit check occasionally.
- int pushes = 0;
-
- for (int reg = 0; reg <= max_register; reg++) {
- if (!affected_registers.Get(reg)) {
- continue;
- }
-
- // The chronologically first deferred action in the trace
- // is used to infer the action needed to restore a register
- // to its previous state (or not, if it's safe to ignore it).
- enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
- DeferredActionUndoType undo_action = IGNORE;
-
- int value = 0;
- bool absolute = false;
- bool clear = false;
- static const int kNoStore = kMinInt;
- int store_position = kNoStore;
- // This is a little tricky because we are scanning the actions in reverse
- // historical order (newest first).
- for (DeferredAction* action = actions_; action != nullptr;
- action = action->next()) {
- if (action->Mentions(reg)) {
- switch (action->action_type()) {
- case ActionNode::SET_REGISTER_FOR_LOOP: {
- Trace::DeferredSetRegisterForLoop* psr =
- static_cast<Trace::DeferredSetRegisterForLoop*>(action);
- if (!absolute) {
- value += psr->value();
- absolute = true;
- }
- // SET_REGISTER_FOR_LOOP is only used for newly introduced loop
- // counters. They can have a significant previous value if they
- // occur in a loop. TODO(lrn): Propagate this information, so
- // we can set undo_action to IGNORE if we know there is no value to
- // restore.
- undo_action = RESTORE;
- DCHECK_EQ(store_position, kNoStore);
- DCHECK(!clear);
- break;
- }
- case ActionNode::INCREMENT_REGISTER:
- if (!absolute) {
- value++;
- }
- DCHECK_EQ(store_position, kNoStore);
- DCHECK(!clear);
- undo_action = RESTORE;
- break;
- case ActionNode::STORE_POSITION: {
- Trace::DeferredCapture* pc =
- static_cast<Trace::DeferredCapture*>(action);
- if (!clear && store_position == kNoStore) {
- store_position = pc->cp_offset();
- }
-
- // For captures we know that stores and clears alternate.
- // Other register, are never cleared, and if the occur
- // inside a loop, they might be assigned more than once.
- if (reg <= 1) {
- // Registers zero and one, aka "capture zero", is
- // always set correctly if we succeed. There is no
- // need to undo a setting on backtrack, because we
- // will set it again or fail.
- undo_action = IGNORE;
- } else {
- undo_action = pc->is_capture() ? CLEAR : RESTORE;
- }
- DCHECK(!absolute);
- DCHECK_EQ(value, 0);
- break;
- }
- case ActionNode::CLEAR_CAPTURES: {
- // Since we're scanning in reverse order, if we've already
- // set the position we have to ignore historically earlier
- // clearing operations.
- if (store_position == kNoStore) {
- clear = true;
- }
- undo_action = RESTORE;
- DCHECK(!absolute);
- DCHECK_EQ(value, 0);
- break;
- }
- default:
- UNREACHABLE();
- break;
- }
- }
- }
- // Prepare for the undo-action (e.g., push if it's going to be popped).
- if (undo_action == RESTORE) {
- pushes++;
- RegExpMacroAssembler::StackCheckFlag stack_check =
- RegExpMacroAssembler::kNoStackLimitCheck;
- if (pushes == push_limit) {
- stack_check = RegExpMacroAssembler::kCheckStackLimit;
- pushes = 0;
- }
-
- assembler->PushRegister(reg, stack_check);
- registers_to_pop->Set(reg, zone);
- } else if (undo_action == CLEAR) {
- registers_to_clear->Set(reg, zone);
- }
- // Perform the chronologically last action (or accumulated increment)
- // for the register.
- if (store_position != kNoStore) {
- assembler->WriteCurrentPositionToRegister(reg, store_position);
- } else if (clear) {
- assembler->ClearRegisters(reg, reg);
- } else if (absolute) {
- assembler->SetRegister(reg, value);
- } else if (value != 0) {
- assembler->AdvanceRegister(reg, value);
- }
- }
-}
-
- // Brings the code generator back to a normal state so that generic code can
- // be emitted for |successor|. Called when entering a loop choice node and a
- // few other tricky nodes: everything this trace has deferred is emitted
- // here, followed by the code that undoes it again on backtrack.
- void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-
-   DCHECK(!is_trivial());
-
-   const bool only_position_is_deferred =
-       actions_ == nullptr && backtrack() == nullptr;
-   if (only_position_is_deferred) {
-     // Only deferred current-position advances are outstanding. Emitting them
-     // restores the normal situation; continuing with a fresh trace also
-     // forgets anything learned from a quick check that was already performed.
-     if (cp_offset_ != 0) {
-       masm->AdvanceCurrentPosition(cp_offset_);
-     }
-     Trace fresh_trace;
-     successor->Emit(compiler, &fresh_trace);
-     return;
-   }
-
-   // Emit the deferred actions together with the code that undoes them.
-   DynamicBitSet affected_registers;
-
-   if (backtrack() != nullptr) {
-     // A concrete backtrack location was set up by a choice node, which means
-     // there is a deferred save of the current position to emit here.
-     masm->PushCurrentPosition();
-   }
-
-   const int max_register =
-       FindAffectedRegisters(&affected_registers, compiler->zone());
-   DynamicBitSet pop_set;
-   DynamicBitSet clear_set;
-   PerformDeferredActions(masm, max_register, affected_registers, &pop_set,
-                          &clear_set, compiler->zone());
-   if (cp_offset_ != 0) {
-     masm->AdvanceCurrentPosition(cp_offset_);
-   }
-
-   // Continue with the successor: inline with a fresh trivial trace while we
-   // may still recurse, otherwise through a jump to its queued generic code.
-   Label undo;
-   masm->PushBacktrack(&undo);
-   if (!successor->KeepRecursing(compiler)) {
-     compiler->AddWork(successor);
-     masm->GoTo(successor->label());
-   } else {
-     Trace fresh_trace;
-     successor->Emit(compiler, &fresh_trace);
-   }
-
-   // Backtrack target: restore the state changed above, then carry on
-   // backtracking to wherever this trace would have gone.
-   masm->BindJumpTarget(&undo);
-   RestoreAffectedRegisters(masm, max_register, pop_set, clear_set);
-   if (backtrack() != nullptr) {
-     masm->PopCurrentPosition();
-     masm->GoTo(backtrack());
-   } else {
-     masm->Backtrack();
-   }
- }
-
- // Emits the code run when the body of a negative submatch has matched. The
- // trace is deliberately not flushed: the whole stack frame is discarded.
- void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-
-   // The trace is ignored, so this code is independent of it and can double
-   // as the generic version of the node.
-   if (!label()->is_bound()) {
-     masm->Bind(label());
-   }
-
-   // Drop everything pushed on the backtrack stack since the negative
-   // submatch started, and restore the character position.
-   masm->ReadCurrentPositionFromRegister(current_position_register_);
-   masm->ReadStackPointerFromRegister(stack_pointer_register_);
-
-   // Clear any captures that might have been performed while the body of the
-   // negative look-ahead was succeeding.
-   if (clear_capture_count_ > 0) {
-     const int last_capture_register =
-         clear_capture_start_ + clear_capture_count_ - 1;
-     masm->ClearRegisters(clear_capture_start_, last_capture_register);
-   }
-
-   // With the stack unwound, its top is the backtrack location that the
-   // BeginSubmatch node got.
-   masm->Backtrack();
- }
-
- // Emits the code for a terminal node: succeed for ACCEPT, or jump to the
- // trace's backtrack location for BACKTRACK.
- void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
-   // Deferred work has to be emitted first; Flush() does that and then
-   // continues with this node.
-   if (!trace->is_trivial()) {
-     trace->Flush(compiler, this);
-     return;
-   }
-
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-   if (!label()->is_bound()) {
-     masm->Bind(label());
-   }
-
-   if (action_ == ACCEPT) {
-     masm->Succeed();
-     return;
-   }
-   if (action_ == BACKTRACK) {
-     masm->GoTo(trace->backtrack());
-     return;
-   }
-   if (action_ == NEGATIVE_SUBMATCH_SUCCESS) {
-     // This case is handled in a different virtual method.
-     UNREACHABLE();
-   }
-   UNIMPLEMENTED();
- }
-
- // Appends |guard| to this alternative's guard list, creating the list in
- // |zone| on first use.
- void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
-   if (guards_ == nullptr) {
-     guards_ = new (zone) ZoneList<Guard*>(1, zone);
-   }
-   guards_->Add(guard, zone);
- }
-
- // Creates a SET_REGISTER_FOR_LOOP action that records register |reg| and
- // value |val| and continues with |on_success|. The node is allocated in
- // |on_success|'s zone.
- ActionNode* ActionNode::SetRegisterForLoop(int reg, int val,
-                                            RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(SET_REGISTER_FOR_LOOP, on_success);
-   node->data_.u_store_register.reg = reg;
-   node->data_.u_store_register.value = val;
-   return node;
- }
-
- // Creates an INCREMENT_REGISTER action for register |reg| that continues
- // with |on_success|. The node is allocated in |on_success|'s zone.
- ActionNode* ActionNode::IncrementRegister(int reg, RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
-   node->data_.u_increment_register.reg = reg;
-   return node;
- }
-
- // Creates a STORE_POSITION action for register |reg| that continues with
- // |on_success|; |is_capture| is recorded alongside the register. The node is
- // allocated in |on_success|'s zone.
- ActionNode* ActionNode::StorePosition(int reg, bool is_capture,
-                                       RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(STORE_POSITION, on_success);
-   node->data_.u_position_register.reg = reg;
-   node->data_.u_position_register.is_capture = is_capture;
-   return node;
- }
-
- // Creates a CLEAR_CAPTURES action covering the registers range.from() to
- // range.to() that continues with |on_success|. The node is allocated in
- // |on_success|'s zone.
- ActionNode* ActionNode::ClearCaptures(Interval range, RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
-   node->data_.u_clear_captures.range_from = range.from();
-   node->data_.u_clear_captures.range_to = range.to();
-   return node;
- }
-
- // Creates a BEGIN_SUBMATCH action that records the registers used for the
- // stack pointer (|stack_reg|) and the current position (|position_reg|) and
- // continues with |on_success|. The node is allocated in |on_success|'s zone.
- ActionNode* ActionNode::BeginSubmatch(int stack_reg, int position_reg,
-                                       RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
-   node->data_.u_submatch.stack_pointer_register = stack_reg;
-   node->data_.u_submatch.current_position_register = position_reg;
-   return node;
- }
-
- // Creates a POSITIVE_SUBMATCH_SUCCESS action that continues with
- // |on_success|. It records the stack-pointer and current-position registers
- // plus the block of |clear_register_count| registers starting at
- // |clear_register_from|. The node is allocated in |on_success|'s zone.
- ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg, int position_reg,
-                                                 int clear_register_count,
-                                                 int clear_register_from,
-                                                 RegExpNode* on_success) {
-   ActionNode* node = new (on_success->zone())
-       ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
-   node->data_.u_submatch.stack_pointer_register = stack_reg;
-   node->data_.u_submatch.current_position_register = position_reg;
-   node->data_.u_submatch.clear_register_count = clear_register_count;
-   node->data_.u_submatch.clear_register_from = clear_register_from;
-   return node;
- }
-
- // Creates an EMPTY_MATCH_CHECK action that records |start_register|,
- // |repetition_register| and |repetition_limit| and continues with
- // |on_success|. The node is allocated in |on_success|'s zone.
- ActionNode* ActionNode::EmptyMatchCheck(int start_register,
-                                         int repetition_register,
-                                         int repetition_limit,
-                                         RegExpNode* on_success) {
-   ActionNode* node =
-       new (on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
-   node->data_.u_empty_match_check.start_register = start_register;
-   node->data_.u_empty_match_check.repetition_register = repetition_register;
-   node->data_.u_empty_match_check.repetition_limit = repetition_limit;
-   return node;
- }
-
- // Defines Accept() for every node type listed by FOR_EACH_NODE_TYPE. Each
- // definition forwards to the visitor's matching Visit<Type>() method.
- #define DEFINE_ACCEPT(Type)                       \
-   void Type##Node::Accept(NodeVisitor* visitor) { \
-     visitor->Visit##Type(this);                   \
-   }
- FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
- #undef DEFINE_ACCEPT
-
-// -------------------------------------------------------------------
-// Emit code.
-
- // Emits the register test for |guard|: control goes to the trace's backtrack
- // location when the guard does not hold. An LT guard fails when the register
- // is >= the guard value; a GEQ guard fails when it is < the guard value.
- void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
-                                Guard* guard, Trace* trace) {
-   if (guard->op() == Guard::LT) {
-     DCHECK(!trace->mentions_reg(guard->reg()));
-     macro_assembler->IfRegisterGE(guard->reg(), guard->value(),
-                                   trace->backtrack());
-   } else if (guard->op() == Guard::GEQ) {
-     DCHECK(!trace->mentions_reg(guard->reg()));
-     macro_assembler->IfRegisterLT(guard->reg(), guard->value(),
-                                   trace->backtrack());
-   }
- }
-
- // Writes the case-insensitive equivalence class of |character| into
- // |letters| and returns the number of entries written, omitting those that
- // cannot occur in the source string because it is Latin1
- // (|one_byte_subject|).
- //
- // |letters| must have room for |letter_length| entries. The ICU path aborts
- // via CHECK instead of writing past that capacity; the non-ICU path does not
- // consult |letter_length|.
- static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
-                                      bool one_byte_subject,
-                                      unibrow::uchar* letters,
-                                      int letter_length) {
- #ifdef V8_INTL_SUPPORT
-   // Characters in the ignore set only ever match themselves.
-   if (RegExpCaseFolding::IgnoreSet().contains(character)) {
-     letters[0] = character;
-     return 1;
-   }
-   bool in_special_add_set =
-       RegExpCaseFolding::SpecialAddSet().contains(character);
-
-   icu::UnicodeSet set;
-   set.add(character);
-   set = set.closeOver(USET_CASE_INSENSITIVE);
-
-   // For characters in the special add set, only members of the closure that
-   // canonicalize to the same value as |character| are kept.
-   UChar32 canon = 0;
-   if (in_special_add_set) {
-     canon = RegExpCaseFolding::Canonicalize(character);
-   }
-
-   int32_t range_count = set.getRangeCount();
-   int items = 0;
-   for (int32_t i = 0; i < range_count; i++) {
-     UChar32 start = set.getRangeStart(i);
-     UChar32 end = set.getRangeEnd(i);
-     // Coarse early check, kept from the original. It is off by one (a range
-     // holds end - start + 1 code units), so it cannot by itself guarantee
-     // that the writes below stay in bounds.
-     CHECK(end - start + items <= letter_length);
-     for (UChar32 cu = start; cu <= end; cu++) {
-       if (one_byte_subject && cu > String::kMaxOneByteCharCode) break;
-       if (in_special_add_set && RegExpCaseFolding::Canonicalize(cu) != canon) {
-         continue;
-       }
-       // Exact bound for this write; only fires when the buffer would
-       // actually be overrun.
-       CHECK(items < letter_length);
-       letters[items++] = static_cast<unibrow::uchar>(cu);
-     }
-   }
-   return items;
- #else
-   int length =
-       isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
-   // Unibrow returns 0 or 1 for characters where case independence is
-   // trivial.
-   if (length == 0) {
-     letters[0] = character;
-     length = 1;
-   }
-
-   if (one_byte_subject) {
-     // Compact |letters| in place, dropping everything outside Latin1.
-     int new_length = 0;
-     for (int i = 0; i < length; i++) {
-       if (letters[i] <= String::kMaxOneByteCharCode) {
-         letters[new_length++] = letters[i];
-       }
-     }
-     length = new_length;
-   }
-
-   return length;
- #endif  // V8_INTL_SUPPORT
- }
-
- // Emits an exact match of code unit |c| at |cp_offset|, jumping to
- // |on_failure| on mismatch. Unless the character is |preloaded|, it is
- // loaded first, with |check| passed through to the load. Returns true
- // exactly when the load was emitted here.
- static inline bool EmitSimpleCharacter(Isolate* isolate,
-                                        RegExpCompiler* compiler, uc16 c,
-                                        Label* on_failure, int cp_offset,
-                                        bool check, bool preloaded) {
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-   const bool loaded_here = !preloaded;
-   if (loaded_here) {
-     masm->LoadCurrentCharacter(cp_offset, on_failure, check);
-   }
-   masm->CheckNotCharacter(c, on_failure);
-   return loaded_here;
- }
-
- // Case-independent matching, first pass: emits a test only for characters
- // without case, i.e. whose equivalence class has exactly one member. Returns
- // true when a bounds-checking load was emitted here.
- static inline bool EmitAtomNonLetter(Isolate* isolate, RegExpCompiler* compiler,
-                                      uc16 c, Label* on_failure, int cp_offset,
-                                      bool check, bool preloaded) {
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-   const bool one_byte = compiler->one_byte();
-   const int kMaxLetters = 4;
-   unibrow::uchar chars[kMaxLetters];
-   const int length =
-       GetCaseIndependentLetters(isolate, c, one_byte, chars, kMaxLetters);
-
-   // length < 1: this can't match. It must be a one-byte subject and a
-   // non-one-byte character; nothing needs doing since the one-byte pass
-   // already handled it.
-   // length > 1: the character has case and is handled in a later pass.
-   // Either way the bounds were not checked here.
-   if (length != 1) return false;
-
-   if (one_byte && c > String::kMaxOneByteCharCodeU) {
-     // Can't match - see above.
-     return false;  // Bounds not checked.
-   }
-
-   bool checked = false;
-   if (!preloaded) {
-     masm->LoadCurrentCharacter(cp_offset, on_failure, check);
-     checked = check;
-   }
-   masm->CheckNotCharacter(c, on_failure);
-   return checked;
- }
-
- // Tries to test "current character is c1 or c2" with a single masked
- // comparison. Returns true if such a test was emitted, and false (emitting
- // nothing) if neither trick applies. Expects c2 > c1.
- static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
-                                       bool one_byte, uc16 c1, uc16 c2,
-                                       Label* on_failure) {
-   uc16 char_mask;
-   if (!one_byte) {
-     char_mask = String::kMaxUtf16CodeUnit;
-   } else {
-     char_mask = String::kMaxOneByteCharCode;
-   }
-
-   // Ecma262UnCanonicalize always gives the highest number last.
-   DCHECK(c2 > c1);
-
-   // Trick 1: c1 and c2 differ in exactly one bit, so masking that bit out
-   // of the input lets one comparison cover both.
-   const uc16 differing_bits = c1 ^ c2;
-   const bool differ_by_one_bit =
-       ((differing_bits - 1) & differing_bits) == 0;
-   if (differ_by_one_bit) {
-     const uc16 mask = char_mask ^ differing_bits;
-     macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
-     return true;
-   }
-
-   // Trick 2: the characters differ by 2^n but not by a single bit. Subtract
-   // the difference from the found character, then do the same masking. The
-   // theoretical case where negative numbers would be involved is avoided in
-   // order to simplify code generation.
-   const uc16 diff = c2 - c1;
-   const bool diff_is_power_of_two = ((diff - 1) & diff) == 0;
-   if (diff_is_power_of_two && c1 >= diff) {
-     const uc16 mask = char_mask ^ diff;
-     macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff, diff, mask,
-                                                     on_failure);
-     return true;
-   }
-   return false;
- }
-
- // Case-independent matching, later pass: emits a test only for characters
- // that have case, i.e. whose equivalence class has two to four members.
- // Returns false, emitting nothing, when the class has at most one member.
- static inline bool EmitAtomLetter(Isolate* isolate, RegExpCompiler* compiler,
-                                   uc16 c, Label* on_failure, int cp_offset,
-                                   bool check, bool preloaded) {
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-   const bool one_byte = compiler->one_byte();
-   const int kMaxLetters = 4;
-   unibrow::uchar chars[kMaxLetters];
-   const int length =
-       GetCaseIndependentLetters(isolate, c, one_byte, chars, kMaxLetters);
-   if (length <= 1) return false;
-
-   // We may not need to check against the end of the input string
-   // if this character lies before a character that matched.
-   if (!preloaded) {
-     masm->LoadCurrentCharacter(cp_offset, on_failure, check);
-   }
-
-   Label ok;
-   if (length == 2) {
-     // Prefer one masked comparison; otherwise test the two characters
-     // individually.
-     if (!ShortCutEmitCharacterPair(masm, one_byte, chars[0], chars[1],
-                                    on_failure)) {
-       masm->CheckCharacter(chars[0], &ok);
-       masm->CheckNotCharacter(chars[1], on_failure);
-       masm->Bind(&ok);
-     }
-   } else if (length == 3 || length == 4) {
-     // A fourth member, when present, is tested first; the other three are
-     // tested the same way for both lengths.
-     if (length == 4) {
-       masm->CheckCharacter(chars[3], &ok);
-     }
-     masm->CheckCharacter(chars[0], &ok);
-     masm->CheckCharacter(chars[1], &ok);
-     masm->CheckNotCharacter(chars[2], on_failure);
-     masm->Bind(&ok);
-   } else {
-     UNREACHABLE();
-   }
-   return true;
- }
-
- // Emits a test of the current character against |border|: characters below
- // it go to |below|, all others to |above_or_equal|. No jump is emitted to
- // whichever of the two labels equals |fall_through|.
- static void EmitBoundaryTest(RegExpMacroAssembler* masm, int border,
-                              Label* fall_through, Label* above_or_equal,
-                              Label* below) {
-   if (below == fall_through) {
-     // Only the "above or equal" side needs a branch.
-     masm->CheckCharacterGT(border - 1, above_or_equal);
-     return;
-   }
-   masm->CheckCharacterLT(border, below);
-   if (above_or_equal != fall_through) {
-     masm->GoTo(above_or_equal);
-   }
- }
-
- // Emits a test of the current character against the inclusive interval
- // [first, last]: characters inside go to |in_range|, all others to
- // |out_of_range|. No jump is emitted to whichever label equals
- // |fall_through|. A one-character interval uses a plain character
- // comparison instead of a range check.
- static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm, int first,
-                                    int last, Label* fall_through,
-                                    Label* in_range, Label* out_of_range) {
-   const bool single_character = (first == last);
-
-   if (in_range == fall_through) {
-     // Only the "outside" side needs a branch.
-     if (single_character) {
-       masm->CheckNotCharacter(first, out_of_range);
-     } else {
-       masm->CheckCharacterNotInRange(first, last, out_of_range);
-     }
-     return;
-   }
-
-   if (single_character) {
-     masm->CheckCharacter(first, in_range);
-   } else {
-     masm->CheckCharacterInRange(first, last, in_range);
-   }
-   if (out_of_range != fall_through) {
-     masm->GoTo(out_of_range);
-   }
- }
-
- // Emits a table-driven test for the boundaries ranges[start_index] to
- // ranges[end_index], which are asserted to lie on the same kTableSize page
- // as |min_char|.
- // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
- // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
- static void EmitUseLookupTable(RegExpMacroAssembler* masm,
-                                ZoneList<int>* ranges, int start_index,
-                                int end_index, int min_char, Label* fall_through,
-                                Label* even_label, Label* odd_label) {
-   static const int kSize = RegExpMacroAssembler::kTableSize;
-   static const int kMask = RegExpMacroAssembler::kTableMask;
-
-   int base = (min_char & ~kMask);
-   USE(base);
-
-   // Assert that everything is on one kTableSize page.
-   for (int i = start_index; i <= end_index; i++) {
-     DCHECK_EQ(ranges->at(i) & ~kMask, base);
-   }
-   DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
-
-   // The emitted test jumps when the table bit is set. If even_label is the
-   // fall-through, set bits stand for the odd intervals; otherwise they stand
-   // for the even ones. |bit| starts as the value for the entries in front of
-   // the first boundary.
-   const bool even_falls_through = (even_label == fall_through);
-   Label* const on_bit_set = even_falls_through ? odd_label : even_label;
-   Label* const on_bit_clear = even_falls_through ? even_label : odd_label;
-   int bit = even_falls_through ? 1 : 0;
-
-   char templ[kSize];
-   for (int i = 0; i < (ranges->at(start_index) & kMask) && i < kSize; i++) {
-     templ[i] = bit;
-   }
-
-   // Fill each interval between consecutive boundaries, flipping the bit at
-   // every boundary. |j| ends up just past the last entry written here.
-   int j = 0;
-   bit ^= 1;
-   for (int edge = start_index; edge < end_index; edge++) {
-     for (j = (ranges->at(edge) & kMask); j < (ranges->at(edge + 1) & kMask);
-          j++) {
-       templ[j] = bit;
-     }
-     bit ^= 1;
-   }
-
-   // Everything from |j| to the end of the table.
-   for (int i = j; i < kSize; i++) {
-     templ[i] = bit;
-   }
-
-   Factory* factory = masm->isolate()->factory();
-   // TODO(erikcorry): Cache these.
-   Handle<ByteArray> table = factory->NewByteArray(kSize, AllocationType::kOld);
-   for (int i = 0; i < kSize; i++) {
-     table->set(i, templ[i]);
-   }
-   masm->CheckBitInTable(table, on_bit_set);
-   if (on_bit_clear != fall_through) {
-     masm->GoTo(on_bit_clear);
-   }
- }
-
- // Emits a direct test for the single interval that starts at
- // ranges[cut_index], then removes that interval from |ranges| by rewriting
- // the array in place.
- static void CutOutRange(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
-                         int start_index, int end_index, int cut_index,
-                         Label* even_label, Label* odd_label) {
-   const bool cut_is_odd = ((cut_index - start_index) & 1) == 1;
-   Label* in_range_label = cut_is_odd ? odd_label : even_label;
-
-   // |dummy| serves as both the fall-through and the out-of-range label, so
-   // the only jump emitted is the one to |in_range_label|.
-   Label dummy;
-   EmitDoubleBoundaryTest(masm, ranges->at(cut_index),
-                          ranges->at(cut_index + 1) - 1, &dummy, in_range_label,
-                          &dummy);
-   DCHECK(!dummy.is_linked());
-
-   // Cut out the single range by rewriting the array. This creates a new
-   // range that is a merger of the two ranges on either side of the one we
-   // are cutting out. The oddity of the labels is preserved.
-   for (int dst = cut_index; dst > start_index; dst--) {
-     ranges->at(dst) = ranges->at(dst - 1);
-   }
-   for (int dst = cut_index + 1; dst < end_index; dst++) {
-     ranges->at(dst) = ranges->at(dst + 1);
-   }
- }
-
- // Unicode case. Picks the point at which to split the boundaries ranges[start_index..end_index] so each part can be handled
- // with recursion. Outputs: *border (split character), *new_end_index (last index of lower part), *new_start_index (first index of upper part).
- static void SplitSearchSpace(ZoneList<int>* ranges, int start_index,
- int end_index, int* new_start_index,
- int* new_end_index, int* border) {
- static const int kSize = RegExpMacroAssembler::kTableSize;
- static const int kMask = RegExpMacroAssembler::kTableMask;
-
- int first = ranges->at(start_index);  // Lowest boundary of the span.
- int last = ranges->at(end_index) - 1;  // One below the highest boundary of the span.
-
- *new_start_index = start_index;
- *border = (ranges->at(start_index) & ~kMask) + kSize;  // Presumably the end of the kSize page holding the first boundary (assumes kMask == kSize - 1; confirm).
- while (*new_start_index < end_index) {
- if (ranges->at(*new_start_index) > *border) break;
- (*new_start_index)++;
- }
- // new_start_index is now the index of the first edge that is beyond the
- // current kSize space, or end_index if the scan found no such edge.
-
- // For very large search spaces we do a binary chop search of the non-Latin1
- // space instead of just going to the end of the current kSize space. The
- // heuristics are complicated a little by the fact that any 128-character
- // encoding space can be quickly tested with a table lookup, so we don't
- // wish to do binary chop search at a smaller granularity than that. A
- // 128-character space can take up a lot of space in the ranges array if,
- // for example, we only want to match every second character (e.g. the lower
- // case characters on some Unicode pages).
- int binary_chop_index = (end_index + start_index) / 2;
- // The first test ensures that we get to the code that handles the Latin1
- // range with a single not-taken branch, speeding up this important
- // character range (even non-Latin1 charset-based text has spaces and
- // punctuation).
- if (*border - 1 > String::kMaxOneByteCharCode && // Skip the chop while the first part still covers Latin1.
- end_index - start_index > (*new_start_index - start_index) * 2 &&
- last - first > kSize * 2 && binary_chop_index > *new_start_index &&
- ranges->at(binary_chop_index) >= first + 2 * kSize) {
- int scan_forward_for_section_border = binary_chop_index;
- int new_border = (ranges->at(binary_chop_index) | kMask) + 1;  // Candidate border derived from the boundary at the chop index.
-
- while (scan_forward_for_section_border < end_index) {
- if (ranges->at(scan_forward_for_section_border) > new_border) {
- *new_start_index = scan_forward_for_section_border;
- *border = new_border;
- break;
- }
- scan_forward_for_section_border++;
- }
- }
-
- DCHECK(*new_start_index > start_index);
- *new_end_index = *new_start_index - 1;
- if (ranges->at(*new_end_index) == *border) {  // A boundary equal to the border is left out of the lower part.
- (*new_end_index)--;
- }
- if (*border >= ranges->at(end_index)) {  // Border is at or past the last boundary: clamp it to that boundary.
- *border = ranges->at(end_index);
- *new_start_index = end_index; // Won't be used.
- *new_end_index = end_index - 1;
- }
- }
-
- // Gets a series of segment boundaries representing a character class. If the
- // character is in the range between an even and an odd boundary (counting from
- // start_index) then go to even_label, otherwise go to odd_label. We already
- // know that the character is in the range of min_char to max_char inclusive.
- // Either label can be nullptr indicating backtracking. Either label can also
- // be equal to the fall_through label.
- //
- // The boundaries considered are ranges[start_index] to ranges[end_index].
- // Note that for small classes |ranges| is rewritten in place (see
- // CutOutRange), so callers must not rely on its contents afterwards.
- static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
-                              int start_index, int end_index, uc32 min_char,
-                              uc32 max_char, Label* fall_through,
-                              Label* even_label, Label* odd_label) {
-   DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
-   DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
-
-   int first = ranges->at(start_index);
-   int last = ranges->at(end_index) - 1;
-
-   DCHECK_LT(min_char, first);
-
-   // Just need to test if the character is before or on-or-after
-   // a particular character.
-   if (start_index == end_index) {
-     EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
-     return;
-   }
-
-   // Another almost trivial case: There is one interval in the middle that is
-   // different from the end intervals.
-   if (start_index + 1 == end_index) {
-     EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label,
-                            odd_label);
-     return;
-   }
-
-   // It's not worth using table lookup if there are very few intervals in the
-   // character class.
-   if (end_index - start_index <= 6) {
-     // It is faster to test for individual characters, so we look for those
-     // first, then try arbitrary ranges in the second round.
-     // Sentinel meaning "no single-character interval found"; a constant, so
-     // it is declared const rather than as a mutable static.
-     static const int kNoCutIndex = -1;
-     int cut = kNoCutIndex;
-     for (int i = start_index; i < end_index; i++) {
-       if (ranges->at(i) == ranges->at(i + 1) - 1) {
-         cut = i;
-         break;
-       }
-     }
-     if (cut == kNoCutIndex) cut = start_index;
-     CutOutRange(masm, ranges, start_index, end_index, cut, even_label,
-                 odd_label);
-     DCHECK_GE(end_index - start_index, 2);
-     // CutOutRange removed one interval (two boundaries), so the remaining
-     // boundaries occupy start_index + 1 to end_index - 1.
-     GenerateBranches(masm, ranges, start_index + 1, end_index - 1, min_char,
-                      max_char, fall_through, even_label, odd_label);
-     return;
-   }
-
-   // If there are a lot of intervals in the regexp, then we will use tables to
-   // determine whether the character is inside or outside the character class.
-   static const int kBits = RegExpMacroAssembler::kTableSizeBits;
-
-   if ((max_char >> kBits) == (min_char >> kBits)) {
-     EmitUseLookupTable(masm, ranges, start_index, end_index, min_char,
-                        fall_through, even_label, odd_label);
-     return;
-   }
-
-   // The first boundary is on a later table page than min_char: peel off
-   // everything below it with one comparison and recurse on the rest with the
-   // roles of the even and odd labels swapped.
-   if ((min_char >> kBits) != (first >> kBits)) {
-     masm->CheckCharacterLT(first, odd_label);
-     GenerateBranches(masm, ranges, start_index + 1, end_index, first, max_char,
-                      fall_through, odd_label, even_label);
-     return;
-   }
-
-   int new_start_index = 0;
-   int new_end_index = 0;
-   int border = 0;
-
-   SplitSearchSpace(ranges, start_index, end_index, &new_start_index,
-                    &new_end_index, &border);
-
-   Label handle_rest;
-   Label* above = &handle_rest;
-   if (border == last + 1) {
-     // We didn't find any section that started after the limit, so everything
-     // above the border is one of the terminal labels.
-     above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
-     DCHECK(new_end_index == end_index - 1);
-   }
-
-   DCHECK_LE(start_index, new_end_index);
-   DCHECK_LE(new_start_index, end_index);
-   DCHECK_LT(start_index, new_start_index);
-   DCHECK_LT(new_end_index, end_index);
-   DCHECK(new_end_index + 1 == new_start_index ||
-          (new_end_index + 2 == new_start_index &&
-           border == ranges->at(new_end_index + 1)));
-   DCHECK_LT(min_char, border - 1);
-   DCHECK_LT(border, max_char);
-   DCHECK_LT(ranges->at(new_end_index), border);
-   DCHECK(border < ranges->at(new_start_index) ||
-          (border == ranges->at(new_start_index) &&
-           new_start_index == end_index && new_end_index == end_index - 1 &&
-           border == last + 1));
-   DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
-
-   // Characters at or above the border go to |above|; the lower part is
-   // handled right here.
-   masm->CheckCharacterGT(border - 1, above);
-   Label dummy;
-   GenerateBranches(masm, ranges, start_index, new_end_index, min_char,
-                    border - 1, &dummy, even_label, odd_label);
-   if (handle_rest.is_linked()) {
-     // The upper part is only needed if something jumps to it. Its even/odd
-     // labels are swapped when it starts at an index of different parity than
-     // start_index.
-     masm->Bind(&handle_rest);
-     bool flip = (new_start_index & 1) != (start_index & 1);
-     GenerateBranches(masm, ranges, new_start_index, end_index, border, max_char,
-                      &dummy, flip ? odd_label : even_label,
-                      flip ? even_label : odd_label);
-   }
- }
-
- // Emits the test of one input character against character class |cc|,
- // jumping to |on_failure| when the character is not accepted. |one_byte|
- // selects the largest possible code unit; ranges that start above it are
- // ignored.
- static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
-                           RegExpCharacterClass* cc, bool one_byte,
-                           Label* on_failure, int cp_offset, bool check_offset,
-                           bool preloaded, Zone* zone) {
-   ZoneList<CharacterRange>* ranges = cc->ranges(zone);
-   CharacterRange::Canonicalize(ranges);
-
-   int max_char;
-   if (!one_byte) {
-     max_char = String::kMaxUtf16CodeUnit;
-   } else {
-     max_char = String::kMaxOneByteCharCode;
-   }
-
-   // Find the last range that starts at or below max_char.
-   int last_valid_range = ranges->length() - 1;
-   for (; last_valid_range >= 0; last_valid_range--) {
-     if (ranges->at(last_valid_range).from() <= max_char) break;
-   }
-
-   // No usable range at all.
-   if (last_valid_range < 0) {
-     if (!cc->is_negated()) {
-       macro_assembler->GoTo(on_failure);
-     }
-     if (check_offset) {
-       macro_assembler->CheckPosition(cp_offset, on_failure);
-     }
-     return;
-   }
-
-   // A single range that covers every possible character.
-   if (last_valid_range == 0 && ranges->at(0).IsEverything(max_char)) {
-     if (cc->is_negated()) {
-       macro_assembler->GoTo(on_failure);
-     } else if (check_offset) {
-       // This is a common case hit by non-anchored expressions.
-       macro_assembler->CheckPosition(cp_offset, on_failure);
-     }
-     return;
-   }
-
-   if (!preloaded) {
-     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
-   }
-
-   // Give the macro assembler the chance to handle a standard class itself.
-   if (cc->is_standard(zone) && macro_assembler->CheckSpecialCharacterClass(
-                                    cc->standard_type(), on_failure)) {
-     return;
-   }
-
-   // Build a list with ascending entries. Each entry is a code unit at which
-   // membership in the class changes. Normally there is an implicit entry at
-   // zero that goes to the failure label; if the class itself starts at zero,
-   // the meaning of that entry is flipped instead of adding a boundary.
-   // Subsequent entries have alternating meaning (success/failure).
-   ZoneList<int>* boundaries =
-       new (zone) ZoneList<int>(last_valid_range, zone);
-
-   bool zeroth_entry_is_failure = !cc->is_negated();
-
-   for (int i = 0; i <= last_valid_range; i++) {
-     CharacterRange& range = ranges->at(i);
-     if (range.from() != 0) {
-       boundaries->Add(range.from(), zone);
-     } else {
-       DCHECK_EQ(i, 0);
-       zeroth_entry_is_failure = !zeroth_entry_is_failure;
-     }
-     boundaries->Add(range.to() + 1, zone);
-   }
-
-   // A final boundary above max_char is dropped.
-   int end_index = boundaries->length() - 1;
-   if (boundaries->at(end_index) > max_char) {
-     end_index--;
-   }
-
-   Label fall_through;
-   Label* even_label = zeroth_entry_is_failure ? &fall_through : on_failure;
-   Label* odd_label = zeroth_entry_is_failure ? on_failure : &fall_through;
-   GenerateBranches(macro_assembler, boundaries,
-                    0,  // start_index.
-                    end_index,
-                    0,  // min_char.
-                    max_char, &fall_through, even_label, odd_label);
-   macro_assembler->Bind(&fall_through);
- }
-
- RegExpNode::~RegExpNode() = default;  // Defaulted destructor, defined out of line in this file.
-
- // Decides whether the caller should go on generating code for this node with
- // |trace| (CONTINUE), or whether the node has already been dealt with here,
- // by a jump to its generic code or by flushing the trace (DONE).
- RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
-                                                   Trace* trace) {
-   // If we are generating a greedy loop then don't stop and don't reuse code.
-   if (trace->stop_node() != nullptr) return CONTINUE;
-
-   RegExpMacroAssembler* masm = compiler->macro_assembler();
-   if (trace->is_trivial()) {
-     const bool use_generic_version =
-         label_.is_bound() || on_work_list() || !KeepRecursing(compiler);
-     if (!use_generic_version) {
-       // Generate the generic version of the node here and bind the label for
-       // later use.
-       masm->Bind(&label_);
-       return CONTINUE;
-     }
-     // A generic version is already scheduled to be generated, or we have
-     // recursed too deeply: just jump to that code.
-     masm->GoTo(&label_);
-     // This will queue it up for generation of a generic version if it hasn't
-     // already been queued.
-     compiler->AddWork(this);
-     return DONE;
-   }
-
-   // We are being asked to make a non-generic version. Keep track of how many
-   // non-generic versions we generate so as not to overdo it.
-   trace_count_++;
-   const bool may_specialize = KeepRecursing(compiler) &&
-                               compiler->optimize() &&
-                               trace_count_ < kMaxCopiesCodeGenerated;
-   if (may_specialize) return CONTINUE;
-
-   // Code has been generated for this node too many times, or recursion is
-   // too deep. Switch to a generic version; the code for generic versions
-   // above can handle deep recursion properly. Recursion stays limited for
-   // the duration of the flush and the previous setting is restored after.
-   const bool was_limiting = compiler->limiting_recursion();
-   compiler->set_limiting_recursion(true);
-   trace->Flush(compiler, this);
-   compiler->set_limiting_recursion(was_limiting);
-   return DONE;
- }
-
- // Returns true while code generation may keep recursing into successor
- // nodes: the compiler is not limiting recursion and its recursion depth does
- // not exceed RegExpCompiler::kMaxRecursion.
- bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
-   if (compiler->limiting_recursion()) return false;
-   return compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
- }
-
- // Fills in the Boyer-Moore lookahead information for this action node and
- // saves it on the node.
- void ActionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
-                               BoyerMooreLookahead* bm, bool not_at_start) {
-   if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
-     // Every other action is transparent: ask the successor, with one unit
-     // less of budget.
-     on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
-   } else {
-     // Anything may follow a positive submatch success, thus we need to
-     // accept all characters from this position onwards.
-     bm->SetRest(offset);
-   }
-   SaveBMInfo(bm, not_at_start, offset);
- }
-
- // Forwards the quick-check query to the successor node. After a
- // SET_REGISTER_FOR_LOOP action the successor is asked through its loop-entry
- // variant of the query instead.
- void ActionNode::GetQuickCheckDetails(QuickCheckDetails* details,
-                                       RegExpCompiler* compiler, int filled_in,
-                                       bool not_at_start) {
-   if (action_type_ == SET_REGISTER_FOR_LOOP) {
-     on_success()->GetQuickCheckDetailsFromLoopEntry(details, compiler,
-                                                     filled_in, not_at_start);
-     return;
-   }
-   on_success()->GetQuickCheckDetails(details, compiler, filled_in,
-                                      not_at_start);
- }
-
- // Fills in the Boyer-Moore lookahead information by delegating to the
- // successor (with one unit less of budget) and saves the result on this
- // node. Nothing is recorded for an AT_START assertion when |not_at_start|
- // is set.
- void AssertionNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
-                                  BoyerMooreLookahead* bm, bool not_at_start) {
-   // Match the behaviour of EatsAtLeast on this node.
-   const bool start_assertion_away_from_start =
-       assertion_type() == AT_START && not_at_start;
-   if (start_assertion_away_from_start) return;
-
-   on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
-   SaveBMInfo(bm, not_at_start, offset);
- }
-
- // The quick-check details of a negative lookaround choice are those of its
- // continue node; the query is forwarded there unchanged.
- void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
-     QuickCheckDetails* details, RegExpCompiler* compiler, int filled_in,
-     bool not_at_start) {
-   continue_node()->GetQuickCheckDetails(details, compiler, filled_in,
-                                         not_at_start);
- }
-
- // Returns |v| with every bit below its highest set bit also set, i.e. the
- // left-most 1-bit is smeared out to the right. Zero stays zero.
- static inline uint32_t SmearBitsRight(uint32_t v) {
-   // Doubling the shift each round propagates the top bit across all 32 bits
-   // in five steps (1, 2, 4, 8, 16).
-   for (int shift = 1; shift <= 16; shift <<= 1) {
-     v |= v >> shift;
-   }
-   return v;
- }
-
- // Combines the per-position masks and values into the single mask_/value_
- // pair, packing one character per 8 bits (|asc|) or 16 bits (otherwise).
- // Returns true if any position's mask has a bit set within the one-byte
- // character range.
- bool QuickCheckDetails::Rationalize(bool asc) {
-   uint32_t char_mask;
-   if (!asc) {
-     char_mask = String::kMaxUtf16CodeUnit;
-   } else {
-     char_mask = String::kMaxOneByteCharCode;
-   }
-   const int bits_per_char = asc ? 8 : 16;
-
-   mask_ = 0;
-   value_ = 0;
-   bool found_useful_op = false;
-   int char_shift = 0;
-   for (int i = 0; i < characters_; i++, char_shift += bits_per_char) {
-     Position* pos = &positions_[i];
-     if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
-       found_useful_op = true;
-     }
-     mask_ |= (pos->mask & char_mask) << char_shift;
-     value_ |= (pos->value & char_mask) << char_shift;
-   }
-   return found_useful_op;
- }
-
- // Returns the stored "eats at least" value for this node: the
- // from-not-start figure when |not_at_start| is set, otherwise the
- // from-possibly-start figure.
- int RegExpNode::EatsAtLeast(bool not_at_start) {
-   if (not_at_start) {
-     return eats_at_least_.eats_at_least_from_not_start;
-   }
-   return eats_at_least_.eats_at_least_from_possibly_start;
- }
-
- EatsAtLeastInfo RegExpNode::EatsAtLeastFromLoopEntry() {
- // Base-class version; it must never run. SET_REGISTER_FOR_LOOP is only used
- // to initialize loop counters, which implies that the node following it must
- // be a LoopChoiceNode (that class defines its own version of this method).
- // If registers ever need to be set to constant values for other reasons, a new
- // action type SET_REGISTER that implies nothing about its successor could be introduced.
- UNREACHABLE();
- }
-
-void RegExpNode::GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) {
- // See comment in RegExpNode::EatsAtLeastFromLoopEntry.
- UNREACHABLE();
-}
-
-EatsAtLeastInfo LoopChoiceNode::EatsAtLeastFromLoopEntry() {
- DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue.
-
- if (read_backward()) {
- // Can't do anything special for a backward loop, so return the basic values
- // that we got during analysis.
- return *eats_at_least_info();
- }
-
- // Figure out how much the loop body itself eats, not including anything in
- // the continuation case. In general, the nodes in the loop body should report
- // that they eat at least the number eaten by the continuation node, since any
- // successful match in the loop body must also include the continuation node.
- // However, in some cases involving positive lookaround, the loop body under-
- // reports its appetite, so use saturated math here to avoid negative numbers.
- uint8_t loop_body_from_not_start = base::saturated_cast<uint8_t>(
- loop_node_->EatsAtLeast(true) - continue_node_->EatsAtLeast(true));
- uint8_t loop_body_from_possibly_start = base::saturated_cast<uint8_t>(
- loop_node_->EatsAtLeast(false) - continue_node_->EatsAtLeast(true));
-
- // Limit the number of loop iterations to avoid overflow in subsequent steps.
- int loop_iterations = base::saturated_cast<uint8_t>(min_loop_iterations());
-
- EatsAtLeastInfo result;
- result.eats_at_least_from_not_start =
- base::saturated_cast<uint8_t>(loop_iterations * loop_body_from_not_start +
- continue_node_->EatsAtLeast(true));
- if (loop_iterations > 0 && loop_body_from_possibly_start > 0) {
- // First loop iteration eats at least one, so all subsequent iterations
- // and the after-loop chunk are guaranteed to not be at the start.
- result.eats_at_least_from_possibly_start = base::saturated_cast<uint8_t>(
- loop_body_from_possibly_start +
- (loop_iterations - 1) * loop_body_from_not_start +
- continue_node_->EatsAtLeast(true));
- } else {
- // Loop body might eat nothing, so only continue node contributes.
- result.eats_at_least_from_possibly_start =
- continue_node_->EatsAtLeast(false);
- }
- return result;
-}
-
-bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
- Trace* bounds_check_trace, Trace* trace,
- bool preload_has_checked_bounds,
- Label* on_possible_success,
- QuickCheckDetails* details,
- bool fall_through_on_failure,
- ChoiceNode* predecessor) {
- DCHECK_NOT_NULL(predecessor);
- if (details->characters() == 0) return false;
- GetQuickCheckDetails(details, compiler, 0,
- trace->at_start() == Trace::FALSE_VALUE);
- if (details->cannot_match()) return false;
- if (!details->Rationalize(compiler->one_byte())) return false;
- DCHECK(details->characters() == 1 ||
- compiler->macro_assembler()->CanReadUnaligned());
- uint32_t mask = details->mask();
- uint32_t value = details->value();
-
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
-
- if (trace->characters_preloaded() != details->characters()) {
- DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
- // The bounds check is performed using the minimum number of characters
- // any choice would eat, so if the bounds check fails, then none of the
- // choices can succeed, so we can just immediately backtrack, rather
- // than go to the next choice. The number of characters preloaded may be
- // less than the number used for the bounds check.
- int eats_at_least = predecessor->EatsAtLeast(
- bounds_check_trace->at_start() == Trace::FALSE_VALUE);
- DCHECK_GE(eats_at_least, details->characters());
- assembler->LoadCurrentCharacter(
- trace->cp_offset(), bounds_check_trace->backtrack(),
- !preload_has_checked_bounds, details->characters(), eats_at_least);
- }
-
- bool need_mask = true;
-
- if (details->characters() == 1) {
- // If number of characters preloaded is 1 then we used a byte or 16 bit
- // load so the value is already masked down.
- uint32_t char_mask;
- if (compiler->one_byte()) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
- if ((mask & char_mask) == char_mask) need_mask = false;
- mask &= char_mask;
- } else {
- // For 2-character preloads in one-byte mode or 1-character preloads in
- // two-byte mode we also use a 16 bit load with zero extend.
- static const uint32_t kTwoByteMask = 0xFFFF;
- static const uint32_t kFourByteMask = 0xFFFFFFFF;
- if (details->characters() == 2 && compiler->one_byte()) {
- if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
- } else if (details->characters() == 1 && !compiler->one_byte()) {
- if ((mask & kTwoByteMask) == kTwoByteMask) need_mask = false;
- } else {
- if (mask == kFourByteMask) need_mask = false;
- }
- }
-
- if (fall_through_on_failure) {
- if (need_mask) {
- assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
- } else {
- assembler->CheckCharacter(value, on_possible_success);
- }
- } else {
- if (need_mask) {
- assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
- } else {
- assembler->CheckNotCharacter(value, trace->backtrack());
- }
- }
- return true;
-}
-
-// Here is the meat of GetQuickCheckDetails (see also the comment on the
-// super-class in the .h file).
-//
-// We iterate along the text object, building up for each character a
-// mask and value that can be used to test for a quick failure to match.
-// The masks and values for the positions will be combined into a single
-// machine word for the current character width in order to be used in
-// generating a quick check.
-void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) {
- // Do not collect any quick check details if the text node reads backward,
- // since it reads in the opposite direction than we use for quick checks.
- if (read_backward()) return;
- Isolate* isolate = compiler->macro_assembler()->isolate();
- DCHECK(characters_filled_in < details->characters());
- int characters = details->characters();
- int char_mask;
- if (compiler->one_byte()) {
- char_mask = String::kMaxOneByteCharCode;
- } else {
- char_mask = String::kMaxUtf16CodeUnit;
- }
- for (int k = 0; k < elements()->length(); k++) {
- TextElement elm = elements()->at(k);
- if (elm.text_type() == TextElement::ATOM) {
- Vector<const uc16> quarks = elm.atom()->data();
- for (int i = 0; i < characters && i < quarks.length(); i++) {
- QuickCheckDetails::Position* pos =
- details->positions(characters_filled_in);
- uc16 c = quarks[i];
- if (elm.atom()->ignore_case()) {
- unibrow::uchar chars[4];
- int length = GetCaseIndependentLetters(
- isolate, c, compiler->one_byte(), chars, 4);
- if (length == 0) {
- // This can happen because all case variants are non-Latin1, but we
- // know the input is Latin1.
- details->set_cannot_match();
- pos->determines_perfectly = false;
- return;
- }
- if (length == 1) {
- // This letter has no case equivalents, so it's nice and simple
- // and the mask-compare will determine definitely whether we have
- // a match at this character position.
- pos->mask = char_mask;
- pos->value = chars[0];
- pos->determines_perfectly = true;
- } else {
- uint32_t common_bits = char_mask;
- uint32_t bits = chars[0];
- for (int j = 1; j < length; j++) {
- uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
- common_bits ^= differing_bits;
- bits &= common_bits;
- }
- // If length is 2 and common bits has only one zero in it then
- // our mask and compare instruction will determine definitely
- // whether we have a match at this character position. Otherwise
- // it can only be an approximate check.
- uint32_t one_zero = (common_bits | ~char_mask);
- if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
- pos->determines_perfectly = true;
- }
- pos->mask = common_bits;
- pos->value = bits;
- }
- } else {
- // Don't ignore case. Nice simple case where the mask-compare will
- // determine definitely whether we have a match at this character
- // position.
- if (c > char_mask) {
- details->set_cannot_match();
- pos->determines_perfectly = false;
- return;
- }
- pos->mask = char_mask;
- pos->value = c;
- pos->determines_perfectly = true;
- }
- characters_filled_in++;
- DCHECK(characters_filled_in <= details->characters());
- if (characters_filled_in == details->characters()) {
- return;
- }
- }
- } else {
- QuickCheckDetails::Position* pos =
- details->positions(characters_filled_in);
- RegExpCharacterClass* tree = elm.char_class();
- ZoneList<CharacterRange>* ranges = tree->ranges(zone());
- DCHECK(!ranges->is_empty());
- if (tree->is_negated()) {
- // A quick check uses multi-character mask and compare. There is no
- // useful way to incorporate a negative char class into this scheme
- // so we just conservatively create a mask and value that will always
- // succeed.
- pos->mask = 0;
- pos->value = 0;
- } else {
- int first_range = 0;
- while (ranges->at(first_range).from() > char_mask) {
- first_range++;
- if (first_range == ranges->length()) {
- details->set_cannot_match();
- pos->determines_perfectly = false;
- return;
- }
- }
- CharacterRange range = ranges->at(first_range);
- uc16 from = range.from();
- uc16 to = range.to();
- if (to > char_mask) {
- to = char_mask;
- }
- uint32_t differing_bits = (from ^ to);
- // A mask and compare is only perfect if the differing bits form a
- // number like 00011111 with one single block of trailing 1s.
- if ((differing_bits & (differing_bits + 1)) == 0 &&
- from + differing_bits == to) {
- pos->determines_perfectly = true;
- }
- uint32_t common_bits = ~SmearBitsRight(differing_bits);
- uint32_t bits = (from & common_bits);
- for (int i = first_range + 1; i < ranges->length(); i++) {
- CharacterRange range = ranges->at(i);
- uc16 from = range.from();
- uc16 to = range.to();
- if (from > char_mask) continue;
- if (to > char_mask) to = char_mask;
- // Here we are combining more ranges into the mask and compare
- // value. With each new range the mask becomes more sparse and
- // so the chances of a false positive rise. A character class
- // with multiple ranges is assumed never to be equivalent to a
- // mask and compare operation.
- pos->determines_perfectly = false;
- uint32_t new_common_bits = (from ^ to);
- new_common_bits = ~SmearBitsRight(new_common_bits);
- common_bits &= new_common_bits;
- bits &= new_common_bits;
- uint32_t differing_bits = (from & common_bits) ^ bits;
- common_bits ^= differing_bits;
- bits &= common_bits;
- }
- pos->mask = common_bits;
- pos->value = bits;
- }
- characters_filled_in++;
- DCHECK(characters_filled_in <= details->characters());
- if (characters_filled_in == details->characters()) {
- return;
- }
- }
- }
- DCHECK(characters_filled_in != details->characters());
- if (!details->cannot_match()) {
- on_success()->GetQuickCheckDetails(details, compiler, characters_filled_in,
- true);
- }
-}
-
-void QuickCheckDetails::Clear() {
- for (int i = 0; i < characters_; i++) {
- positions_[i].mask = 0;
- positions_[i].value = 0;
- positions_[i].determines_perfectly = false;
- }
- characters_ = 0;
-}
-
-void QuickCheckDetails::Advance(int by, bool one_byte) {
- if (by >= characters_ || by < 0) {
- DCHECK_IMPLIES(by < 0, characters_ == 0);
- Clear();
- return;
- }
- DCHECK_LE(characters_ - by, 4);
- DCHECK_LE(characters_, 4);
- for (int i = 0; i < characters_ - by; i++) {
- positions_[i] = positions_[by + i];
- }
- for (int i = characters_ - by; i < characters_; i++) {
- positions_[i].mask = 0;
- positions_[i].value = 0;
- positions_[i].determines_perfectly = false;
- }
- characters_ -= by;
- // We could change mask_ and value_ here but we would never advance unless
- // they had already been used in a check and they won't be used again because
- // it would gain us nothing. So there's no point.
-}
-
-void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
- DCHECK(characters_ == other->characters_);
- if (other->cannot_match_) {
- return;
- }
- if (cannot_match_) {
- *this = *other;
- return;
- }
- for (int i = from_index; i < characters_; i++) {
- QuickCheckDetails::Position* pos = positions(i);
- QuickCheckDetails::Position* other_pos = other->positions(i);
- if (pos->mask != other_pos->mask || pos->value != other_pos->value ||
- !other_pos->determines_perfectly) {
- // Our mask-compare operation will be approximate unless we have the
- // exact same operation on both sides of the alternation.
- pos->determines_perfectly = false;
- }
- pos->mask &= other_pos->mask;
- pos->value &= pos->mask;
- other_pos->value &= pos->mask;
- uc16 differing_bits = (pos->value ^ other_pos->value);
- pos->mask &= ~differing_bits;
- pos->value &= pos->mask;
- }
-}
-
-class VisitMarker {
- public:
- explicit VisitMarker(NodeInfo* info) : info_(info) {
- DCHECK(!info->visited);
- info->visited = true;
- }
- ~VisitMarker() { info_->visited = false; }
-
- private:
- NodeInfo* info_;
-};
-
-// Temporarily sets traversed_loop_initialization_node_.
-class LoopInitializationMarker {
- public:
- explicit LoopInitializationMarker(LoopChoiceNode* node) : node_(node) {
- DCHECK(!node_->traversed_loop_initialization_node_);
- node_->traversed_loop_initialization_node_ = true;
- }
- ~LoopInitializationMarker() {
- DCHECK(node_->traversed_loop_initialization_node_);
- node_->traversed_loop_initialization_node_ = false;
- }
-
- private:
- LoopChoiceNode* node_;
- DISALLOW_COPY_AND_ASSIGN(LoopInitializationMarker);
-};
-
-// Temporarily decrements min_loop_iterations_.
-class IterationDecrementer {
- public:
- explicit IterationDecrementer(LoopChoiceNode* node) : node_(node) {
- DCHECK_GT(node_->min_loop_iterations_, 0);
- --node_->min_loop_iterations_;
- }
- ~IterationDecrementer() { ++node_->min_loop_iterations_; }
-
- private:
- LoopChoiceNode* node_;
- DISALLOW_COPY_AND_ASSIGN(IterationDecrementer);
-};
-
-RegExpNode* SeqRegExpNode::FilterOneByte(int depth) {
- if (info()->replacement_calculated) return replacement();
- if (depth < 0) return this;
- DCHECK(!info()->visited);
- VisitMarker marker(info());
- return FilterSuccessor(depth - 1);
-}
-
-RegExpNode* SeqRegExpNode::FilterSuccessor(int depth) {
- RegExpNode* next = on_success_->FilterOneByte(depth - 1);
- if (next == nullptr) return set_replacement(nullptr);
- on_success_ = next;
- return set_replacement(this);
-}
-
-// We need to check for the following characters: 0x39C 0x3BC 0x178.
-bool RangeContainsLatin1Equivalents(CharacterRange range) {
- // TODO(dcarney): this could be a lot more efficient.
- return range.Contains(0x039C) || range.Contains(0x03BC) ||
- range.Contains(0x0178);
-}
-
-static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
- for (int i = 0; i < ranges->length(); i++) {
- // TODO(dcarney): this could be a lot more efficient.
- if (RangeContainsLatin1Equivalents(ranges->at(i))) return true;
- }
- return false;
-}
-
-RegExpNode* TextNode::FilterOneByte(int depth) {
- if (info()->replacement_calculated) return replacement();
- if (depth < 0) return this;
- DCHECK(!info()->visited);
- VisitMarker marker(info());
- int element_count = elements()->length();
- for (int i = 0; i < element_count; i++) {
- TextElement elm = elements()->at(i);
- if (elm.text_type() == TextElement::ATOM) {
- Vector<const uc16> quarks = elm.atom()->data();
- for (int j = 0; j < quarks.length(); j++) {
- uc16 c = quarks[j];
- if (elm.atom()->ignore_case()) {
- c = unibrow::Latin1::TryConvertToLatin1(c);
- }
- if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
- // Replace quark in case we converted to Latin-1.
- uc16* writable_quarks = const_cast<uc16*>(quarks.begin());
- writable_quarks[j] = c;
- }
- } else {
- DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
- RegExpCharacterClass* cc = elm.char_class();
- ZoneList<CharacterRange>* ranges = cc->ranges(zone());
- CharacterRange::Canonicalize(ranges);
- // Now they are in order so we only need to look at the first.
- int range_count = ranges->length();
- if (cc->is_negated()) {
- if (range_count != 0 && ranges->at(0).from() == 0 &&
- ranges->at(0).to() >= String::kMaxOneByteCharCode) {
- // This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
- continue;
- return set_replacement(nullptr);
- }
- } else {
- if (range_count == 0 ||
- ranges->at(0).from() > String::kMaxOneByteCharCode) {
- // This will be handled in a later filter.
- if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
- continue;
- return set_replacement(nullptr);
- }
- }
- }
- }
- return FilterSuccessor(depth - 1);
-}
-
-RegExpNode* LoopChoiceNode::FilterOneByte(int depth) {
- if (info()->replacement_calculated) return replacement();
- if (depth < 0) return this;
- if (info()->visited) return this;
- {
- VisitMarker marker(info());
-
- RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
- // If we can't continue after the loop then there is no sense in doing the
- // loop.
- if (continue_replacement == nullptr) return set_replacement(nullptr);
- }
-
- return ChoiceNode::FilterOneByte(depth - 1);
-}
-
-RegExpNode* ChoiceNode::FilterOneByte(int depth) {
- if (info()->replacement_calculated) return replacement();
- if (depth < 0) return this;
- if (info()->visited) return this;
- VisitMarker marker(info());
- int choice_count = alternatives_->length();
-
- for (int i = 0; i < choice_count; i++) {
- GuardedAlternative alternative = alternatives_->at(i);
- if (alternative.guards() != nullptr &&
- alternative.guards()->length() != 0) {
- set_replacement(this);
- return this;
- }
- }
-
- int surviving = 0;
- RegExpNode* survivor = nullptr;
- for (int i = 0; i < choice_count; i++) {
- GuardedAlternative alternative = alternatives_->at(i);
- RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
- DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
- if (replacement != nullptr) {
- alternatives_->at(i).set_node(replacement);
- surviving++;
- survivor = replacement;
- }
- }
- if (surviving < 2) return set_replacement(survivor);
-
- set_replacement(this);
- if (surviving == choice_count) {
- return this;
- }
- // Only some of the nodes survived the filtering. We need to rebuild the
- // alternatives list.
- ZoneList<GuardedAlternative>* new_alternatives =
- new (zone()) ZoneList<GuardedAlternative>(surviving, zone());
- for (int i = 0; i < choice_count; i++) {
- RegExpNode* replacement =
- alternatives_->at(i).node()->FilterOneByte(depth - 1);
- if (replacement != nullptr) {
- alternatives_->at(i).set_node(replacement);
- new_alternatives->Add(alternatives_->at(i), zone());
- }
- }
- alternatives_ = new_alternatives;
- return this;
-}
-
-RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(int depth) {
- if (info()->replacement_calculated) return replacement();
- if (depth < 0) return this;
- if (info()->visited) return this;
- VisitMarker marker(info());
- // Alternative 0 is the negative lookahead, alternative 1 is what comes
- // afterwards.
- RegExpNode* node = continue_node();
- RegExpNode* replacement = node->FilterOneByte(depth - 1);
- if (replacement == nullptr) return set_replacement(nullptr);
- alternatives_->at(kContinueIndex).set_node(replacement);
-
- RegExpNode* neg_node = lookaround_node();
- RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
- // If the negative lookahead is always going to fail then
- // we don't need to check it.
- if (neg_replacement == nullptr) return set_replacement(replacement);
- alternatives_->at(kLookaroundIndex).set_node(neg_replacement);
- return set_replacement(this);
-}
-
-void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) {
- if (body_can_be_zero_length_ || info()->visited) return;
- not_at_start = not_at_start || this->not_at_start();
- DCHECK_EQ(alternatives_->length(), 2); // There's just loop and continue.
- if (traversed_loop_initialization_node_ && min_loop_iterations_ > 0 &&
- loop_node_->EatsAtLeast(not_at_start) >
- continue_node_->EatsAtLeast(true)) {
- // Loop body is guaranteed to execute at least once, and consume characters
- // when it does, meaning the only possible quick checks from this point
- // begin with the loop body. We may recursively visit this LoopChoiceNode,
- // but we temporarily decrease its minimum iteration counter so we know when
- // to check the continue case.
- IterationDecrementer next_iteration(this);
- loop_node_->GetQuickCheckDetails(details, compiler, characters_filled_in,
- not_at_start);
- } else {
- // Might not consume anything in the loop body, so treat it like a normal
- // ChoiceNode (and don't recursively visit this node again).
- VisitMarker marker(info());
- ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in,
- not_at_start);
- }
-}
-
-void LoopChoiceNode::GetQuickCheckDetailsFromLoopEntry(
- QuickCheckDetails* details, RegExpCompiler* compiler,
- int characters_filled_in, bool not_at_start) {
- if (traversed_loop_initialization_node_) {
- // We already entered this loop once, exited via its continuation node, and
- // followed an outer loop's back-edge to before the loop entry point. We
- // could try to reset the minimum iteration count to its starting value at
- // this point, but that seems like more trouble than it's worth. It's safe
- // to keep going with the current (possibly reduced) minimum iteration
- // count.
- GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
- } else {
- // We are entering a loop via its counter initialization action, meaning we
- // are guaranteed to run the loop body at least some minimum number of times
- // before running the continuation node. Set a flag so that this node knows
- // (now and any times we visit it again recursively) that it was entered
- // from the top.
- LoopInitializationMarker marker(this);
- GetQuickCheckDetails(details, compiler, characters_filled_in, not_at_start);
- }
-}
-
-void LoopChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
- if (body_can_be_zero_length_ || budget <= 0) {
- bm->SetRest(offset);
- SaveBMInfo(bm, not_at_start, offset);
- return;
- }
- ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
- SaveBMInfo(bm, not_at_start, offset);
-}
-
-void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) {
- not_at_start = (not_at_start || not_at_start_);
- int choice_count = alternatives_->length();
- DCHECK_LT(0, choice_count);
- alternatives_->at(0).node()->GetQuickCheckDetails(
- details, compiler, characters_filled_in, not_at_start);
- for (int i = 1; i < choice_count; i++) {
- QuickCheckDetails new_details(details->characters());
- RegExpNode* node = alternatives_->at(i).node();
- node->GetQuickCheckDetails(&new_details, compiler, characters_filled_in,
- not_at_start);
- // Here we merge the quick match details of the two branches.
- details->Merge(&new_details, characters_filled_in);
- }
-}
-
-namespace {
-
-// Check for [0-9A-Z_a-z].
-void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word,
- Label* non_word, bool fall_through_on_word) {
- if (assembler->CheckSpecialCharacterClass(
- fall_through_on_word ? 'w' : 'W',
- fall_through_on_word ? non_word : word)) {
- // Optimized implementation available.
- return;
- }
- assembler->CheckCharacterGT('z', non_word);
- assembler->CheckCharacterLT('0', non_word);
- assembler->CheckCharacterGT('a' - 1, word);
- assembler->CheckCharacterLT('9' + 1, word);
- assembler->CheckCharacterLT('A', non_word);
- assembler->CheckCharacterLT('Z' + 1, word);
- if (fall_through_on_word) {
- assembler->CheckNotCharacter('_', non_word);
- } else {
- assembler->CheckCharacter('_', word);
- }
-}
-
-// Emit the code to check for a ^ in multiline mode (1-character lookbehind
-// that matches newline or the start of input).
-void EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
-
- // We will load the previous character into the current character register.
- Trace new_trace(*trace);
- new_trace.InvalidateCurrentCharacter();
-
- // A positive (> 0) cp_offset means we've already successfully matched a
- // non-empty-width part of the pattern, and thus cannot be at or before the
- // start of the subject string. We can thus skip both at-start and
- // bounds-checks when loading the one-character lookbehind.
- const bool may_be_at_or_before_subject_string_start =
- new_trace.cp_offset() <= 0;
-
- Label ok;
- if (may_be_at_or_before_subject_string_start) {
- // The start of input counts as a newline in this context, so skip to ok if
- // we are at the start.
- assembler->CheckAtStart(new_trace.cp_offset(), &ok);
- }
-
- // If we've already checked that we are not at the start of input, it's okay
- // to load the previous character without bounds checks.
- const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
- assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
- new_trace.backtrack(), can_skip_bounds_check);
- if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
- // Newline means \n, \r, 0x2028 or 0x2029.
- if (!compiler->one_byte()) {
- assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
- }
- assembler->CheckCharacter('\n', &ok);
- assembler->CheckNotCharacter('\r', new_trace.backtrack());
- }
- assembler->Bind(&ok);
- on_success->Emit(compiler, &new_trace);
-}
-
-} // namespace
-
-// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
-void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- Isolate* isolate = assembler->isolate();
- Trace::TriBool next_is_word_character = Trace::UNKNOWN;
- bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
- BoyerMooreLookahead* lookahead = bm_info(not_at_start);
- if (lookahead == nullptr) {
- int eats_at_least =
- Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(not_at_start));
- if (eats_at_least >= 1) {
- BoyerMooreLookahead* bm =
- new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
- FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
- if (bm->at(0)->is_non_word()) next_is_word_character = Trace::FALSE_VALUE;
- if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
- }
- } else {
- if (lookahead->at(0)->is_non_word())
- next_is_word_character = Trace::FALSE_VALUE;
- if (lookahead->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
- }
- bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
- if (next_is_word_character == Trace::UNKNOWN) {
- Label before_non_word;
- Label before_word;
- if (trace->characters_preloaded() != 1) {
- assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
- }
- // Fall through on non-word.
- EmitWordCheck(assembler, &before_word, &before_non_word, false);
- // Next character is not a word character.
- assembler->Bind(&before_non_word);
- Label ok;
- BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
- assembler->GoTo(&ok);
-
- assembler->Bind(&before_word);
- BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
- assembler->Bind(&ok);
- } else if (next_is_word_character == Trace::TRUE_VALUE) {
- BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
- } else {
- DCHECK(next_is_word_character == Trace::FALSE_VALUE);
- BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
- }
-}
-
-void AssertionNode::BacktrackIfPrevious(
- RegExpCompiler* compiler, Trace* trace,
- AssertionNode::IfPrevious backtrack_if_previous) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- Trace new_trace(*trace);
- new_trace.InvalidateCurrentCharacter();
-
- Label fall_through;
- Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack()
- : &fall_through;
- Label* word = backtrack_if_previous == kIsNonWord ? &fall_through
- : new_trace.backtrack();
-
- // A positive (> 0) cp_offset means we've already successfully matched a
- // non-empty-width part of the pattern, and thus cannot be at or before the
- // start of the subject string. We can thus skip both at-start and
- // bounds-checks when loading the one-character lookbehind.
- const bool may_be_at_or_before_subject_string_start =
- new_trace.cp_offset() <= 0;
-
- if (may_be_at_or_before_subject_string_start) {
- // The start of input counts as a non-word character, so the question is
- // decided if we are at the start.
- assembler->CheckAtStart(new_trace.cp_offset(), non_word);
- }
-
- // If we've already checked that we are not at the start of input, it's okay
- // to load the previous character without bounds checks.
- const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
- assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, non_word,
- can_skip_bounds_check);
- EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
-
- assembler->Bind(&fall_through);
- on_success()->Emit(compiler, &new_trace);
-}
-
-void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int filled_in, bool not_at_start) {
- if (assertion_type_ == AT_START && not_at_start) {
- details->set_cannot_match();
- return;
- }
- return on_success()->GetQuickCheckDetails(details, compiler, filled_in,
- not_at_start);
-}
-
-void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- switch (assertion_type_) {
- case AT_END: {
- Label ok;
- assembler->CheckPosition(trace->cp_offset(), &ok);
- assembler->GoTo(trace->backtrack());
- assembler->Bind(&ok);
- break;
- }
- case AT_START: {
- if (trace->at_start() == Trace::FALSE_VALUE) {
- assembler->GoTo(trace->backtrack());
- return;
- }
- if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
- Trace at_start_trace = *trace;
- at_start_trace.set_at_start(Trace::TRUE_VALUE);
- on_success()->Emit(compiler, &at_start_trace);
- return;
- }
- } break;
- case AFTER_NEWLINE:
- EmitHat(compiler, on_success(), trace);
- return;
- case AT_BOUNDARY:
- case AT_NON_BOUNDARY: {
- EmitBoundaryCheck(compiler, trace);
- return;
- }
- }
- on_success()->Emit(compiler, trace);
-}
-
-static bool DeterminedAlready(QuickCheckDetails* quick_check, int offset) {
- if (quick_check == nullptr) return false;
- if (offset >= quick_check->characters()) return false;
- return quick_check->positions(offset)->determines_perfectly;
-}
-
-static void UpdateBoundsCheck(int index, int* checked_up_to) {
- if (index > *checked_up_to) {
- *checked_up_to = index;
- }
-}
-
-// We call this repeatedly to generate code for each pass over the text node.
-// The passes are in increasing order of difficulty because we hope one
-// of the first passes will fail in which case we are saved the work of the
-// later passes. for example for the case independent regexp /%[asdfghjkl]a/
-// we will check the '%' in the first pass, the case independent 'a' in the
-// second pass and the character class in the last pass.
-//
-// The passes are done from right to left, so for example to test for /bar/
-// we will first test for an 'r' with offset 2, then an 'a' with offset 1
-// and then a 'b' with offset 0. This means we can avoid the end-of-input
-// bounds check most of the time. In the example we only need to check for
-// end-of-input when loading the putative 'r'.
-//
-// A slight complication involves the fact that the first character may already
-// be fetched into a register by the previous node. In this case we want to
-// do the test for that character first. We do this in separate passes. The
-// 'preloaded' argument indicates that we are doing such a 'pass'. If such a
-// pass has been performed then subsequent passes will have true in
-// first_element_checked to indicate that that character does not need to be
-// checked again.
-//
-// In addition to all this we are passed a Trace, which can
-// contain an AlternativeGeneration object. In this AlternativeGeneration
-// object we can see details of any quick check that was already passed in
-// order to get to the code we are now generating. The quick check can involve
-// loading characters, which means we do not need to recheck the bounds
-// up to the limit the quick check already checked. In addition the quick
-// check can have involved a mask and compare operation which may simplify
-// or obviate the need for further checks at some character positions.
-void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
- bool preloaded, Trace* trace,
- bool first_element_checked, int* checked_up_to) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- Isolate* isolate = assembler->isolate();
- bool one_byte = compiler->one_byte();
- Label* backtrack = trace->backtrack();
- QuickCheckDetails* quick_check = trace->quick_check_performed();
- int element_count = elements()->length();
- int backward_offset = read_backward() ? -Length() : 0;
- for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
- TextElement elm = elements()->at(i);
- int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
- if (elm.text_type() == TextElement::ATOM) {
- if (SkipPass(pass, elm.atom()->ignore_case())) continue;
- Vector<const uc16> quarks = elm.atom()->data();
- for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
- if (first_element_checked && i == 0 && j == 0) continue;
- if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
- uc16 quark = quarks[j];
- if (elm.atom()->ignore_case()) {
- // Everywhere else we assume that a non-Latin-1 character cannot match
- // a Latin-1 character. Avoid the cases where this is assumption is
- // invalid by using the Latin1 equivalent instead.
- quark = unibrow::Latin1::TryConvertToLatin1(quark);
- }
- bool needs_bounds_check =
- *checked_up_to < cp_offset + j || read_backward();
- bool bounds_checked = false;
- switch (pass) {
- case NON_LATIN1_MATCH:
- DCHECK(one_byte);
- if (quark > String::kMaxOneByteCharCode) {
- assembler->GoTo(backtrack);
- return;
- }
- break;
- case NON_LETTER_CHARACTER_MATCH:
- bounds_checked =
- EmitAtomNonLetter(isolate, compiler, quark, backtrack,
- cp_offset + j, needs_bounds_check, preloaded);
- break;
- case SIMPLE_CHARACTER_MATCH:
- bounds_checked = EmitSimpleCharacter(isolate, compiler, quark,
- backtrack, cp_offset + j,
- needs_bounds_check, preloaded);
- break;
- case CASE_CHARACTER_MATCH:
- bounds_checked =
- EmitAtomLetter(isolate, compiler, quark, backtrack,
- cp_offset + j, needs_bounds_check, preloaded);
- break;
- default:
- break;
- }
- if (bounds_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
- }
- } else {
- DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
- if (pass == CHARACTER_CLASS_MATCH) {
- if (first_element_checked && i == 0) continue;
- if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
- RegExpCharacterClass* cc = elm.char_class();
- bool bounds_check = *checked_up_to < cp_offset || read_backward();
- EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
- bounds_check, preloaded, zone());
- UpdateBoundsCheck(cp_offset, checked_up_to);
- }
- }
- }
-}
-
-int TextNode::Length() {
- TextElement elm = elements()->last();
- DCHECK_LE(0, elm.cp_offset());
- return elm.cp_offset() + elm.length();
-}
-
-bool TextNode::SkipPass(TextEmitPassType pass, bool ignore_case) {
- if (ignore_case) {
- return pass == SIMPLE_CHARACTER_MATCH;
- } else {
- return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
- }
-}
-
-TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
- ZoneList<CharacterRange>* ranges,
- bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags) {
- DCHECK_NOT_NULL(ranges);
- ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(1, zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(zone, ranges, flags)),
- zone);
- return new (zone) TextNode(elms, read_backward, on_success);
-}
-
-TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
- CharacterRange trail,
- bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags) {
- ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
- ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
- ZoneList<TextElement>* elms = new (zone) ZoneList<TextElement>(2, zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
- zone);
- elms->Add(TextElement::CharClass(
- new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
- zone);
- return new (zone) TextNode(elms, read_backward, on_success);
-}
-
-// This generates the code to match a text node. A text node can contain
-// straight character sequences (possibly to be matched in a case-independent
-// way) and character classes. For efficiency we do not do this in a single
-// pass from left to right. Instead we pass over the text node several times,
-// emitting code for some character positions every time. See the comment on
-// TextEmitPass for details.
-void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return;
- DCHECK(limit_result == CONTINUE);
-
- if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
- compiler->SetRegExpTooBig();
- return;
- }
-
- if (compiler->one_byte()) {
- int dummy = 0;
- TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
- }
-
- bool first_elt_done = false;
- int bound_checked_to = trace->cp_offset() - 1;
- bound_checked_to += trace->bound_checked_up_to();
-
- // If a character is preloaded into the current character register then
- // check that now.
- if (trace->characters_preloaded() == 1) {
- for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
- TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), true, trace,
- false, &bound_checked_to);
- }
- first_elt_done = true;
- }
-
- for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
- TextEmitPass(compiler, static_cast<TextEmitPassType>(pass), false, trace,
- first_elt_done, &bound_checked_to);
- }
-
- Trace successor_trace(*trace);
- // If we advance backward, we may end up at the start.
- successor_trace.AdvanceCurrentPositionInTrace(
- read_backward() ? -Length() : Length(), compiler);
- successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
- : Trace::FALSE_VALUE);
- RecursionCheck rc(compiler);
- on_success()->Emit(compiler, &successor_trace);
-}
-
-void Trace::InvalidateCurrentCharacter() { characters_preloaded_ = 0; }
-
-void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
- // We don't have an instruction for shifting the current character register
- // down or for using a shifted value for anything so lets just forget that
- // we preloaded any characters into it.
- characters_preloaded_ = 0;
- // Adjust the offsets of the quick check performed information. This
- // information is used to find out what we already determined about the
- // characters by means of mask and compare.
- quick_check_performed_.Advance(by, compiler->one_byte());
- cp_offset_ += by;
- if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
- compiler->SetRegExpTooBig();
- cp_offset_ = 0;
- }
- bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
-}
-
-void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte) {
- int element_count = elements()->length();
- for (int i = 0; i < element_count; i++) {
- TextElement elm = elements()->at(i);
- if (elm.text_type() == TextElement::CHAR_CLASS) {
- RegExpCharacterClass* cc = elm.char_class();
-#ifdef V8_INTL_SUPPORT
- bool case_equivalents_already_added =
- NeedsUnicodeCaseEquivalents(cc->flags());
-#else
- bool case_equivalents_already_added = false;
-#endif
- if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
- // None of the standard character classes is different in the case
- // independent case and it slows us down if we don't know that.
- if (cc->is_standard(zone())) continue;
- ZoneList<CharacterRange>* ranges = cc->ranges(zone());
- CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
- is_one_byte);
- }
- }
- }
-}
-
-int TextNode::GreedyLoopTextLength() { return Length(); }
-
-RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
- RegExpCompiler* compiler) {
- if (read_backward()) return nullptr;
- if (elements()->length() != 1) return nullptr;
- TextElement elm = elements()->at(0);
- if (elm.text_type() != TextElement::CHAR_CLASS) return nullptr;
- RegExpCharacterClass* node = elm.char_class();
- ZoneList<CharacterRange>* ranges = node->ranges(zone());
- CharacterRange::Canonicalize(ranges);
- if (node->is_negated()) {
- return ranges->length() == 0 ? on_success() : nullptr;
- }
- if (ranges->length() != 1) return nullptr;
- uint32_t max_char;
- if (compiler->one_byte()) {
- max_char = String::kMaxOneByteCharCode;
- } else {
- max_char = String::kMaxUtf16CodeUnit;
- }
- return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
-}
-
-// Finds the fixed match length of a sequence of nodes that goes from
-// this alternative and back to this choice node. If there are variable
-// length nodes or other complications in the way then return a sentinel
-// value indicating that a greedy loop cannot be constructed.
-int ChoiceNode::GreedyLoopTextLengthForAlternative(
- GuardedAlternative* alternative) {
- int length = 0;
- RegExpNode* node = alternative->node();
- // Later we will generate code for all these text nodes using recursion
- // so we have to limit the max number.
- int recursion_depth = 0;
- while (node != this) {
- if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
- return kNodeIsTooComplexForGreedyLoops;
- }
- int node_length = node->GreedyLoopTextLength();
- if (node_length == kNodeIsTooComplexForGreedyLoops) {
- return kNodeIsTooComplexForGreedyLoops;
- }
- length += node_length;
- SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
- node = seq_node->on_success();
- }
- return read_backward() ? -length : length;
-}
-
-void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
- DCHECK_NULL(loop_node_);
- AddAlternative(alt);
- loop_node_ = alt.node();
-}
-
-void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
- DCHECK_NULL(continue_node_);
- AddAlternative(alt);
- continue_node_ = alt.node();
-}
-
-void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- if (trace->stop_node() == this) {
- // Back edge of greedy optimized loop node graph.
- int text_length =
- GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
- DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
- // Update the counter-based backtracking info on the stack. This is an
- // optimization for greedy loops (see below).
- DCHECK(trace->cp_offset() == text_length);
- macro_assembler->AdvanceCurrentPosition(text_length);
- macro_assembler->GoTo(trace->loop_label());
- return;
- }
- DCHECK_NULL(trace->stop_node());
- if (!trace->is_trivial()) {
- trace->Flush(compiler, this);
- return;
- }
- ChoiceNode::Emit(compiler, trace);
-}
-
-int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
- int eats_at_least) {
- int preload_characters = Min(4, eats_at_least);
- DCHECK_LE(preload_characters, 4);
- if (compiler->macro_assembler()->CanReadUnaligned()) {
- bool one_byte = compiler->one_byte();
- if (one_byte) {
- // We can't preload 3 characters because there is no machine instruction
- // to do that. We can't just load 4 because we could be reading
- // beyond the end of the string, which could cause a memory fault.
- if (preload_characters == 3) preload_characters = 2;
- } else {
- if (preload_characters > 2) preload_characters = 2;
- }
- } else {
- if (preload_characters > 1) preload_characters = 1;
- }
- return preload_characters;
-}
-
-// This class is used when generating the alternatives in a choice node. It
-// records the way the alternative is being code generated.
-class AlternativeGeneration : public Malloced {
- public:
- AlternativeGeneration()
- : possible_success(),
- expects_preload(false),
- after(),
- quick_check_details() {}
- Label possible_success;
- bool expects_preload;
- Label after;
- QuickCheckDetails quick_check_details;
-};
-
-// Creates a list of AlternativeGenerations. If the list has a reasonable
-// size then it is on the stack, otherwise the excess is on the heap.
-class AlternativeGenerationList {
- public:
- AlternativeGenerationList(int count, Zone* zone) : alt_gens_(count, zone) {
- for (int i = 0; i < count && i < kAFew; i++) {
- alt_gens_.Add(a_few_alt_gens_ + i, zone);
- }
- for (int i = kAFew; i < count; i++) {
- alt_gens_.Add(new AlternativeGeneration(), zone);
- }
- }
- ~AlternativeGenerationList() {
- for (int i = kAFew; i < alt_gens_.length(); i++) {
- delete alt_gens_[i];
- alt_gens_[i] = nullptr;
- }
- }
-
- AlternativeGeneration* at(int i) { return alt_gens_[i]; }
-
- private:
- static const int kAFew = 10;
- ZoneList<AlternativeGeneration*> alt_gens_;
- AlternativeGeneration a_few_alt_gens_[kAFew];
-};
-
-void BoyerMoorePositionInfo::Set(int character) {
- SetInterval(Interval(character, character));
-}
-
-namespace {
-
-ContainedInLattice AddRange(ContainedInLattice containment, const int* ranges,
- int ranges_length, Interval new_range) {
- DCHECK_EQ(1, ranges_length & 1);
- DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
- if (containment == kLatticeUnknown) return containment;
- bool inside = false;
- int last = 0;
- for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
- // Consider the range from last to ranges[i].
- // We haven't got to the new range yet.
- if (ranges[i] <= new_range.from()) continue;
- // New range is wholly inside last-ranges[i]. Note that new_range.to() is
- // inclusive, but the values in ranges are not.
- if (last <= new_range.from() && new_range.to() < ranges[i]) {
- return Combine(containment, inside ? kLatticeIn : kLatticeOut);
- }
- return kLatticeUnknown;
- }
- return containment;
-}
-
-int BitsetFirstSetBit(BoyerMoorePositionInfo::Bitset bitset) {
- STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
- 2 * kInt64Size * kBitsPerByte);
-
- // Slight fiddling is needed here, since the bitset is of length 128 while
- // CountTrailingZeros requires an integral type and std::bitset can only
- // convert to unsigned long long. So we handle the most- and least-significant
- // bits separately.
-
- {
- static constexpr BoyerMoorePositionInfo::Bitset mask(~uint64_t{0});
- BoyerMoorePositionInfo::Bitset masked_bitset = bitset & mask;
- STATIC_ASSERT(kInt64Size >= sizeof(decltype(masked_bitset.to_ullong())));
- uint64_t lsb = masked_bitset.to_ullong();
- if (lsb != 0) return base::bits::CountTrailingZeros(lsb);
- }
-
- {
- BoyerMoorePositionInfo::Bitset masked_bitset = bitset >> 64;
- uint64_t msb = masked_bitset.to_ullong();
- if (msb != 0) return 64 + base::bits::CountTrailingZeros(msb);
- }
-
- return -1;
-}
-
-} // namespace
-
-void BoyerMoorePositionInfo::SetInterval(const Interval& interval) {
- w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
-
- if (interval.size() >= kMapSize) {
- map_count_ = kMapSize;
- map_.set();
- return;
- }
-
- for (int i = interval.from(); i <= interval.to(); i++) {
- int mod_character = (i & kMask);
- if (!map_[mod_character]) {
- map_count_++;
- map_.set(mod_character);
- }
- if (map_count_ == kMapSize) return;
- }
-}
-
-void BoyerMoorePositionInfo::SetAll() {
- w_ = kLatticeUnknown;
- if (map_count_ != kMapSize) {
- map_count_ = kMapSize;
- map_.set();
- }
-}
-
-BoyerMooreLookahead::BoyerMooreLookahead(int length, RegExpCompiler* compiler,
- Zone* zone)
- : length_(length), compiler_(compiler) {
- if (compiler->one_byte()) {
- max_char_ = String::kMaxOneByteCharCode;
- } else {
- max_char_ = String::kMaxUtf16CodeUnit;
- }
- bitmaps_ = new (zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
- for (int i = 0; i < length; i++) {
- bitmaps_->Add(new (zone) BoyerMoorePositionInfo(), zone);
- }
-}
-
-// Find the longest range of lookahead that has the fewest number of different
-// characters that can occur at a given position. Since we are optimizing two
-// different parameters at once this is a tradeoff.
-bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
- int biggest_points = 0;
- // If more than 32 characters out of 128 can occur it is unlikely that we can
- // be lucky enough to step forwards much of the time.
- const int kMaxMax = 32;
- for (int max_number_of_chars = 4; max_number_of_chars < kMaxMax;
- max_number_of_chars *= 2) {
- biggest_points =
- FindBestInterval(max_number_of_chars, biggest_points, from, to);
- }
- if (biggest_points == 0) return false;
- return true;
-}
-
-// Find the highest-points range between 0 and length_ where the character
-// information is not too vague. 'Too vague' means that there are more than
-// max_number_of_chars that can occur at this position. Calculates the number
-// of points as the product of width-of-the-range and
-// probability-of-finding-one-of-the-characters, where the probability is
-// calculated using the frequency distribution of the sample subject string.
-int BoyerMooreLookahead::FindBestInterval(int max_number_of_chars,
- int old_biggest_points, int* from,
- int* to) {
- int biggest_points = old_biggest_points;
- static const int kSize = RegExpMacroAssembler::kTableSize;
- for (int i = 0; i < length_;) {
- while (i < length_ && Count(i) > max_number_of_chars) i++;
- if (i == length_) break;
- int remembered_from = i;
-
- BoyerMoorePositionInfo::Bitset union_bitset;
- for (; i < length_ && Count(i) <= max_number_of_chars; i++) {
- union_bitset |= bitmaps_->at(i)->raw_bitset();
- }
-
- int frequency = 0;
-
- // Iterate only over set bits.
- int j;
- while ((j = BitsetFirstSetBit(union_bitset)) != -1) {
- DCHECK(union_bitset[j]); // Sanity check.
- // Add 1 to the frequency to give a small per-character boost for
- // the cases where our sampling is not good enough and many
- // characters have a frequency of zero. This means the frequency
- // can theoretically be up to 2*kSize though we treat it mostly as
- // a fraction of kSize.
- frequency += compiler_->frequency_collator()->Frequency(j) + 1;
- union_bitset.reset(j);
- }
-
- // We use the probability of skipping times the distance we are skipping to
- // judge the effectiveness of this. Actually we have a cut-off: By
- // dividing by 2 we switch off the skipping if the probability of skipping
- // is less than 50%. This is because the multibyte mask-and-compare
- // skipping in quickcheck is more likely to do well on this case.
- bool in_quickcheck_range =
- ((i - remembered_from < 4) ||
- (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
- // Called 'probability' but it is only a rough estimate and can actually
- // be outside the 0-kSize range.
- int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
- int points = (i - remembered_from) * probability;
- if (points > biggest_points) {
- *from = remembered_from;
- *to = i - 1;
- biggest_points = points;
- }
- }
- return biggest_points;
-}
-
-// Take all the characters that will not prevent a successful match if they
-// occur in the subject string in the range between min_lookahead and
-// max_lookahead (inclusive) measured from the current position. If the
-// character at max_lookahead offset is not one of these characters, then we
-// can safely skip forwards by the number of characters in the range.
-int BoyerMooreLookahead::GetSkipTable(int min_lookahead, int max_lookahead,
- Handle<ByteArray> boolean_skip_table) {
- const int kSkipArrayEntry = 0;
- const int kDontSkipArrayEntry = 1;
-
- std::memset(boolean_skip_table->GetDataStartAddress(), kSkipArrayEntry,
- boolean_skip_table->length());
-
- for (int i = max_lookahead; i >= min_lookahead; i--) {
- BoyerMoorePositionInfo::Bitset bitset = bitmaps_->at(i)->raw_bitset();
-
- // Iterate only over set bits.
- int j;
- while ((j = BitsetFirstSetBit(bitset)) != -1) {
- DCHECK(bitset[j]); // Sanity check.
- boolean_skip_table->set(j, kDontSkipArrayEntry);
- bitset.reset(j);
- }
- }
-
- const int skip = max_lookahead + 1 - min_lookahead;
- return skip;
-}
-
-// See comment above on the implementation of GetSkipTable.
-void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
- const int kSize = RegExpMacroAssembler::kTableSize;
-
- int min_lookahead = 0;
- int max_lookahead = 0;
-
- if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
-
- // Check if we only have a single non-empty position info, and that info
- // contains precisely one character.
- bool found_single_character = false;
- int single_character = 0;
- for (int i = max_lookahead; i >= min_lookahead; i--) {
- BoyerMoorePositionInfo* map = bitmaps_->at(i);
- if (map->map_count() == 0) continue;
-
- if (found_single_character || map->map_count() > 1) {
- found_single_character = false;
- break;
- }
-
- DCHECK(!found_single_character);
- DCHECK_EQ(map->map_count(), 1);
-
- found_single_character = true;
- single_character = BitsetFirstSetBit(map->raw_bitset());
-
- DCHECK_NE(single_character, -1);
- }
-
- int lookahead_width = max_lookahead + 1 - min_lookahead;
-
- if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
- // The mask-compare can probably handle this better.
- return;
- }
-
- if (found_single_character) {
- Label cont, again;
- masm->Bind(&again);
- masm->LoadCurrentCharacter(max_lookahead, &cont, true);
- if (max_char_ > kSize) {
- masm->CheckCharacterAfterAnd(single_character,
- RegExpMacroAssembler::kTableMask, &cont);
- } else {
- masm->CheckCharacter(single_character, &cont);
- }
- masm->AdvanceCurrentPosition(lookahead_width);
- masm->GoTo(&again);
- masm->Bind(&cont);
- return;
- }
-
- Factory* factory = masm->isolate()->factory();
- Handle<ByteArray> boolean_skip_table =
- factory->NewByteArray(kSize, AllocationType::kOld);
- int skip_distance =
- GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table);
- DCHECK_NE(0, skip_distance);
-
- Label cont, again;
- masm->Bind(&again);
- masm->LoadCurrentCharacter(max_lookahead, &cont, true);
- masm->CheckBitInTable(boolean_skip_table, &cont);
- masm->AdvanceCurrentPosition(skip_distance);
- masm->GoTo(&again);
- masm->Bind(&cont);
-}
-
-/* Code generation for choice nodes.
- *
- * We generate quick checks that do a mask and compare to eliminate a
- * choice. If the quick check succeeds then it jumps to the continuation to
- * do slow checks and check subsequent nodes. If it fails (the common case)
- * it falls through to the next choice.
- *
- * Here is the desired flow graph. Nodes directly below each other imply
- * fallthrough. Alternatives 1 and 2 have quick checks. Alternative
- * 3 doesn't have a quick check so we have to call the slow check.
- * Nodes are marked Qn for quick checks and Sn for slow checks. The entire
- * regexp continuation is generated directly after the Sn node, up to the
- * next GoTo if we decide to reuse some already generated code. Some
- * nodes expect preload_characters to be preloaded into the current
- * character register. R nodes do this preloading. Vertices are marked
- * F for failures and S for success (possible success in the case of quick
- * nodes). L, V, < and > are used as arrow heads.
- *
- * ----------> R
- * |
- * V
- * Q1 -----> S1
- * | S /
- * F| /
- * | F/
- * | /
- * | R
- * | /
- * V L
- * Q2 -----> S2
- * | S /
- * F| /
- * | F/
- * | /
- * | R
- * | /
- * V L
- * S3
- * |
- * F|
- * |
- * R
- * |
- * backtrack V
- * <----------Q4
- * \ F |
- * \ |S
- * \ F V
- * \-----S4
- *
- * For greedy loops we push the current position, then generate the code that
- * eats the input specially in EmitGreedyLoop. The other choice (the
- * continuation) is generated by the normal code in EmitChoices, and steps back
- * in the input to the starting position when it fails to match. The loop code
- * looks like this (U is the unwind code that steps back in the greedy loop).
- *
- * _____
- * / \
- * V |
- * ----------> S1 |
- * /| |
- * / |S |
- * F/ \_____/
- * /
- * |<-----
- * | \
- * V |S
- * Q2 ---> U----->backtrack
- * | F /
- * S| /
- * V F /
- * S2--/
- */
-
-GreedyLoopState::GreedyLoopState(bool not_at_start) {
- counter_backtrack_trace_.set_backtrack(&label_);
- if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
-}
-
-void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
-#ifdef DEBUG
- int choice_count = alternatives_->length();
- for (int i = 0; i < choice_count - 1; i++) {
- GuardedAlternative alternative = alternatives_->at(i);
- ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == nullptr) ? 0 : guards->length();
- for (int j = 0; j < guard_count; j++) {
- DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
- }
- }
-#endif
-}
-
-void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
- PreloadState* state) {
- if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
- // Save some time by looking at most one machine word ahead.
- state->eats_at_least_ =
- EatsAtLeast(current_trace->at_start() == Trace::FALSE_VALUE);
- }
- state->preload_characters_ =
- CalculatePreloadCharacters(compiler, state->eats_at_least_);
-
- state->preload_is_current_ =
- (current_trace->characters_preloaded() == state->preload_characters_);
- state->preload_has_checked_bounds_ = state->preload_is_current_;
-}
-
-void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- int choice_count = alternatives_->length();
-
- if (choice_count == 1 && alternatives_->at(0).guards() == nullptr) {
- alternatives_->at(0).node()->Emit(compiler, trace);
- return;
- }
-
- AssertGuardsMentionRegisters(trace);
-
- LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return;
- DCHECK(limit_result == CONTINUE);
-
- // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
- // other choice nodes we only flush if we are out of code size budget.
- if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
- trace->Flush(compiler, this);
- return;
- }
-
- RecursionCheck rc(compiler);
-
- PreloadState preload;
- preload.init();
- GreedyLoopState greedy_loop_state(not_at_start());
-
- int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
- AlternativeGenerationList alt_gens(choice_count, zone());
-
- if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
- trace = EmitGreedyLoop(compiler, trace, &alt_gens, &preload,
- &greedy_loop_state, text_length);
- } else {
- // TODO(erikcorry): Delete this. We don't need this label, but it makes us
- // match the traces produced pre-cleanup.
- Label second_choice;
- compiler->macro_assembler()->Bind(&second_choice);
-
- preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
-
- EmitChoices(compiler, &alt_gens, 0, trace, &preload);
- }
-
- // At this point we need to generate slow checks for the alternatives where
- // the quick check was inlined. We can recognize these because the associated
- // label was bound.
- int new_flush_budget = trace->flush_budget() / choice_count;
- for (int i = 0; i < choice_count; i++) {
- AlternativeGeneration* alt_gen = alt_gens.at(i);
- Trace new_trace(*trace);
- // If there are actions to be flushed we have to limit how many times
- // they are flushed. Take the budget of the parent trace and distribute
- // it fairly amongst the children.
- if (new_trace.actions() != nullptr) {
- new_trace.set_flush_budget(new_flush_budget);
- }
- bool next_expects_preload =
- i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
- EmitOutOfLineContinuation(compiler, &new_trace, alternatives_->at(i),
- alt_gen, preload.preload_characters_,
- next_expects_preload);
- }
-}
-
-Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
- AlternativeGenerationList* alt_gens,
- PreloadState* preload,
- GreedyLoopState* greedy_loop_state,
- int text_length) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- // Here we have special handling for greedy loops containing only text nodes
- // and other simple nodes. These are handled by pushing the current
- // position on the stack and then incrementing the current position each
- // time around the switch. On backtrack we decrement the current position
- // and check it against the pushed value. This avoids pushing backtrack
- // information for each iteration of the loop, which could take up a lot of
- // space.
- DCHECK(trace->stop_node() == nullptr);
- macro_assembler->PushCurrentPosition();
- Label greedy_match_failed;
- Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
- greedy_match_trace.set_backtrack(&greedy_match_failed);
- Label loop_label;
- macro_assembler->Bind(&loop_label);
- greedy_match_trace.set_stop_node(this);
- greedy_match_trace.set_loop_label(&loop_label);
- alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
- macro_assembler->Bind(&greedy_match_failed);
-
- Label second_choice; // For use in greedy matches.
- macro_assembler->Bind(&second_choice);
-
- Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
-
- EmitChoices(compiler, alt_gens, 1, new_trace, preload);
-
- macro_assembler->Bind(greedy_loop_state->label());
- // If we have unwound to the bottom then backtrack.
- macro_assembler->CheckGreedyLoop(trace->backtrack());
- // Otherwise try the second priority at an earlier position.
- macro_assembler->AdvanceCurrentPosition(-text_length);
- macro_assembler->GoTo(&second_choice);
- return new_trace;
-}
-
-int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
- Trace* trace) {
- int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
- if (alternatives_->length() != 2) return eats_at_least;
-
- GuardedAlternative alt1 = alternatives_->at(1);
- if (alt1.guards() != nullptr && alt1.guards()->length() != 0) {
- return eats_at_least;
- }
- RegExpNode* eats_anything_node = alt1.node();
- if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
- return eats_at_least;
- }
-
- // Really we should be creating a new trace when we execute this function,
- // but there is no need, because the code it generates cannot backtrack, and
- // we always arrive here with a trivial trace (since it's the entry to a
- // loop. That also implies that there are no preloaded characters, which is
- // good, because it means we won't be violating any assumptions by
- // overwriting those characters with new load instructions.
- DCHECK(trace->is_trivial());
-
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- Isolate* isolate = macro_assembler->isolate();
- // At this point we know that we are at a non-greedy loop that will eat
- // any character one at a time. Any non-anchored regexp has such a
- // loop prepended to it in order to find where it starts. We look for
- // a pattern of the form ...abc... where we can look 6 characters ahead
- // and step forwards 3 if the character is not one of abc. Abc need
- // not be atoms, they can be any reasonably limited character class or
- // small alternation.
- BoyerMooreLookahead* bm = bm_info(false);
- if (bm == nullptr) {
- eats_at_least = Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(false));
- if (eats_at_least >= 1) {
- bm = new (zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
- GuardedAlternative alt0 = alternatives_->at(0);
- alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm, false);
- }
- }
- if (bm != nullptr) {
- bm->EmitSkipInstructions(macro_assembler);
- }
- return eats_at_least;
-}
-
-void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
- AlternativeGenerationList* alt_gens,
- int first_choice, Trace* trace,
- PreloadState* preload) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- SetUpPreLoad(compiler, trace, preload);
-
- // For now we just call all choices one after the other. The idea ultimately
- // is to use the Dispatch table to try only the relevant ones.
- int choice_count = alternatives_->length();
-
- int new_flush_budget = trace->flush_budget() / choice_count;
-
- for (int i = first_choice; i < choice_count; i++) {
- bool is_last = i == choice_count - 1;
- bool fall_through_on_failure = !is_last;
- GuardedAlternative alternative = alternatives_->at(i);
- AlternativeGeneration* alt_gen = alt_gens->at(i);
- alt_gen->quick_check_details.set_characters(preload->preload_characters_);
- ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == nullptr) ? 0 : guards->length();
- Trace new_trace(*trace);
- new_trace.set_characters_preloaded(
- preload->preload_is_current_ ? preload->preload_characters_ : 0);
- if (preload->preload_has_checked_bounds_) {
- new_trace.set_bound_checked_up_to(preload->preload_characters_);
- }
- new_trace.quick_check_performed()->Clear();
- if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
- if (!is_last) {
- new_trace.set_backtrack(&alt_gen->after);
- }
- alt_gen->expects_preload = preload->preload_is_current_;
- bool generate_full_check_inline = false;
- if (compiler->optimize() &&
- try_to_emit_quick_check_for_alternative(i == 0) &&
- alternative.node()->EmitQuickCheck(
- compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
- &alt_gen->possible_success, &alt_gen->quick_check_details,
- fall_through_on_failure, this)) {
- // Quick check was generated for this choice.
- preload->preload_is_current_ = true;
- preload->preload_has_checked_bounds_ = true;
- // If we generated the quick check to fall through on possible success,
- // we now need to generate the full check inline.
- if (!fall_through_on_failure) {
- macro_assembler->Bind(&alt_gen->possible_success);
- new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
- new_trace.set_characters_preloaded(preload->preload_characters_);
- new_trace.set_bound_checked_up_to(preload->preload_characters_);
- generate_full_check_inline = true;
- }
- } else if (alt_gen->quick_check_details.cannot_match()) {
- if (!fall_through_on_failure) {
- macro_assembler->GoTo(trace->backtrack());
- }
- continue;
- } else {
- // No quick check was generated. Put the full code here.
- // If this is not the first choice then there could be slow checks from
- // previous cases that go here when they fail. There's no reason to
- // insist that they preload characters since the slow check we are about
- // to generate probably can't use it.
- if (i != first_choice) {
- alt_gen->expects_preload = false;
- new_trace.InvalidateCurrentCharacter();
- }
- generate_full_check_inline = true;
- }
- if (generate_full_check_inline) {
- if (new_trace.actions() != nullptr) {
- new_trace.set_flush_budget(new_flush_budget);
- }
- for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, guards->at(j), &new_trace);
- }
- alternative.node()->Emit(compiler, &new_trace);
- preload->preload_is_current_ = false;
- }
- macro_assembler->Bind(&alt_gen->after);
- }
-}
-
-void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
- Trace* trace,
- GuardedAlternative alternative,
- AlternativeGeneration* alt_gen,
- int preload_characters,
- bool next_expects_preload) {
- if (!alt_gen->possible_success.is_linked()) return;
-
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- macro_assembler->Bind(&alt_gen->possible_success);
- Trace out_of_line_trace(*trace);
- out_of_line_trace.set_characters_preloaded(preload_characters);
- out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
- if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
- ZoneList<Guard*>* guards = alternative.guards();
- int guard_count = (guards == nullptr) ? 0 : guards->length();
- if (next_expects_preload) {
- Label reload_current_char;
- out_of_line_trace.set_backtrack(&reload_current_char);
- for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
- }
- alternative.node()->Emit(compiler, &out_of_line_trace);
- macro_assembler->Bind(&reload_current_char);
- // Reload the current character, since the next quick check expects that.
- // We don't need to check bounds here because we only get into this
- // code through a quick check which already did the checked load.
- macro_assembler->LoadCurrentCharacter(trace->cp_offset(), nullptr, false,
- preload_characters);
- macro_assembler->GoTo(&(alt_gen->after));
- } else {
- out_of_line_trace.set_backtrack(&(alt_gen->after));
- for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
- }
- alternative.node()->Emit(compiler, &out_of_line_trace);
- }
-}
-
-void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return;
- DCHECK(limit_result == CONTINUE);
-
- RecursionCheck rc(compiler);
-
- switch (action_type_) {
- case STORE_POSITION: {
- Trace::DeferredCapture new_capture(data_.u_position_register.reg,
- data_.u_position_register.is_capture,
- trace);
- Trace new_trace = *trace;
- new_trace.add_action(&new_capture);
- on_success()->Emit(compiler, &new_trace);
- break;
- }
- case INCREMENT_REGISTER: {
- Trace::DeferredIncrementRegister new_increment(
- data_.u_increment_register.reg);
- Trace new_trace = *trace;
- new_trace.add_action(&new_increment);
- on_success()->Emit(compiler, &new_trace);
- break;
- }
- case SET_REGISTER_FOR_LOOP: {
- Trace::DeferredSetRegisterForLoop new_set(data_.u_store_register.reg,
- data_.u_store_register.value);
- Trace new_trace = *trace;
- new_trace.add_action(&new_set);
- on_success()->Emit(compiler, &new_trace);
- break;
- }
- case CLEAR_CAPTURES: {
- Trace::DeferredClearCaptures new_capture(Interval(
- data_.u_clear_captures.range_from, data_.u_clear_captures.range_to));
- Trace new_trace = *trace;
- new_trace.add_action(&new_capture);
- on_success()->Emit(compiler, &new_trace);
- break;
- }
- case BEGIN_SUBMATCH:
- if (!trace->is_trivial()) {
- trace->Flush(compiler, this);
- } else {
- assembler->WriteCurrentPositionToRegister(
- data_.u_submatch.current_position_register, 0);
- assembler->WriteStackPointerToRegister(
- data_.u_submatch.stack_pointer_register);
- on_success()->Emit(compiler, trace);
- }
- break;
- case EMPTY_MATCH_CHECK: {
- int start_pos_reg = data_.u_empty_match_check.start_register;
- int stored_pos = 0;
- int rep_reg = data_.u_empty_match_check.repetition_register;
- bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
- bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
- if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
- // If we know we haven't advanced and there is no minimum we
- // can just backtrack immediately.
- assembler->GoTo(trace->backtrack());
- } else if (know_dist && stored_pos < trace->cp_offset()) {
- // If we know we've advanced we can generate the continuation
- // immediately.
- on_success()->Emit(compiler, trace);
- } else if (!trace->is_trivial()) {
- trace->Flush(compiler, this);
- } else {
- Label skip_empty_check;
- // If we have a minimum number of repetitions we check the current
- // number first and skip the empty check if it's not enough.
- if (has_minimum) {
- int limit = data_.u_empty_match_check.repetition_limit;
- assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
- }
- // If the match is empty we bail out, otherwise we fall through
- // to the on-success continuation.
- assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
- trace->backtrack());
- assembler->Bind(&skip_empty_check);
- on_success()->Emit(compiler, trace);
- }
- break;
- }
- case POSITIVE_SUBMATCH_SUCCESS: {
- if (!trace->is_trivial()) {
- trace->Flush(compiler, this);
- return;
- }
- assembler->ReadCurrentPositionFromRegister(
- data_.u_submatch.current_position_register);
- assembler->ReadStackPointerFromRegister(
- data_.u_submatch.stack_pointer_register);
- int clear_register_count = data_.u_submatch.clear_register_count;
- if (clear_register_count == 0) {
- on_success()->Emit(compiler, trace);
- return;
- }
- int clear_registers_from = data_.u_submatch.clear_register_from;
- Label clear_registers_backtrack;
- Trace new_trace = *trace;
- new_trace.set_backtrack(&clear_registers_backtrack);
- on_success()->Emit(compiler, &new_trace);
-
- assembler->Bind(&clear_registers_backtrack);
- int clear_registers_to = clear_registers_from + clear_register_count - 1;
- assembler->ClearRegisters(clear_registers_from, clear_registers_to);
-
- DCHECK(trace->backtrack() == nullptr);
- assembler->Backtrack();
- return;
- }
- default:
- UNREACHABLE();
- }
-}
-
-void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* assembler = compiler->macro_assembler();
- if (!trace->is_trivial()) {
- trace->Flush(compiler, this);
- return;
- }
-
- LimitResult limit_result = LimitVersions(compiler, trace);
- if (limit_result == DONE) return;
- DCHECK(limit_result == CONTINUE);
-
- RecursionCheck rc(compiler);
-
- DCHECK_EQ(start_reg_ + 1, end_reg_);
- if (IgnoreCase(flags_)) {
- assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
- trace->backtrack());
- } else {
- assembler->CheckNotBackReference(start_reg_, read_backward(),
- trace->backtrack());
- }
- // We are going to advance backward, so we may end up at the start.
- if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
-
- // Check that the back reference does not end inside a surrogate pair.
- if (IsUnicode(flags_) && !compiler->one_byte()) {
- assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
- }
- on_success()->Emit(compiler, trace);
-}
-
-void TextNode::CalculateOffsets() {
- int element_count = elements()->length();
- // Set up the offsets of the elements relative to the start. This is a fixed
- // quantity since a TextNode can only contain fixed-width things.
- int cp_offset = 0;
- for (int i = 0; i < element_count; i++) {
- TextElement& elm = elements()->at(i);
- elm.set_cp_offset(cp_offset);
- cp_offset += elm.length();
- }
-}
-
-namespace {
-
-// Assertion propagation moves information about assertions such as
-// \b to the affected nodes. For instance, in /.\b./ information must
-// be propagated to the first '.' that whatever follows needs to know
-// if it matched a word or a non-word, and to the second '.' that it
-// has to check if it succeeds a word or non-word. In this case the
-// result will be something like:
-//
-// +-------+ +------------+
-// | . | | . |
-// +-------+ ---> +------------+
-// | word? | | check word |
-// +-------+ +------------+
-class AssertionPropagator : public AllStatic {
- public:
- static void VisitText(TextNode* that) {}
-
- static void VisitAction(ActionNode* that) {
- // If the next node is interested in what it follows then this node
- // has to be interested too so it can pass the information on.
- that->info()->AddFromFollowing(that->on_success()->info());
- }
-
- static void VisitChoice(ChoiceNode* that, int i) {
- // Anything the following nodes need to know has to be known by
- // this node also, so it can pass it on.
- that->info()->AddFromFollowing(that->alternatives()->at(i).node()->info());
- }
-
- static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
- that->info()->AddFromFollowing(that->continue_node()->info());
- }
-
- static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {
- that->info()->AddFromFollowing(that->loop_node()->info());
- }
-
- static void VisitNegativeLookaroundChoiceLookaroundNode(
- NegativeLookaroundChoiceNode* that) {
- VisitChoice(that, NegativeLookaroundChoiceNode::kLookaroundIndex);
- }
-
- static void VisitNegativeLookaroundChoiceContinueNode(
- NegativeLookaroundChoiceNode* that) {
- VisitChoice(that, NegativeLookaroundChoiceNode::kContinueIndex);
- }
-
- static void VisitBackReference(BackReferenceNode* that) {}
-
- static void VisitAssertion(AssertionNode* that) {}
-};
-
-// Propagates information about the minimum size of successful matches from
-// successor nodes to their predecessors. Note that all eats_at_least values
-// are initialized to zero before analysis.
-class EatsAtLeastPropagator : public AllStatic {
- public:
- static void VisitText(TextNode* that) {
- // The eats_at_least value is not used if reading backward.
- if (!that->read_backward()) {
- // We are not at the start after this node, and thus we can use the
- // successor's eats_at_least_from_not_start value.
- uint8_t eats_at_least = base::saturated_cast<uint8_t>(
- that->Length() + that->on_success()
- ->eats_at_least_info()
- ->eats_at_least_from_not_start);
- that->set_eats_at_least_info(EatsAtLeastInfo(eats_at_least));
- }
- }
-
- static void VisitAction(ActionNode* that) {
- // POSITIVE_SUBMATCH_SUCCESS rewinds input, so we must not consider
- // successor nodes for eats_at_least. SET_REGISTER_FOR_LOOP indicates a loop
- // entry point, which means the loop body will run at least the minimum
- // number of times before the continuation case can run. Otherwise the
- // current node eats at least as much as its successor.
- switch (that->action_type()) {
- case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
- break; // Was already initialized to zero.
- case ActionNode::SET_REGISTER_FOR_LOOP:
- that->set_eats_at_least_info(
- that->on_success()->EatsAtLeastFromLoopEntry());
- break;
- default:
- that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
- break;
- }
- }
-
- static void VisitChoice(ChoiceNode* that, int i) {
- // The minimum possible match from a choice node is the minimum of its
- // successors.
- EatsAtLeastInfo eats_at_least =
- i == 0 ? EatsAtLeastInfo(UINT8_MAX) : *that->eats_at_least_info();
- eats_at_least.SetMin(
- *that->alternatives()->at(i).node()->eats_at_least_info());
- that->set_eats_at_least_info(eats_at_least);
- }
-
- static void VisitLoopChoiceContinueNode(LoopChoiceNode* that) {
- that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
- }
-
- static void VisitLoopChoiceLoopNode(LoopChoiceNode* that) {}
-
- static void VisitNegativeLookaroundChoiceLookaroundNode(
- NegativeLookaroundChoiceNode* that) {}
-
- static void VisitNegativeLookaroundChoiceContinueNode(
- NegativeLookaroundChoiceNode* that) {
- that->set_eats_at_least_info(*that->continue_node()->eats_at_least_info());
- }
-
- static void VisitBackReference(BackReferenceNode* that) {
- if (!that->read_backward()) {
- that->set_eats_at_least_info(*that->on_success()->eats_at_least_info());
- }
- }
-
- static void VisitAssertion(AssertionNode* that) {
- EatsAtLeastInfo eats_at_least = *that->on_success()->eats_at_least_info();
- if (that->assertion_type() == AssertionNode::AT_START) {
- // If we know we are not at the start and we are asked "how many
- // characters will you match if you succeed?" then we can answer anything
- // since false implies false. So let's just set the max answer
- // (UINT8_MAX) since that won't prevent us from preloading a lot of
- // characters for the other branches in the node graph.
- eats_at_least.eats_at_least_from_not_start = UINT8_MAX;
- }
- that->set_eats_at_least_info(eats_at_least);
- }
-};
-
-} // namespace
-
-// -------------------------------------------------------------------
-// Analysis
-
-// Iterates the node graph and provides the opportunity for propagators to set
-// values that depend on successor nodes.
-template <typename... Propagators>
-class Analysis : public NodeVisitor {
- public:
- Analysis(Isolate* isolate, bool is_one_byte)
- : isolate_(isolate),
- is_one_byte_(is_one_byte),
- error_(RegExpError::kNone) {}
-
- void EnsureAnalyzed(RegExpNode* that) {
- StackLimitCheck check(isolate());
- if (check.HasOverflowed()) {
- if (FLAG_correctness_fuzzer_suppressions) {
- FATAL("Analysis: Aborting on stack overflow");
- }
- fail(RegExpError::kAnalysisStackOverflow);
- return;
- }
- if (that->info()->been_analyzed || that->info()->being_analyzed) return;
- that->info()->being_analyzed = true;
- that->Accept(this);
- that->info()->being_analyzed = false;
- that->info()->been_analyzed = true;
- }
-
- bool has_failed() { return error_ != RegExpError::kNone; }
- RegExpError error() {
- DCHECK(error_ != RegExpError::kNone);
- return error_;
- }
- void fail(RegExpError error) { error_ = error; }
-
- Isolate* isolate() const { return isolate_; }
-
- void VisitEnd(EndNode* that) override {
- // nothing to do
- }
-
-// Used to call the given static function on each propagator / variadic template
-// argument.
-#define STATIC_FOR_EACH(expr) \
- do { \
- int dummy[] = {((expr), 0)...}; \
- USE(dummy); \
- } while (false)
-
- void VisitText(TextNode* that) override {
- that->MakeCaseIndependent(isolate(), is_one_byte_);
- EnsureAnalyzed(that->on_success());
- if (has_failed()) return;
- that->CalculateOffsets();
- STATIC_FOR_EACH(Propagators::VisitText(that));
- }
-
- void VisitAction(ActionNode* that) override {
- EnsureAnalyzed(that->on_success());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitAction(that));
- }
-
- void VisitChoice(ChoiceNode* that) override {
- for (int i = 0; i < that->alternatives()->length(); i++) {
- EnsureAnalyzed(that->alternatives()->at(i).node());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitChoice(that, i));
- }
- }
-
- void VisitLoopChoice(LoopChoiceNode* that) override {
- DCHECK_EQ(that->alternatives()->length(), 2); // Just loop and continue.
-
- // First propagate all information from the continuation node.
- EnsureAnalyzed(that->continue_node());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitLoopChoiceContinueNode(that));
-
- // Check the loop last since it may need the value of this node
- // to get a correct result.
- EnsureAnalyzed(that->loop_node());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitLoopChoiceLoopNode(that));
- }
-
- void VisitNegativeLookaroundChoice(
- NegativeLookaroundChoiceNode* that) override {
- DCHECK_EQ(that->alternatives()->length(), 2); // Lookaround and continue.
-
- EnsureAnalyzed(that->lookaround_node());
- if (has_failed()) return;
- STATIC_FOR_EACH(
- Propagators::VisitNegativeLookaroundChoiceLookaroundNode(that));
-
- EnsureAnalyzed(that->continue_node());
- if (has_failed()) return;
- STATIC_FOR_EACH(
- Propagators::VisitNegativeLookaroundChoiceContinueNode(that));
- }
-
- void VisitBackReference(BackReferenceNode* that) override {
- EnsureAnalyzed(that->on_success());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitBackReference(that));
- }
-
- void VisitAssertion(AssertionNode* that) override {
- EnsureAnalyzed(that->on_success());
- if (has_failed()) return;
- STATIC_FOR_EACH(Propagators::VisitAssertion(that));
- }
-
-#undef STATIC_FOR_EACH
-
- private:
- Isolate* isolate_;
- bool is_one_byte_;
- RegExpError error_;
-
- DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
-};
-
-RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
- RegExpNode* node) {
- Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
- is_one_byte);
- DCHECK_EQ(node->info()->been_analyzed, false);
- analysis.EnsureAnalyzed(node);
- DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
- return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
-}
-
-void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm,
- bool not_at_start) {
- // Working out the set of characters that a backreference can match is too
- // hard, so we just say that any character can match.
- bm->SetRest(offset);
- SaveBMInfo(bm, not_at_start, offset);
-}
-
-STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
- RegExpMacroAssembler::kTableSize);
-
-void ChoiceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
- ZoneList<GuardedAlternative>* alts = alternatives();
- budget = (budget - 1) / alts->length();
- for (int i = 0; i < alts->length(); i++) {
- GuardedAlternative& alt = alts->at(i);
- if (alt.guards() != nullptr && alt.guards()->length() != 0) {
- bm->SetRest(offset); // Give up trying to fill in info.
- SaveBMInfo(bm, not_at_start, offset);
- return;
- }
- alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
- }
- SaveBMInfo(bm, not_at_start, offset);
-}
-
-void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
- if (initial_offset >= bm->length()) return;
- int offset = initial_offset;
- int max_char = bm->max_char();
- for (int i = 0; i < elements()->length(); i++) {
- if (offset >= bm->length()) {
- if (initial_offset == 0) set_bm_info(not_at_start, bm);
- return;
- }
- TextElement text = elements()->at(i);
- if (text.text_type() == TextElement::ATOM) {
- RegExpAtom* atom = text.atom();
- for (int j = 0; j < atom->length(); j++, offset++) {
- if (offset >= bm->length()) {
- if (initial_offset == 0) set_bm_info(not_at_start, bm);
- return;
- }
- uc16 character = atom->data()[j];
- if (IgnoreCase(atom->flags())) {
- unibrow::uchar chars[4];
- int length = GetCaseIndependentLetters(
- isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
- chars, 4);
- for (int j = 0; j < length; j++) {
- bm->Set(offset, chars[j]);
- }
- } else {
- if (character <= max_char) bm->Set(offset, character);
- }
- }
- } else {
- DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
- RegExpCharacterClass* char_class = text.char_class();
- ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
- if (char_class->is_negated()) {
- bm->SetAll(offset);
- } else {
- for (int k = 0; k < ranges->length(); k++) {
- CharacterRange& range = ranges->at(k);
- if (range.from() > max_char) continue;
- int to = Min(max_char, static_cast<int>(range.to()));
- bm->SetInterval(offset, Interval(range.from(), to));
- }
- }
- offset++;
- }
- }
- if (offset >= bm->length()) {
- if (initial_offset == 0) set_bm_info(not_at_start, bm);
- return;
- }
- on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
- true); // Not at start after a text node.
- if (initial_offset == 0) set_bm_info(not_at_start, bm);
-}
-
-// static
-RegExpNode* RegExpCompiler::OptionallyStepBackToLeadSurrogate(
- RegExpCompiler* compiler, RegExpNode* on_success, JSRegExp::Flags flags) {
- DCHECK(!compiler->read_backward());
- Zone* zone = compiler->zone();
- ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
- ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
- zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
-
- ChoiceNode* optional_step_back = new (zone) ChoiceNode(2, zone);
-
- int stack_register = compiler->UnicodeLookaroundStackRegister();
- int position_register = compiler->UnicodeLookaroundPositionRegister();
- RegExpNode* step_back = TextNode::CreateForCharacterRanges(
- zone, lead_surrogates, true, on_success, flags);
- RegExpLookaround::Builder builder(true, step_back, stack_register,
- position_register);
- RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
- zone, trail_surrogates, false, builder.on_match_success(), flags);
-
- optional_step_back->AddAlternative(
- GuardedAlternative(builder.ForMatch(match_trail)));
- optional_step_back->AddAlternative(GuardedAlternative(on_success));
-
- return optional_step_back;
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-compiler.h b/js/src/new-regexp/regexp-compiler.h
deleted file mode 100644
index 186d5e838..000000000
--- a/js/src/new-regexp/regexp-compiler.h
+++ /dev/null
@@ -1,621 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_COMPILER_H_
-#define V8_REGEXP_REGEXP_COMPILER_H_
-
-#include <bitset>
-
-#include "new-regexp/regexp-nodes.h"
-
-namespace v8 {
-namespace internal {
-
-class DynamicBitSet;
-class Isolate;
-
-namespace regexp_compiler_constants {
-
-// The '2' variant is has inclusive from and exclusive to.
-// This covers \s as defined in ECMA-262 5.1, 15.10.2.12,
-// which include WhiteSpace (7.2) or LineTerminator (7.3) values.
-constexpr uc32 kRangeEndMarker = 0x110000;
-constexpr int kSpaceRanges[] = {
- '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0, 0x00A1, 0x1680,
- 0x1681, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
- 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};
-constexpr int kSpaceRangeCount = arraysize(kSpaceRanges);
-
-constexpr int kWordRanges[] = {'0', '9' + 1, 'A', 'Z' + 1, '_',
- '_' + 1, 'a', 'z' + 1, kRangeEndMarker};
-constexpr int kWordRangeCount = arraysize(kWordRanges);
-constexpr int kDigitRanges[] = {'0', '9' + 1, kRangeEndMarker};
-constexpr int kDigitRangeCount = arraysize(kDigitRanges);
-constexpr int kSurrogateRanges[] = {kLeadSurrogateStart,
- kLeadSurrogateStart + 1, kRangeEndMarker};
-constexpr int kSurrogateRangeCount = arraysize(kSurrogateRanges);
-constexpr int kLineTerminatorRanges[] = {0x000A, 0x000B, 0x000D, 0x000E,
- 0x2028, 0x202A, kRangeEndMarker};
-constexpr int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
-
-// More makes code generation slower, less makes V8 benchmark score lower.
-constexpr int kMaxLookaheadForBoyerMoore = 8;
-// In a 3-character pattern you can maximally step forwards 3 characters
-// at a time, which is not always enough to pay for the extra logic.
-constexpr int kPatternTooShortForBoyerMoore = 2;
-
-} // namespace regexp_compiler_constants
-
-inline bool IgnoreCase(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kIgnoreCase) != 0;
-}
-
-inline bool IsUnicode(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kUnicode) != 0;
-}
-
-inline bool IsSticky(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kSticky) != 0;
-}
-
-inline bool IsGlobal(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kGlobal) != 0;
-}
-
-inline bool DotAll(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kDotAll) != 0;
-}
-
-inline bool Multiline(JSRegExp::Flags flags) {
- return (flags & JSRegExp::kMultiline) != 0;
-}
-
-inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
- // Both unicode and ignore_case flags are set. We need to use ICU to find
- // the closure over case equivalents.
- return IsUnicode(flags) && IgnoreCase(flags);
-}
-
-// Details of a quick mask-compare check that can look ahead in the
-// input stream.
-class QuickCheckDetails {
- public:
- QuickCheckDetails()
- : characters_(0), mask_(0), value_(0), cannot_match_(false) {}
- explicit QuickCheckDetails(int characters)
- : characters_(characters), mask_(0), value_(0), cannot_match_(false) {}
- bool Rationalize(bool one_byte);
- // Merge in the information from another branch of an alternation.
- void Merge(QuickCheckDetails* other, int from_index);
- // Advance the current position by some amount.
- void Advance(int by, bool one_byte);
- void Clear();
- bool cannot_match() { return cannot_match_; }
- void set_cannot_match() { cannot_match_ = true; }
- struct Position {
- Position() : mask(0), value(0), determines_perfectly(false) {}
- uc16 mask;
- uc16 value;
- bool determines_perfectly;
- };
- int characters() { return characters_; }
- void set_characters(int characters) { characters_ = characters; }
- Position* positions(int index) {
- DCHECK_LE(0, index);
- DCHECK_GT(characters_, index);
- return positions_ + index;
- }
- uint32_t mask() { return mask_; }
- uint32_t value() { return value_; }
-
- private:
- // How many characters do we have quick check information from. This is
- // the same for all branches of a choice node.
- int characters_;
- Position positions_[4];
- // These values are the condensate of the above array after Rationalize().
- uint32_t mask_;
- uint32_t value_;
- // If set to true, there is no way this quick check can match at all.
- // E.g., if it requires to be at the start of the input, and isn't.
- bool cannot_match_;
-};
-
-// Improve the speed that we scan for an initial point where a non-anchored
-// regexp can match by using a Boyer-Moore-like table. This is done by
-// identifying non-greedy non-capturing loops in the nodes that eat any
-// character one at a time. For example in the middle of the regexp
-// /foo[\s\S]*?bar/ we find such a loop. There is also such a loop implicitly
-// inserted at the start of any non-anchored regexp.
-//
-// When we have found such a loop we look ahead in the nodes to find the set of
-// characters that can come at given distances. For example for the regexp
-// /.?foo/ we know that there are at least 3 characters ahead of us, and the
-// sets of characters that can occur are [any, [f, o], [o]]. We find a range in
-// the lookahead info where the set of characters is reasonably constrained. In
-// our example this is from index 1 to 2 (0 is not constrained). We can now
-// look 3 characters ahead and if we don't find one of [f, o] (the union of
-// [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
-//
-// For Unicode input strings we do the same, but modulo 128.
-//
-// We also look at the first string fed to the regexp and use that to get a hint
-// of the character frequencies in the inputs. This affects the assessment of
-// whether the set of characters is 'reasonably constrained'.
-//
-// We also have another lookahead mechanism (called quick check in the code),
-// which uses a wide load of multiple characters followed by a mask and compare
-// to determine whether a match is possible at this point.
-enum ContainedInLattice {
- kNotYet = 0,
- kLatticeIn = 1,
- kLatticeOut = 2,
- kLatticeUnknown = 3 // Can also mean both in and out.
-};
-
-inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
- return static_cast<ContainedInLattice>(a | b);
-}
-
-class BoyerMoorePositionInfo : public ZoneObject {
- public:
- bool at(int i) const { return map_[i]; }
-
- static constexpr int kMapSize = 128;
- static constexpr int kMask = kMapSize - 1;
-
- int map_count() const { return map_count_; }
-
- void Set(int character);
- void SetInterval(const Interval& interval);
- void SetAll();
-
- bool is_non_word() { return w_ == kLatticeOut; }
- bool is_word() { return w_ == kLatticeIn; }
-
- using Bitset = std::bitset<kMapSize>;
- Bitset raw_bitset() const { return map_; }
-
- private:
- Bitset map_;
- int map_count_ = 0; // Number of set bits in the map.
- ContainedInLattice w_ = kNotYet; // The \w character class.
-};
-
-class BoyerMooreLookahead : public ZoneObject {
- public:
- BoyerMooreLookahead(int length, RegExpCompiler* compiler, Zone* zone);
-
- int length() { return length_; }
- int max_char() { return max_char_; }
- RegExpCompiler* compiler() { return compiler_; }
-
- int Count(int map_number) { return bitmaps_->at(map_number)->map_count(); }
-
- BoyerMoorePositionInfo* at(int i) { return bitmaps_->at(i); }
-
- void Set(int map_number, int character) {
- if (character > max_char_) return;
- BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
- info->Set(character);
- }
-
- void SetInterval(int map_number, const Interval& interval) {
- if (interval.from() > max_char_) return;
- BoyerMoorePositionInfo* info = bitmaps_->at(map_number);
- if (interval.to() > max_char_) {
- info->SetInterval(Interval(interval.from(), max_char_));
- } else {
- info->SetInterval(interval);
- }
- }
-
- void SetAll(int map_number) { bitmaps_->at(map_number)->SetAll(); }
-
- void SetRest(int from_map) {
- for (int i = from_map; i < length_; i++) SetAll(i);
- }
- void EmitSkipInstructions(RegExpMacroAssembler* masm);
-
- private:
- // This is the value obtained by EatsAtLeast. If we do not have at least this
- // many characters left in the sample string then the match is bound to fail.
- // Therefore it is OK to read a character this far ahead of the current match
- // point.
- int length_;
- RegExpCompiler* compiler_;
- // 0xff for Latin1, 0xffff for UTF-16.
- int max_char_;
- ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
-
- int GetSkipTable(int min_lookahead, int max_lookahead,
- Handle<ByteArray> boolean_skip_table);
- bool FindWorthwhileInterval(int* from, int* to);
- int FindBestInterval(int max_number_of_chars, int old_biggest_points,
- int* from, int* to);
-};
-
-// There are many ways to generate code for a node. This class encapsulates
-// the current way we should be generating. In other words it encapsulates
-// the current state of the code generator. The effect of this is that we
-// generate code for paths that the matcher can take through the regular
-// expression. A given node in the regexp can be code-generated several times
-// as it can be part of several traces. For example for the regexp:
-// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
-// of the foo-bar-baz trace and once as part of the foo-ip-baz trace. The code
-// to match foo is generated only once (the traces have a common prefix). The
-// code to store the capture is deferred and generated (twice) after the places
-// where baz has been matched.
-class Trace {
- public:
- // A value for a property that is either known to be true, know to be false,
- // or not known.
- enum TriBool { UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1 };
-
- class DeferredAction {
- public:
- DeferredAction(ActionNode::ActionType action_type, int reg)
- : action_type_(action_type), reg_(reg), next_(nullptr) {}
- DeferredAction* next() { return next_; }
- bool Mentions(int reg);
- int reg() { return reg_; }
- ActionNode::ActionType action_type() { return action_type_; }
-
- private:
- ActionNode::ActionType action_type_;
- int reg_;
- DeferredAction* next_;
- friend class Trace;
- };
-
- class DeferredCapture : public DeferredAction {
- public:
- DeferredCapture(int reg, bool is_capture, Trace* trace)
- : DeferredAction(ActionNode::STORE_POSITION, reg),
- cp_offset_(trace->cp_offset()),
- is_capture_(is_capture) {}
- int cp_offset() { return cp_offset_; }
- bool is_capture() { return is_capture_; }
-
- private:
- int cp_offset_;
- bool is_capture_;
- void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
- };
-
- class DeferredSetRegisterForLoop : public DeferredAction {
- public:
- DeferredSetRegisterForLoop(int reg, int value)
- : DeferredAction(ActionNode::SET_REGISTER_FOR_LOOP, reg),
- value_(value) {}
- int value() { return value_; }
-
- private:
- int value_;
- };
-
- class DeferredClearCaptures : public DeferredAction {
- public:
- explicit DeferredClearCaptures(Interval range)
- : DeferredAction(ActionNode::CLEAR_CAPTURES, -1), range_(range) {}
- Interval range() { return range_; }
-
- private:
- Interval range_;
- };
-
- class DeferredIncrementRegister : public DeferredAction {
- public:
- explicit DeferredIncrementRegister(int reg)
- : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) {}
- };
-
- Trace()
- : cp_offset_(0),
- actions_(nullptr),
- backtrack_(nullptr),
- stop_node_(nullptr),
- loop_label_(nullptr),
- characters_preloaded_(0),
- bound_checked_up_to_(0),
- flush_budget_(100),
- at_start_(UNKNOWN) {}
-
- // End the trace. This involves flushing the deferred actions in the trace
- // and pushing a backtrack location onto the backtrack stack. Once this is
- // done we can start a new trace or go to one that has already been
- // generated.
- void Flush(RegExpCompiler* compiler, RegExpNode* successor);
- int cp_offset() { return cp_offset_; }
- DeferredAction* actions() { return actions_; }
- // A trivial trace is one that has no deferred actions or other state that
- // affects the assumptions used when generating code. There is no recorded
- // backtrack location in a trivial trace, so with a trivial trace we will
- // generate code that, on a failure to match, gets the backtrack location
- // from the backtrack stack rather than using a direct jump instruction. We
- // always start code generation with a trivial trace and non-trivial traces
- // are created as we emit code for nodes or add to the list of deferred
- // actions in the trace. The location of the code generated for a node using
- // a trivial trace is recorded in a label in the node so that gotos can be
- // generated to that code.
- bool is_trivial() {
- return backtrack_ == nullptr && actions_ == nullptr && cp_offset_ == 0 &&
- characters_preloaded_ == 0 && bound_checked_up_to_ == 0 &&
- quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN;
- }
- TriBool at_start() { return at_start_; }
- void set_at_start(TriBool at_start) { at_start_ = at_start; }
- Label* backtrack() { return backtrack_; }
- Label* loop_label() { return loop_label_; }
- RegExpNode* stop_node() { return stop_node_; }
- int characters_preloaded() { return characters_preloaded_; }
- int bound_checked_up_to() { return bound_checked_up_to_; }
- int flush_budget() { return flush_budget_; }
- QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
- bool mentions_reg(int reg);
- // Returns true if a deferred position store exists to the specified
- // register and stores the offset in the out-parameter. Otherwise
- // returns false.
- bool GetStoredPosition(int reg, int* cp_offset);
- // These set methods and AdvanceCurrentPositionInTrace should be used only on
- // new traces - the intention is that traces are immutable after creation.
- void add_action(DeferredAction* new_action) {
- DCHECK(new_action->next_ == nullptr);
- new_action->next_ = actions_;
- actions_ = new_action;
- }
- void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
- void set_stop_node(RegExpNode* node) { stop_node_ = node; }
- void set_loop_label(Label* label) { loop_label_ = label; }
- void set_characters_preloaded(int count) { characters_preloaded_ = count; }
- void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
- void set_flush_budget(int to) { flush_budget_ = to; }
- void set_quick_check_performed(QuickCheckDetails* d) {
- quick_check_performed_ = *d;
- }
- void InvalidateCurrentCharacter();
- void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
-
- private:
- int FindAffectedRegisters(DynamicBitSet* affected_registers, Zone* zone);
- void PerformDeferredActions(RegExpMacroAssembler* macro, int max_register,
- const DynamicBitSet& affected_registers,
- DynamicBitSet* registers_to_pop,
- DynamicBitSet* registers_to_clear, Zone* zone);
- void RestoreAffectedRegisters(RegExpMacroAssembler* macro, int max_register,
- const DynamicBitSet& registers_to_pop,
- const DynamicBitSet& registers_to_clear);
- int cp_offset_;
- DeferredAction* actions_;
- Label* backtrack_;
- RegExpNode* stop_node_;
- Label* loop_label_;
- int characters_preloaded_;
- int bound_checked_up_to_;
- QuickCheckDetails quick_check_performed_;
- int flush_budget_;
- TriBool at_start_;
-};
-
-class GreedyLoopState {
- public:
- explicit GreedyLoopState(bool not_at_start);
-
- Label* label() { return &label_; }
- Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }
-
- private:
- Label label_;
- Trace counter_backtrack_trace_;
-};
-
-struct PreloadState {
- static const int kEatsAtLeastNotYetInitialized = -1;
- bool preload_is_current_;
- bool preload_has_checked_bounds_;
- int preload_characters_;
- int eats_at_least_;
- void init() { eats_at_least_ = kEatsAtLeastNotYetInitialized; }
-};
-
-// Analysis performs assertion propagation and computes eats_at_least_ values.
-// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
-// details.
-RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
-
-class FrequencyCollator {
- public:
- FrequencyCollator() : total_samples_(0) {
- for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
- frequencies_[i] = CharacterFrequency(i);
- }
- }
-
- void CountCharacter(int character) {
- int index = (character & RegExpMacroAssembler::kTableMask);
- frequencies_[index].Increment();
- total_samples_++;
- }
-
- // Does not measure in percent, but rather per-128 (the table size from the
- // regexp macro assembler).
- int Frequency(int in_character) {
- DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
- if (total_samples_ < 1) return 1; // Division by zero.
- int freq_in_per128 =
- (frequencies_[in_character].counter() * 128) / total_samples_;
- return freq_in_per128;
- }
-
- private:
- class CharacterFrequency {
- public:
- CharacterFrequency() : counter_(0), character_(-1) {}
- explicit CharacterFrequency(int character)
- : counter_(0), character_(character) {}
-
- void Increment() { counter_++; }
- int counter() { return counter_; }
- int character() { return character_; }
-
- private:
- int counter_;
- int character_;
- };
-
- private:
- CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
- int total_samples_;
-};
-
-class RegExpCompiler {
- public:
- RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
- bool is_one_byte);
-
- int AllocateRegister() {
- if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
- reg_exp_too_big_ = true;
- return next_register_;
- }
- return next_register_++;
- }
-
- // Lookarounds to match lone surrogates for unicode character class matches
- // are never nested. We can therefore reuse registers.
- int UnicodeLookaroundStackRegister() {
- if (unicode_lookaround_stack_register_ == kNoRegister) {
- unicode_lookaround_stack_register_ = AllocateRegister();
- }
- return unicode_lookaround_stack_register_;
- }
-
- int UnicodeLookaroundPositionRegister() {
- if (unicode_lookaround_position_register_ == kNoRegister) {
- unicode_lookaround_position_register_ = AllocateRegister();
- }
- return unicode_lookaround_position_register_;
- }
-
- struct CompilationResult final {
- explicit CompilationResult(RegExpError err) : error(err) {}
- CompilationResult(Object code, int registers)
- : code(code), num_registers(registers) {}
-
- static CompilationResult RegExpTooBig() {
- return CompilationResult(RegExpError::kTooLarge);
- }
-
- bool Succeeded() const { return error == RegExpError::kNone; }
-
- const RegExpError error = RegExpError::kNone;
- Object code;
- int num_registers = 0;
- };
-
- CompilationResult Assemble(Isolate* isolate, RegExpMacroAssembler* assembler,
- RegExpNode* start, int capture_count,
- Handle<String> pattern);
-
- // If the regexp matching starts within a surrogate pair, step back to the
- // lead surrogate and start matching from there.
- static RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
- RegExpNode* on_success,
- JSRegExp::Flags flags);
-
- inline void AddWork(RegExpNode* node) {
- if (!node->on_work_list() && !node->label()->is_bound()) {
- node->set_on_work_list(true);
- work_list_->push_back(node);
- }
- }
-
- static const int kImplementationOffset = 0;
- static const int kNumberOfRegistersOffset = 0;
- static const int kCodeOffset = 1;
-
- RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
- EndNode* accept() { return accept_; }
-
- static const int kMaxRecursion = 100;
- inline int recursion_depth() { return recursion_depth_; }
- inline void IncrementRecursionDepth() { recursion_depth_++; }
- inline void DecrementRecursionDepth() { recursion_depth_--; }
-
- void SetRegExpTooBig() { reg_exp_too_big_ = true; }
-
- inline bool one_byte() { return one_byte_; }
- inline bool optimize() { return optimize_; }
- inline void set_optimize(bool value) { optimize_ = value; }
- inline bool limiting_recursion() { return limiting_recursion_; }
- inline void set_limiting_recursion(bool value) {
- limiting_recursion_ = value;
- }
- bool read_backward() { return read_backward_; }
- void set_read_backward(bool value) { read_backward_ = value; }
- FrequencyCollator* frequency_collator() { return &frequency_collator_; }
-
- int current_expansion_factor() { return current_expansion_factor_; }
- void set_current_expansion_factor(int value) {
- current_expansion_factor_ = value;
- }
-
- Isolate* isolate() const { return isolate_; }
- Zone* zone() const { return zone_; }
-
- static const int kNoRegister = -1;
-
- private:
- EndNode* accept_;
- int next_register_;
- int unicode_lookaround_stack_register_;
- int unicode_lookaround_position_register_;
- ZoneVector<RegExpNode*>* work_list_;
- int recursion_depth_;
- RegExpMacroAssembler* macro_assembler_;
- bool one_byte_;
- bool reg_exp_too_big_;
- bool limiting_recursion_;
- bool optimize_;
- bool read_backward_;
- int current_expansion_factor_;
- FrequencyCollator frequency_collator_;
- Isolate* isolate_;
- Zone* zone_;
-};
-
-// Categorizes character ranges into BMP, non-BMP, lead, and trail surrogates.
-class UnicodeRangeSplitter {
- public:
- V8_EXPORT_PRIVATE UnicodeRangeSplitter(ZoneList<CharacterRange>* base);
-
- static constexpr int kInitialSize = 8;
- using CharacterRangeVector = base::SmallVector<CharacterRange, kInitialSize>;
-
- const CharacterRangeVector* bmp() const { return &bmp_; }
- const CharacterRangeVector* lead_surrogates() const {
- return &lead_surrogates_;
- }
- const CharacterRangeVector* trail_surrogates() const {
- return &trail_surrogates_;
- }
- const CharacterRangeVector* non_bmp() const { return &non_bmp_; }
-
- private:
- void AddRange(CharacterRange range);
-
- CharacterRangeVector bmp_;
- CharacterRangeVector lead_surrogates_;
- CharacterRangeVector trail_surrogates_;
- CharacterRangeVector non_bmp_;
-};
-
-// We need to check for the following characters: 0x39C 0x3BC 0x178.
-// TODO(jgruber): Move to CharacterRange.
-bool RangeContainsLatin1Equivalents(CharacterRange range);
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_COMPILER_H_
diff --git a/js/src/new-regexp/regexp-dotprinter.cc b/js/src/new-regexp/regexp-dotprinter.cc
deleted file mode 100644
index 2bf393c32..000000000
--- a/js/src/new-regexp/regexp-dotprinter.cc
+++ /dev/null
@@ -1,252 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-dotprinter.h"
-
-#include "new-regexp/regexp-compiler.h"
-
-namespace v8 {
-namespace internal {
-
-// -------------------------------------------------------------------
-// Dot/dotty output
-
-#ifdef DEBUG
-
-class DotPrinterImpl : public NodeVisitor {
- public:
- explicit DotPrinterImpl(std::ostream& os) : os_(os) {}
- void PrintNode(const char* label, RegExpNode* node);
- void Visit(RegExpNode* node);
- void PrintAttributes(RegExpNode* from);
- void PrintOnFailure(RegExpNode* from, RegExpNode* to);
-#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that);
- FOR_EACH_NODE_TYPE(DECLARE_VISIT)
-#undef DECLARE_VISIT
- private:
- std::ostream& os_;
-};
-
-void DotPrinterImpl::PrintNode(const char* label, RegExpNode* node) {
- os_ << "digraph G {\n graph [label=\"";
- for (int i = 0; label[i]; i++) {
- switch (label[i]) {
- case '\\':
- os_ << "\\\\";
- break;
- case '"':
- os_ << "\"";
- break;
- default:
- os_ << label[i];
- break;
- }
- }
- os_ << "\"];\n";
- Visit(node);
- os_ << "}" << std::endl;
-}
-
-void DotPrinterImpl::Visit(RegExpNode* node) {
- if (node->info()->visited) return;
- node->info()->visited = true;
- node->Accept(this);
-}
-
-void DotPrinterImpl::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
- os_ << " n" << from << " -> n" << on_failure << " [style=dotted];\n";
- Visit(on_failure);
-}
-
-class AttributePrinter {
- public:
- explicit AttributePrinter(std::ostream& os) // NOLINT
- : os_(os), first_(true) {}
- void PrintSeparator() {
- if (first_) {
- first_ = false;
- } else {
- os_ << "|";
- }
- }
- void PrintBit(const char* name, bool value) {
- if (!value) return;
- PrintSeparator();
- os_ << "{" << name << "}";
- }
- void PrintPositive(const char* name, int value) {
- if (value < 0) return;
- PrintSeparator();
- os_ << "{" << name << "|" << value << "}";
- }
-
- private:
- std::ostream& os_;
- bool first_;
-};
-
-void DotPrinterImpl::PrintAttributes(RegExpNode* that) {
- os_ << " a" << that << " [shape=Mrecord, color=grey, fontcolor=grey, "
- << "margin=0.1, fontsize=10, label=\"{";
- AttributePrinter printer(os_);
- NodeInfo* info = that->info();
- printer.PrintBit("NI", info->follows_newline_interest);
- printer.PrintBit("WI", info->follows_word_interest);
- printer.PrintBit("SI", info->follows_start_interest);
- Label* label = that->label();
- if (label->is_bound()) printer.PrintPositive("@", label->pos());
- os_ << "}\"];\n"
- << " a" << that << " -> n" << that
- << " [style=dashed, color=grey, arrowhead=none];\n";
-}
-
-void DotPrinterImpl::VisitChoice(ChoiceNode* that) {
- os_ << " n" << that << " [shape=Mrecord, label=\"?\"];\n";
- for (int i = 0; i < that->alternatives()->length(); i++) {
- GuardedAlternative alt = that->alternatives()->at(i);
- os_ << " n" << that << " -> n" << alt.node();
- }
- for (int i = 0; i < that->alternatives()->length(); i++) {
- GuardedAlternative alt = that->alternatives()->at(i);
- alt.node()->Accept(this);
- }
-}
-
-void DotPrinterImpl::VisitLoopChoice(LoopChoiceNode* that) {
- VisitChoice(that);
-}
-
-void DotPrinterImpl::VisitNegativeLookaroundChoice(
- NegativeLookaroundChoiceNode* that) {
- VisitChoice(that);
-}
-
-void DotPrinterImpl::VisitText(TextNode* that) {
- Zone* zone = that->zone();
- os_ << " n" << that << " [label=\"";
- for (int i = 0; i < that->elements()->length(); i++) {
- if (i > 0) os_ << " ";
- TextElement elm = that->elements()->at(i);
- switch (elm.text_type()) {
- case TextElement::ATOM: {
- Vector<const uc16> data = elm.atom()->data();
- for (int i = 0; i < data.length(); i++) {
- os_ << static_cast<char>(data[i]);
- }
- break;
- }
- case TextElement::CHAR_CLASS: {
- RegExpCharacterClass* node = elm.char_class();
- os_ << "[";
- if (node->is_negated()) os_ << "^";
- for (int j = 0; j < node->ranges(zone)->length(); j++) {
- CharacterRange range = node->ranges(zone)->at(j);
- os_ << AsUC16(range.from()) << "-" << AsUC16(range.to());
- }
- os_ << "]";
- break;
- }
- default:
- UNREACHABLE();
- }
- }
- os_ << "\", shape=box, peripheries=2];\n";
- PrintAttributes(that);
- os_ << " n" << that << " -> n" << that->on_success() << ";\n";
- Visit(that->on_success());
-}
-
-void DotPrinterImpl::VisitBackReference(BackReferenceNode* that) {
- os_ << " n" << that << " [label=\"$" << that->start_register() << "..$"
- << that->end_register() << "\", shape=doubleoctagon];\n";
- PrintAttributes(that);
- os_ << " n" << that << " -> n" << that->on_success() << ";\n";
- Visit(that->on_success());
-}
-
-void DotPrinterImpl::VisitEnd(EndNode* that) {
- os_ << " n" << that << " [style=bold, shape=point];\n";
- PrintAttributes(that);
-}
-
-void DotPrinterImpl::VisitAssertion(AssertionNode* that) {
- os_ << " n" << that << " [";
- switch (that->assertion_type()) {
- case AssertionNode::AT_END:
- os_ << "label=\"$\", shape=septagon";
- break;
- case AssertionNode::AT_START:
- os_ << "label=\"^\", shape=septagon";
- break;
- case AssertionNode::AT_BOUNDARY:
- os_ << "label=\"\\b\", shape=septagon";
- break;
- case AssertionNode::AT_NON_BOUNDARY:
- os_ << "label=\"\\B\", shape=septagon";
- break;
- case AssertionNode::AFTER_NEWLINE:
- os_ << "label=\"(?<=\\n)\", shape=septagon";
- break;
- }
- os_ << "];\n";
- PrintAttributes(that);
- RegExpNode* successor = that->on_success();
- os_ << " n" << that << " -> n" << successor << ";\n";
- Visit(successor);
-}
-
-void DotPrinterImpl::VisitAction(ActionNode* that) {
- os_ << " n" << that << " [";
- switch (that->action_type_) {
- case ActionNode::SET_REGISTER_FOR_LOOP:
- os_ << "label=\"$" << that->data_.u_store_register.reg
- << ":=" << that->data_.u_store_register.value << "\", shape=octagon";
- break;
- case ActionNode::INCREMENT_REGISTER:
- os_ << "label=\"$" << that->data_.u_increment_register.reg
- << "++\", shape=octagon";
- break;
- case ActionNode::STORE_POSITION:
- os_ << "label=\"$" << that->data_.u_position_register.reg
- << ":=$pos\", shape=octagon";
- break;
- case ActionNode::BEGIN_SUBMATCH:
- os_ << "label=\"$" << that->data_.u_submatch.current_position_register
- << ":=$pos,begin\", shape=septagon";
- break;
- case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
- os_ << "label=\"escape\", shape=septagon";
- break;
- case ActionNode::EMPTY_MATCH_CHECK:
- os_ << "label=\"$" << that->data_.u_empty_match_check.start_register
- << "=$pos?,$" << that->data_.u_empty_match_check.repetition_register
- << "<" << that->data_.u_empty_match_check.repetition_limit
- << "?\", shape=septagon";
- break;
- case ActionNode::CLEAR_CAPTURES: {
- os_ << "label=\"clear $" << that->data_.u_clear_captures.range_from
- << " to $" << that->data_.u_clear_captures.range_to
- << "\", shape=septagon";
- break;
- }
- }
- os_ << "];\n";
- PrintAttributes(that);
- RegExpNode* successor = that->on_success();
- os_ << " n" << that << " -> n" << successor << ";\n";
- Visit(successor);
-}
-
-#endif // DEBUG
-
-void DotPrinter::DotPrint(const char* label, RegExpNode* node) {
-#ifdef DEBUG
- StdoutStream os;
- DotPrinterImpl printer(os);
- printer.PrintNode(label, node);
-#endif // DEBUG
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-dotprinter.h b/js/src/new-regexp/regexp-dotprinter.h
deleted file mode 100644
index 0bd03e77f..000000000
--- a/js/src/new-regexp/regexp-dotprinter.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_DOTPRINTER_H_
-#define V8_REGEXP_REGEXP_DOTPRINTER_H_
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-class RegExpNode;
-
-class DotPrinter final : public AllStatic {
- public:
- static void DotPrint(const char* label, RegExpNode* node);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_DOTPRINTER_H_
diff --git a/js/src/new-regexp/regexp-error.cc b/js/src/new-regexp/regexp-error.cc
deleted file mode 100644
index 9db98d4b8..000000000
--- a/js/src/new-regexp/regexp-error.cc
+++ /dev/null
@@ -1,22 +0,0 @@
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-error.h"
-
-namespace v8 {
-namespace internal {
-
-const char* kRegExpErrorStrings[] = {
-#define TEMPLATE(NAME, STRING) STRING,
- REGEXP_ERROR_MESSAGES(TEMPLATE)
-#undef TEMPLATE
-};
-
-const char* RegExpErrorString(RegExpError error) {
- DCHECK_LT(error, RegExpError::NumErrors);
- return kRegExpErrorStrings[static_cast<int>(error)];
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-error.h b/js/src/new-regexp/regexp-error.h
deleted file mode 100644
index 4b495f07d..000000000
--- a/js/src/new-regexp/regexp-error.h
+++ /dev/null
@@ -1,57 +0,0 @@
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_ERROR_H_
-#define V8_REGEXP_REGEXP_ERROR_H_
-
-#include "regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-#define REGEXP_ERROR_MESSAGES(T) \
- T(None, "") \
- T(StackOverflow, "Maximum call stack size exceeded") \
- T(AnalysisStackOverflow, "Stack overflow") \
- T(TooLarge, "Regular expression too large") \
- T(UnterminatedGroup, "Unterminated group") \
- T(UnmatchedParen, "Unmatched ')'") \
- T(EscapeAtEndOfPattern, "\\ at end of pattern") \
- T(InvalidPropertyName, "Invalid property name") \
- T(InvalidEscape, "Invalid escape") \
- T(InvalidDecimalEscape, "Invalid decimal escape") \
- T(InvalidUnicodeEscape, "Invalid Unicode escape") \
- T(NothingToRepeat, "Nothing to repeat") \
- T(LoneQuantifierBrackets, "Lone quantifier brackets") \
- T(RangeOutOfOrder, "numbers out of order in {} quantifier") \
- T(IncompleteQuantifier, "Incomplete quantifier") \
- T(InvalidQuantifier, "Invalid quantifier") \
- T(InvalidGroup, "Invalid group") \
- T(MultipleFlagDashes, "Multiple dashes in flag group") \
- T(RepeatedFlag, "Repeated flag in flag group") \
- T(InvalidFlagGroup, "Invalid flag group") \
- T(TooManyCaptures, "Too many captures") \
- T(InvalidCaptureGroupName, "Invalid capture group name") \
- T(DuplicateCaptureGroupName, "Duplicate capture group name") \
- T(InvalidNamedReference, "Invalid named reference") \
- T(InvalidNamedCaptureReference, "Invalid named capture referenced") \
- T(InvalidClassEscape, "Invalid class escape") \
- T(InvalidClassPropertyName, "Invalid property name in character class") \
- T(InvalidCharacterClass, "Invalid character class") \
- T(UnterminatedCharacterClass, "Unterminated character class") \
- T(OutOfOrderCharacterClass, "Range out of order in character class")
-
-enum class RegExpError : uint32_t {
-#define TEMPLATE(NAME, STRING) k##NAME,
- REGEXP_ERROR_MESSAGES(TEMPLATE)
-#undef TEMPLATE
- NumErrors
-};
-
-V8_EXPORT_PRIVATE const char* RegExpErrorString(RegExpError error);
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_ERROR_H_
diff --git a/js/src/new-regexp/regexp-interpreter.cc b/js/src/new-regexp/regexp-interpreter.cc
deleted file mode 100644
index 7a492fca2..000000000
--- a/js/src/new-regexp/regexp-interpreter.cc
+++ /dev/null
@@ -1,1039 +0,0 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// A simple interpreter for the Irregexp byte code.
-
-#include "new-regexp/regexp-interpreter.h"
-
-#include "new-regexp/regexp-bytecodes.h"
-#include "new-regexp/regexp-macro-assembler.h"
-#include "new-regexp/regexp-stack.h" // For kMaximumStackSize.
-#include "new-regexp/regexp.h"
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/uchar.h"
-#endif // V8_INTL_SUPPORT
-
-// Use token threaded dispatch iff the compiler supports computed gotos and the
-// build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
-#if V8_HAS_COMPUTED_GOTO && \
- defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
-#define V8_USE_COMPUTED_GOTO 1
-#endif // V8_HAS_COMPUTED_GOTO
-
-
-namespace v8 {
-namespace internal {
-
-namespace {
-
-bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
- Vector<const uc16> subject) {
- Address offset_a =
- reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
- Address offset_b =
- reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
- size_t length = len * kUC16Size;
- return RegExpMacroAssembler::CaseInsensitiveCompareUC16(offset_a, offset_b,
- length, isolate) == 1;
-}
-
-bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
- Vector<const uint8_t> subject) {
- // For Latin1 characters the unicode flag makes no difference.
- for (int i = 0; i < len; i++) {
- unsigned int old_char = subject[from++];
- unsigned int new_char = subject[current++];
- if (old_char == new_char) continue;
- // Convert both characters to lower case.
- old_char |= 0x20;
- new_char |= 0x20;
- if (old_char != new_char) return false;
- // Not letters in the ASCII range and Latin-1 range.
- if (!(old_char - 'a' <= 'z' - 'a') &&
- !(old_char - 224 <= 254 - 224 && old_char != 247)) {
- return false;
- }
- }
- return true;
-}
-
-#ifdef DEBUG
-void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
- int stack_depth, int current_position,
- uint32_t current_char, int bytecode_length,
- const char* bytecode_name) {
- if (FLAG_trace_regexp_bytecodes) {
- const bool printable = std::isprint(current_char);
- const char* format =
- printable
- ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
- : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
- PrintF(format, pc - code_base, stack_depth, current_position, current_char,
- printable ? current_char : '.');
-
- RegExpBytecodeDisassembleSingle(code_base, pc);
- }
-}
-#endif // DEBUG
-
-int32_t Load32Aligned(const byte* pc) {
- DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
- return *reinterpret_cast<const int32_t*>(pc);
-}
-
-// TODO(jgruber): Rename to Load16AlignedUnsigned.
-uint32_t Load16Aligned(const byte* pc) {
- DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
- return *reinterpret_cast<const uint16_t*>(pc);
-}
-
-int32_t Load16AlignedSigned(const byte* pc) {
- DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
- return *reinterpret_cast<const int16_t*>(pc);
-}
-
-// A simple abstraction over the backtracking stack used by the interpreter.
-//
-// Despite the name 'backtracking' stack, it's actually used as a generic stack
-// that stores both program counters (= offsets into the bytecode) and generic
-// integer values.
-class BacktrackStack {
- public:
- BacktrackStack() = default;
-
- V8_WARN_UNUSED_RESULT bool push(int v) {
- data_.emplace_back(v);
- return (static_cast<int>(data_.size()) <= kMaxSize);
- }
- int peek() const {
- DCHECK(!data_.empty());
- return data_.back();
- }
- int pop() {
- int v = peek();
- data_.pop_back();
- return v;
- }
-
- // The 'sp' is the index of the first empty element in the stack.
- int sp() const { return static_cast<int>(data_.size()); }
- void set_sp(int new_sp) {
- DCHECK_LE(new_sp, sp());
- data_.resize_no_init(new_sp);
- }
-
- private:
- // Semi-arbitrary. Should be large enough for common cases to remain in the
- // static stack-allocated backing store, but small enough not to waste space.
- static constexpr int kStaticCapacity = 64;
-
- using ValueT = int;
- base::SmallVector<ValueT, kStaticCapacity> data_;
-
- static constexpr int kMaxSize =
- RegExpStack::kMaximumStackSize / sizeof(ValueT);
-
- DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
-};
-
-IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
- RegExp::CallOrigin call_origin) {
- CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
- // We abort interpreter execution after the stack overflow is thrown, and thus
- // allow allocation here despite the outer DisallowHeapAllocationScope.
- AllowHeapAllocation yes_gc;
- isolate->StackOverflow();
- return IrregexpInterpreter::EXCEPTION;
-}
-
-// Only throws if called from the runtime, otherwise just returns the EXCEPTION
-// status code.
-IrregexpInterpreter::Result MaybeThrowStackOverflow(
- Isolate* isolate, RegExp::CallOrigin call_origin) {
- if (call_origin == RegExp::CallOrigin::kFromRuntime) {
- return ThrowStackOverflow(isolate, call_origin);
- } else {
- return IrregexpInterpreter::EXCEPTION;
- }
-}
-
-template <typename Char>
-void UpdateCodeAndSubjectReferences(
- Isolate* isolate, Handle<ByteArray> code_array,
- Handle<String> subject_string, ByteArray* code_array_out,
- const byte** code_base_out, const byte** pc_out, String* subject_string_out,
- Vector<const Char>* subject_string_vector_out) {
- DisallowHeapAllocation no_gc;
-
- if (*code_base_out != code_array->GetDataStartAddress()) {
- *code_array_out = *code_array;
- const intptr_t pc_offset = *pc_out - *code_base_out;
- DCHECK_GT(pc_offset, 0);
- *code_base_out = code_array->GetDataStartAddress();
- *pc_out = *code_base_out + pc_offset;
- }
-
- DCHECK(subject_string->IsFlat());
- *subject_string_out = *subject_string;
- *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
-}
-
-// Runs all pending interrupts and updates unhandlified object references if
-// necessary.
-template <typename Char>
-IrregexpInterpreter::Result HandleInterrupts(
- Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
- String* subject_string_out, const byte** code_base_out,
- Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
- DisallowHeapAllocation no_gc;
-
- StackLimitCheck check(isolate);
- bool js_has_overflowed = check.JsHasOverflowed();
-
- if (call_origin == RegExp::CallOrigin::kFromJs) {
- // Direct calls from JavaScript can be interrupted in two ways:
- // 1. A real stack overflow, in which case we let the caller throw the
- // exception.
- // 2. The stack guard was used to interrupt execution for another purpose,
- // forcing the call through the runtime system.
- if (js_has_overflowed) {
- return IrregexpInterpreter::EXCEPTION;
- } else if (check.InterruptRequested()) {
- return IrregexpInterpreter::RETRY;
- }
- } else {
- DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
- // Prepare for possible GC.
- HandleScope handles(isolate);
- Handle<ByteArray> code_handle(*code_array_out, isolate);
- Handle<String> subject_handle(*subject_string_out, isolate);
-
- if (js_has_overflowed) {
- return ThrowStackOverflow(isolate, call_origin);
- } else if (check.InterruptRequested()) {
- const bool was_one_byte =
- String::IsOneByteRepresentationUnderneath(*subject_string_out);
- Object result;
- {
- AllowHeapAllocation yes_gc;
- result = isolate->stack_guard()->HandleInterrupts();
- }
- if (result.IsException(isolate)) {
- return IrregexpInterpreter::EXCEPTION;
- }
-
- // If we changed between a LATIN1 and a UC16 string, we need to restart
- // regexp matching with the appropriate template instantiation of
- // RawMatch.
- if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
- was_one_byte) {
- return IrregexpInterpreter::RETRY;
- }
-
- UpdateCodeAndSubjectReferences(
- isolate, code_handle, subject_handle, code_array_out, code_base_out,
- pc_out, subject_string_out, subject_string_vector_out);
- }
- }
-
- return IrregexpInterpreter::SUCCESS;
-}
-
-bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
- int mask = RegExpMacroAssembler::kTableMask;
- int b = table[(current_char & mask) >> kBitsPerByteLog2];
- int bit = (current_char & (kBitsPerByte - 1));
- return (b & (1 << bit)) != 0;
-}
-
-// If computed gotos are supported by the compiler, we can get addresses to
-// labels directly in C/C++. Every bytecode handler has its own label and we
-// store the addresses in a dispatch table indexed by bytecode. To execute the
-// next handler we simply jump (goto) directly to its address.
-#if V8_USE_COMPUTED_GOTO
-#define BC_LABEL(name) BC_##name:
-#define DECODE() \
- do { \
- next_insn = Load32Aligned(next_pc); \
- next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \
- } while (false)
-#define DISPATCH() \
- pc = next_pc; \
- insn = next_insn; \
- goto* next_handler_addr
-// Without computed goto support, we fall back to a simple switch-based
-// dispatch (A large switch statement inside a loop with a case for every
-// bytecode).
-#else // V8_USE_COMPUTED_GOTO
-#define BC_LABEL(name) case BC_##name:
-#define DECODE() next_insn = Load32Aligned(next_pc)
-#define DISPATCH() \
- pc = next_pc; \
- insn = next_insn; \
- goto switch_dispatch_continuation
-#endif // V8_USE_COMPUTED_GOTO
-
-// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
-// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
-// We want those two macros as far apart as possible, because the goto in
-// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
-// don't hit the cache and have to fetch the next handler address from physical
-// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
-// potentially be executed unconditionally, reducing memory stall.
-#define ADVANCE(name) \
- next_pc = pc + RegExpBytecodeLength(BC_##name); \
- DECODE()
-#define SET_PC_FROM_OFFSET(offset) \
- next_pc = code_base + offset; \
- DECODE()
-
-#ifdef DEBUG
-#define BYTECODE(name) \
- BC_LABEL(name) \
- MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
- current_char, RegExpBytecodeLength(BC_##name), #name);
-#else
-#define BYTECODE(name) BC_LABEL(name)
-#endif // DEBUG
-
-template <typename Char>
-IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
- String subject_string,
- Vector<const Char> subject, int* registers,
- int current, uint32_t current_char,
- RegExp::CallOrigin call_origin,
- const uint32_t backtrack_limit) {
- DisallowHeapAllocation no_gc;
-
-#if V8_USE_COMPUTED_GOTO
-
-// We have to make sure that no OOB access to the dispatch table is possible and
-// all values are valid label addresses.
-// Otherwise jumps to arbitrary addresses could potentially happen.
-// This is ensured as follows:
-// Every index to the dispatch table gets masked using BYTECODE_MASK in
-// DECODE(). This way we can only get values between 0 (only the least
-// significant byte of an integer is used) and kRegExpPaddedBytecodeCount - 1
-// (BYTECODE_MASK is defined to be exactly this value).
-// All entries from kRegExpBytecodeCount to kRegExpPaddedBytecodeCount have to
-// be filled with BREAKs (invalid operation).
-
-// Fill dispatch table from last defined bytecode up to the next power of two
-// with BREAK (invalid operation).
-// TODO(pthier): Find a way to fill up automatically (at compile time)
-// 59 real bytecodes -> 5 fillers
-#define BYTECODE_FILLER_ITERATOR(V) \
- V(BREAK) /* 1 */ \
- V(BREAK) /* 2 */ \
- V(BREAK) /* 3 */ \
- V(BREAK) /* 4 */ \
- V(BREAK) /* 5 */
-
-#define COUNT(...) +1
- static constexpr int kRegExpBytecodeFillerCount =
- BYTECODE_FILLER_ITERATOR(COUNT);
-#undef COUNT
-
- // Make sure kRegExpPaddedBytecodeCount is actually the closest possible power
- // of two.
- DCHECK_EQ(kRegExpPaddedBytecodeCount,
- base::bits::RoundUpToPowerOfTwo32(kRegExpBytecodeCount));
-
- // Make sure every bytecode we get by using BYTECODE_MASK is well defined.
- STATIC_ASSERT(kRegExpBytecodeCount <= kRegExpPaddedBytecodeCount);
- STATIC_ASSERT(kRegExpBytecodeCount + kRegExpBytecodeFillerCount ==
- kRegExpPaddedBytecodeCount);
-
-#define DECLARE_DISPATCH_TABLE_ENTRY(name, ...) &&BC_##name,
- static const void* const dispatch_table[kRegExpPaddedBytecodeCount] = {
- BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)
- BYTECODE_FILLER_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
-#undef DECLARE_DISPATCH_TABLE_ENTRY
-#undef BYTECODE_FILLER_ITERATOR
-
-#endif // V8_USE_COMPUTED_GOTO
-
- const byte* pc = code_array.GetDataStartAddress();
- const byte* code_base = pc;
-
- BacktrackStack backtrack_stack;
-
- uint32_t backtrack_count = 0;
-
-#ifdef DEBUG
- if (FLAG_trace_regexp_bytecodes) {
- PrintF("\n\nStart bytecode interpreter\n\n");
- }
-#endif
-
- while (true) {
- const byte* next_pc = pc;
- int32_t insn;
- int32_t next_insn;
-#if V8_USE_COMPUTED_GOTO
- const void* next_handler_addr;
- DECODE();
- DISPATCH();
-#else
- insn = Load32Aligned(pc);
- switch (insn & BYTECODE_MASK) {
-#endif // V8_USE_COMPUTED_GOTO
- BYTECODE(BREAK) { UNREACHABLE(); }
- BYTECODE(PUSH_CP) {
- ADVANCE(PUSH_CP);
- if (!backtrack_stack.push(current)) {
- return MaybeThrowStackOverflow(isolate, call_origin);
- }
- DISPATCH();
- }
- BYTECODE(PUSH_BT) {
- ADVANCE(PUSH_BT);
- if (!backtrack_stack.push(Load32Aligned(pc + 4))) {
- return MaybeThrowStackOverflow(isolate, call_origin);
- }
- DISPATCH();
- }
- BYTECODE(PUSH_REGISTER) {
- ADVANCE(PUSH_REGISTER);
- if (!backtrack_stack.push(registers[insn >> BYTECODE_SHIFT])) {
- return MaybeThrowStackOverflow(isolate, call_origin);
- }
- DISPATCH();
- }
- BYTECODE(SET_REGISTER) {
- ADVANCE(SET_REGISTER);
- registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
- DISPATCH();
- }
- BYTECODE(ADVANCE_REGISTER) {
- ADVANCE(ADVANCE_REGISTER);
- registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
- DISPATCH();
- }
- BYTECODE(SET_REGISTER_TO_CP) {
- ADVANCE(SET_REGISTER_TO_CP);
- registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
- DISPATCH();
- }
- BYTECODE(SET_CP_TO_REGISTER) {
- ADVANCE(SET_CP_TO_REGISTER);
- current = registers[insn >> BYTECODE_SHIFT];
- DISPATCH();
- }
- BYTECODE(SET_REGISTER_TO_SP) {
- ADVANCE(SET_REGISTER_TO_SP);
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.sp();
- DISPATCH();
- }
- BYTECODE(SET_SP_TO_REGISTER) {
- ADVANCE(SET_SP_TO_REGISTER);
- backtrack_stack.set_sp(registers[insn >> BYTECODE_SHIFT]);
- DISPATCH();
- }
- BYTECODE(POP_CP) {
- ADVANCE(POP_CP);
- current = backtrack_stack.pop();
- DISPATCH();
- }
- BYTECODE(POP_BT) {
- STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0);
- if (++backtrack_count == backtrack_limit) {
- // Exceeded limits are treated as a failed match.
- return IrregexpInterpreter::FAILURE;
- }
-
- IrregexpInterpreter::Result return_code =
- HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
- &code_base, &subject, &pc);
- if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
-
- SET_PC_FROM_OFFSET(backtrack_stack.pop());
- DISPATCH();
- }
- BYTECODE(POP_REGISTER) {
- ADVANCE(POP_REGISTER);
- registers[insn >> BYTECODE_SHIFT] = backtrack_stack.pop();
- DISPATCH();
- }
- BYTECODE(FAIL) {
- isolate->counters()->regexp_backtracks()->AddSample(
- static_cast<int>(backtrack_count));
- return IrregexpInterpreter::FAILURE;
- }
- BYTECODE(SUCCEED) {
- isolate->counters()->regexp_backtracks()->AddSample(
- static_cast<int>(backtrack_count));
- return IrregexpInterpreter::SUCCESS;
- }
- BYTECODE(ADVANCE_CP) {
- ADVANCE(ADVANCE_CP);
- current += insn >> BYTECODE_SHIFT;
- DISPATCH();
- }
- BYTECODE(GOTO) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- DISPATCH();
- }
- BYTECODE(ADVANCE_CP_AND_GOTO) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- current += insn >> BYTECODE_SHIFT;
- DISPATCH();
- }
- BYTECODE(CHECK_GREEDY) {
- if (current == backtrack_stack.peek()) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- backtrack_stack.pop();
- } else {
- ADVANCE(CHECK_GREEDY);
- }
- DISPATCH();
- }
- BYTECODE(LOAD_CURRENT_CHAR) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= subject.length() || pos < 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(LOAD_CURRENT_CHAR);
- current_char = subject[pos];
- }
- DISPATCH();
- }
- BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
- ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
- int pos = current + (insn >> BYTECODE_SHIFT);
- current_char = subject[pos];
- DISPATCH();
- }
- BYTECODE(LOAD_2_CURRENT_CHARS) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > subject.length() || pos < 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(LOAD_2_CURRENT_CHARS);
- Char next = subject[pos + 1];
- current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
- }
- DISPATCH();
- }
- BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
- ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
- int pos = current + (insn >> BYTECODE_SHIFT);
- Char next = subject[pos + 1];
- current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
- DISPATCH();
- }
- BYTECODE(LOAD_4_CURRENT_CHARS) {
- DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 4 > subject.length() || pos < 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(LOAD_4_CURRENT_CHARS);
- Char next1 = subject[pos + 1];
- Char next2 = subject[pos + 2];
- Char next3 = subject[pos + 3];
- current_char =
- (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
- }
- DISPATCH();
- }
- BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
- ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
- DCHECK_EQ(1, sizeof(Char));
- int pos = current + (insn >> BYTECODE_SHIFT);
- Char next1 = subject[pos + 1];
- Char next2 = subject[pos + 2];
- Char next3 = subject[pos + 3];
- current_char =
- (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
- DISPATCH();
- }
- BYTECODE(CHECK_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_4_CHARS);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_CHAR);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c != current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_NOT_4_CHARS);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c != current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_NOT_CHAR);
- }
- DISPATCH();
- }
- BYTECODE(AND_CHECK_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c == (current_char & Load32Aligned(pc + 8))) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- } else {
- ADVANCE(AND_CHECK_4_CHARS);
- }
- DISPATCH();
- }
- BYTECODE(AND_CHECK_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c == (current_char & Load32Aligned(pc + 4))) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(AND_CHECK_CHAR);
- }
- DISPATCH();
- }
- BYTECODE(AND_CHECK_NOT_4_CHARS) {
- uint32_t c = Load32Aligned(pc + 4);
- if (c != (current_char & Load32Aligned(pc + 8))) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- } else {
- ADVANCE(AND_CHECK_NOT_4_CHARS);
- }
- DISPATCH();
- }
- BYTECODE(AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- if (c != (current_char & Load32Aligned(pc + 4))) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(AND_CHECK_NOT_CHAR);
- }
- DISPATCH();
- }
- BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
- uint32_t c = (insn >> BYTECODE_SHIFT);
- uint32_t minus = Load16Aligned(pc + 4);
- uint32_t mask = Load16Aligned(pc + 6);
- if (c != ((current_char - minus) & mask)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(MINUS_AND_CHECK_NOT_CHAR);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_CHAR_IN_RANGE) {
- uint32_t from = Load16Aligned(pc + 4);
- uint32_t to = Load16Aligned(pc + 6);
- if (from <= current_char && current_char <= to) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_CHAR_IN_RANGE);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
- uint32_t from = Load16Aligned(pc + 4);
- uint32_t to = Load16Aligned(pc + 6);
- if (from > current_char || current_char > to) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_CHAR_NOT_IN_RANGE);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_BIT_IN_TABLE) {
- if (CheckBitInTable(current_char, pc + 8)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_BIT_IN_TABLE);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_LT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
- if (current_char < limit) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_LT);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_GT) {
- uint32_t limit = (insn >> BYTECODE_SHIFT);
- if (current_char > limit) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_GT);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_REGISTER_LT) {
- if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_REGISTER_LT);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_REGISTER_GE) {
- if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- } else {
- ADVANCE(CHECK_REGISTER_GE);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_REGISTER_EQ_POS) {
- if (registers[insn >> BYTECODE_SHIFT] == current) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_REGISTER_EQ_POS);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_REGS_EQUAL) {
- if (registers[insn >> BYTECODE_SHIFT] ==
- registers[Load32Aligned(pc + 4)]) {
- ADVANCE(CHECK_NOT_REGS_EQUAL);
- } else {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- }
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_BACK_REF) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current + len > subject.length() ||
- CompareChars(&subject[from], &subject[current], len) != 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- DISPATCH();
- }
- current += len;
- }
- ADVANCE(CHECK_NOT_BACK_REF);
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current - len < 0 ||
- CompareChars(&subject[from], &subject[current - len], len) != 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- DISPATCH();
- }
- current -= len;
- }
- ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
- UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current + len > subject.length() ||
- !BackRefMatchesNoCase(isolate, from, current, len, subject)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- DISPATCH();
- }
- current += len;
- }
- ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
- UNREACHABLE(); // TODO(jgruber): Remove this unused bytecode.
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from >= 0 && len > 0) {
- if (current - len < 0 ||
- !BackRefMatchesNoCase(isolate, from, current - len, len, subject)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- DISPATCH();
- }
- current -= len;
- }
- ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
- DISPATCH();
- }
- BYTECODE(CHECK_AT_START) {
- if (current + (insn >> BYTECODE_SHIFT) == 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_AT_START);
- }
- DISPATCH();
- }
- BYTECODE(CHECK_NOT_AT_START) {
- if (current + (insn >> BYTECODE_SHIFT) == 0) {
- ADVANCE(CHECK_NOT_AT_START);
- } else {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- }
- DISPATCH();
- }
- BYTECODE(SET_CURRENT_POSITION_FROM_END) {
- ADVANCE(SET_CURRENT_POSITION_FROM_END);
- int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
- if (subject.length() - current > by) {
- current = subject.length() - by;
- current_char = subject[current - 1];
- }
- DISPATCH();
- }
- BYTECODE(CHECK_CURRENT_POSITION) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos > subject.length() || pos < 0) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
- } else {
- ADVANCE(CHECK_CURRENT_POSITION);
- }
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_CHAR) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load16AlignedSigned(pc + 4);
- uint32_t c = Load16Aligned(pc + 6);
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- if (c == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_CHAR_AND) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load16AlignedSigned(pc + 4);
- uint16_t c = Load16Aligned(pc + 6);
- uint32_t mask = Load32Aligned(pc + 8);
- int32_t maximum_offset = Load32Aligned(pc + 12);
- while (static_cast<uintptr_t>(current + maximum_offset) <=
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- if (c == (current_char & mask)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load16AlignedSigned(pc + 4);
- uint16_t c = Load16Aligned(pc + 6);
- int32_t maximum_offset = Load32Aligned(pc + 8);
- while (static_cast<uintptr_t>(current + maximum_offset) <=
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- if (c == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load16AlignedSigned(pc + 4);
- const byte* table = pc + 8;
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- if (CheckBitInTable(current_char, table)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load16AlignedSigned(pc + 4);
- uint16_t limit = Load16Aligned(pc + 6);
- const byte* table = pc + 8;
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- if (current_char > limit) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
- DISPATCH();
- }
- if (!CheckBitInTable(current_char, table)) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
- DISPATCH();
- }
- BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
- int load_offset = (insn >> BYTECODE_SHIFT);
- int32_t advance = Load32Aligned(pc + 4);
- uint16_t c = Load16Aligned(pc + 8);
- uint16_t c2 = Load16Aligned(pc + 10);
- while (static_cast<uintptr_t>(current + load_offset) <
- static_cast<uintptr_t>(subject.length())) {
- current_char = subject[current + load_offset];
- // The two if-statements below are split up intentionally, as combining
- // them seems to result in register allocation behaving quite
- // differently and slowing down the resulting code.
- if (c == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- DISPATCH();
- }
- if (c2 == current_char) {
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
- DISPATCH();
- }
- current += advance;
- }
- SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
- DISPATCH();
- }
-#if V8_USE_COMPUTED_GOTO
-// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
-// V8_USE_COMPUTED_GOTO here.
-#else
- default:
- UNREACHABLE();
- }
- // Label we jump to in DISPATCH(). There must be no instructions between the
- // end of the switch, this label and the end of the loop.
- switch_dispatch_continuation : {}
-#endif // V8_USE_COMPUTED_GOTO
- }
-}
-
-#undef BYTECODE
-#undef DISPATCH
-#undef DECODE
-#undef SET_PC_FROM_OFFSET
-#undef ADVANCE
-#undef BC_LABEL
-#undef V8_USE_COMPUTED_GOTO
-
-} // namespace
-
-// static
-IrregexpInterpreter::Result IrregexpInterpreter::Match(
- Isolate* isolate, JSRegExp regexp, String subject_string, int* registers,
- int registers_length, int start_position, RegExp::CallOrigin call_origin) {
- if (FLAG_regexp_tier_up) {
- regexp.TierUpTick();
- }
-
- bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
- ByteArray code_array = ByteArray::cast(regexp.Bytecode(is_one_byte));
-
- return MatchInternal(isolate, code_array, subject_string, registers,
- registers_length, start_position, call_origin,
- regexp.BacktrackLimit());
-}
-
-IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
- Isolate* isolate, ByteArray code_array, String subject_string,
- int* registers, int registers_length, int start_position,
- RegExp::CallOrigin call_origin, uint32_t backtrack_limit) {
- DCHECK(subject_string.IsFlat());
-
- // Note: Heap allocation *is* allowed in two situations if calling from
- // Runtime:
- // 1. When creating & throwing a stack overflow exception. The interpreter
- // aborts afterwards, and thus possible-moved objects are never used.
- // 2. When handling interrupts. We manually relocate unhandlified references
- // after interrupts have run.
- DisallowHeapAllocation no_gc;
-
- // Reset registers to -1 (=undefined).
- // This is necessary because registers are only written when a
- // capture group matched.
- // Resetting them ensures that previous matches are cleared.
- memset(registers, -1, sizeof(registers[0]) * registers_length);
-
- uc16 previous_char = '\n';
- String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
- if (subject_content.IsOneByte()) {
- Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
- if (start_position != 0) previous_char = subject_vector[start_position - 1];
- return RawMatch(isolate, code_array, subject_string, subject_vector,
- registers, start_position, previous_char, call_origin,
- backtrack_limit);
- } else {
- DCHECK(subject_content.IsTwoByte());
- Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
- if (start_position != 0) previous_char = subject_vector[start_position - 1];
- return RawMatch(isolate, code_array, subject_string, subject_vector,
- registers, start_position, previous_char, call_origin,
- backtrack_limit);
- }
-}
-
-#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
-
-// This method is called through an external reference from RegExpExecInternal
-// builtin.
-IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
- Address subject, int32_t start_position, Address, Address, int* registers,
- int32_t registers_length, Address, RegExp::CallOrigin call_origin,
- Isolate* isolate, Address regexp) {
- DCHECK_NOT_NULL(isolate);
- DCHECK_NOT_NULL(registers);
- DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
-
- DisallowHeapAllocation no_gc;
- DisallowJavascriptExecution no_js(isolate);
-
- String subject_string = String::cast(Object(subject));
- JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
-
- if (regexp_obj.MarkedForTierUp()) {
- // Returning RETRY will re-enter through runtime, where actual recompilation
- // for tier-up takes place.
- return IrregexpInterpreter::RETRY;
- }
-
- return Match(isolate, regexp_obj, subject_string, registers, registers_length,
- start_position, call_origin);
-}
-
-#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
-
-IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
- Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
- int* registers, int registers_length, int start_position) {
- return Match(isolate, *regexp, *subject_string, registers, registers_length,
- start_position, RegExp::CallOrigin::kFromRuntime);
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-interpreter.h b/js/src/new-regexp/regexp-interpreter.h
deleted file mode 100644
index b4c0da2b7..000000000
--- a/js/src/new-regexp/regexp-interpreter.h
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2011 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// A simple interpreter for the Irregexp byte code.
-
-#ifndef V8_REGEXP_REGEXP_INTERPRETER_H_
-#define V8_REGEXP_REGEXP_INTERPRETER_H_
-
-#include "new-regexp/regexp.h"
-
-namespace v8 {
-namespace internal {
-
-class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
- public:
- enum Result {
- FAILURE = RegExp::kInternalRegExpFailure,
- SUCCESS = RegExp::kInternalRegExpSuccess,
- EXCEPTION = RegExp::kInternalRegExpException,
- RETRY = RegExp::kInternalRegExpRetry,
- };
-
- // In case a StackOverflow occurs, a StackOverflowException is created and
- // EXCEPTION is returned.
- static Result MatchForCallFromRuntime(Isolate* isolate,
- Handle<JSRegExp> regexp,
- Handle<String> subject_string,
- int* registers, int registers_length,
- int start_position);
-
- // In case a StackOverflow occurs, EXCEPTION is returned. The caller is
- // responsible for creating the exception.
- // RETRY is returned if a retry through the runtime is needed (e.g. when
- // interrupts have been scheduled or the regexp is marked for tier-up).
- // Arguments input_start, input_end and backtrack_stack are
- // unused. They are only passed to match the signature of the native irregex
- // code.
- static Result MatchForCallFromJs(Address subject, int32_t start_position,
- Address input_start, Address input_end,
- int* registers, int32_t registers_length,
- Address backtrack_stack,
- RegExp::CallOrigin call_origin,
- Isolate* isolate, Address regexp);
-
- static Result MatchInternal(Isolate* isolate, ByteArray code_array,
- String subject_string, int* registers,
- int registers_length, int start_position,
- RegExp::CallOrigin call_origin,
- uint32_t backtrack_limit);
-
- private:
- static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
- int* registers, int registers_length, int start_position,
- RegExp::CallOrigin call_origin);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_INTERPRETER_H_
diff --git a/js/src/new-regexp/regexp-macro-assembler-arch.h b/js/src/new-regexp/regexp-macro-assembler-arch.h
deleted file mode 100644
index 8aeb8c433..000000000
--- a/js/src/new-regexp/regexp-macro-assembler-arch.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-// This file implements the NativeRegExpMacroAssembler interface for
-// SpiderMonkey. It provides the same interface as each of V8's
-// architecture-specific implementations.
-
-#ifndef RegexpMacroAssemblerArch_h
-#define RegexpMacroAssemblerArch_h
-
-#include "jit/MacroAssembler.h"
-#include "new-regexp/regexp-macro-assembler.h"
-
-namespace v8 {
-namespace internal {
-
-struct FrameData {
- // Character position at the start of the input, stored as a
- // negative offset from the end of the string (input_end_pointer_).
- size_t inputStart;
-
- // The backtrack_stack_pointer_ register points to the top of the stack.
- // This points to the bottom of the backtrack stack.
- void* backtrackStackBase;
-
- // Copy of the input MatchPairs.
- int32_t* matches; // pointer to capture array
- int32_t numMatches; // size of capture array
-};
-
-class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
- public:
- SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
- js::jit::StackMacroAssembler& masm, Zone* zone,
- Mode mode, uint32_t num_capture_registers);
- virtual ~SMRegExpMacroAssembler() {} // Nothing to do here
-
- virtual int stack_limit_slack();
- virtual IrregexpImplementation Implementation();
-
- virtual bool Succeed();
- virtual void Fail();
-
- virtual void AdvanceCurrentPosition(int by);
- virtual void PopCurrentPosition();
- virtual void PushCurrentPosition();
- virtual void SetCurrentPositionFromEnd(int by);
-
- virtual void Backtrack();
- virtual void Bind(Label* label);
- virtual void GoTo(Label* label);
- virtual void PushBacktrack(Label* label);
-
- virtual void CheckCharacter(uint32_t c, Label* on_equal);
- virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
- virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
- virtual void CheckCharacterLT(uc16 limit, Label* on_less);
- virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
- Label* on_equal);
- virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
- Label* on_not_equal);
- virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask,
- Label* on_not_equal);
- virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
- virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
- virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
- Label* on_not_in_range);
- virtual void CheckAtStart(int cp_offset, Label* on_at_start);
- virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
- virtual void CheckPosition(int cp_offset, Label* on_outside_input);
- virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
- virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
- virtual void CheckNotBackReference(int start_reg, bool read_backward,
- Label* on_no_match);
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
- Label* on_no_match);
-
- virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
- bool check_bounds, int characters,
- int eats_at_least);
-
- virtual void AdvanceRegister(int reg, int by);
- virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
- virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
- virtual void IfRegisterEqPos(int reg, Label* if_eq);
- virtual void PopRegister(int register_index);
- virtual void PushRegister(int register_index,
- StackCheckFlag check_stack_limit);
- virtual void ReadCurrentPositionFromRegister(int reg);
- virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
- virtual void ReadStackPointerFromRegister(int reg);
- virtual void WriteStackPointerToRegister(int reg);
- virtual void SetRegister(int register_index, int to);
- virtual void ClearRegisters(int reg_from, int reg_to);
-
- virtual Handle<HeapObject> GetCode(Handle<String> source);
-
- private:
- size_t frameSize_ = 0;
-
- void createStackFrame();
- void initFrameAndRegs();
- void successHandler();
- void exitHandler();
- void backtrackHandler();
- void stackOverflowHandler();
-
- // Push a register on the backtrack stack.
- void Push(js::jit::Register value);
-
- // Pop a value from the backtrack stack.
- void Pop(js::jit::Register target);
-
- void CheckAtStartImpl(int cp_offset, Label* on_cond,
- js::jit::Assembler::Condition cond);
- void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
- js::jit::Assembler::Condition cond);
- void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
- bool negate);
- void CheckCharacterInRangeImpl(uc16 from, uc16 to, Label* on_cond,
- js::jit::Assembler::Condition cond);
- void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
- Label* on_no_match, bool ignore_case);
-
- void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
-
- void JumpOrBacktrack(Label* to);
-
- // MacroAssembler methods that take a Label can be called with a
- // null label, which means that we should backtrack if we would jump
- // to that label. This is a helper to avoid writing out the same
- // logic a dozen times.
- inline js::jit::Label* LabelOrBacktrack(Label* to) {
- return to ? to->inner() : &backtrack_label_;
- }
-
- void CheckBacktrackStackLimit();
-
- static bool GrowBacktrackStack(RegExpStack* regexp_stack);
-
- static uint32_t CaseInsensitiveCompareStrings(const char16_t* substring1,
- const char16_t* substring2,
- size_t byteLength);
- static uint32_t CaseInsensitiveCompareUCStrings(const char16_t* substring1,
- const char16_t* substring2,
- size_t byteLength);
-
- inline int char_size() { return static_cast<int>(mode_); }
- inline js::jit::Scale factor() {
- return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
- }
-
- js::jit::Address inputStart() {
- return js::jit::Address(masm_.getStackPointer(),
- offsetof(FrameData, inputStart));
- }
- js::jit::Address backtrackStackBase() {
- return js::jit::Address(masm_.getStackPointer(),
- offsetof(FrameData, backtrackStackBase));
- }
- js::jit::Address matches() {
- return js::jit::Address(masm_.getStackPointer(),
- offsetof(FrameData, matches));
- }
- js::jit::Address numMatches() {
- return js::jit::Address(masm_.getStackPointer(),
- offsetof(FrameData, numMatches));
- }
-
- // The stack-pointer-relative location of a regexp register.
- js::jit::Address register_location(int register_index) {
- return js::jit::Address(masm_.getStackPointer(),
- register_offset(register_index));
- }
-
- int32_t register_offset(int register_index) {
- MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
- if (num_registers_ <= register_index) {
- num_registers_ = register_index + 1;
- }
- static_assert(alignof(uintptr_t) <= alignof(FrameData),"Regexp: Alignment of uintptr_t and FrameData mismatch");
- return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
- }
-
- JSContext* cx_;
- js::jit::StackMacroAssembler& masm_;
-
- /*
- * This assembler uses the following registers:
- *
- * - current_character_:
- * Contains the character (or characters) currently being examined.
- * Must be loaded using LoadCurrentCharacter before using any of the
- * dispatch methods. After a matching pass for a global regexp,
- * temporarily stores the index of capture start.
- * - current_position_:
- * Current position in input *as negative byte offset from end of string*.
- * - input_end_pointer_:
- * Points to byte after last character in the input. current_position_ is
- * relative to this.
- * - backtrack_stack_pointer_:
- * Points to tip of the (heap-allocated) backtrack stack. The stack grows
- * downward (like the native stack).
- * - temp0_, temp1_, temp2_:
- * Scratch registers.
- *
- * The native stack pointer is used to access arguments (InputOutputData),
- * local variables (FrameData), and irregexp's internal virtual registers
- * (see register_location).
- */
-
- js::jit::Register current_character_;
- js::jit::Register current_position_;
- js::jit::Register input_end_pointer_;
- js::jit::Register backtrack_stack_pointer_;
- js::jit::Register temp0_, temp1_, temp2_;
-
- js::jit::Label entry_label_;
- js::jit::Label start_label_;
- js::jit::Label backtrack_label_;
- js::jit::Label success_label_;
- js::jit::Label exit_label_;
- js::jit::Label stack_overflow_label_;
- js::jit::Label exit_with_exception_label_;
-
- // When we generate the code to push a backtrack label's address
- // onto the backtrack stack, we don't know its final address. We
- // have to patch it after linking. This is slightly delicate, as the
- // Label itself (which is allocated on the stack) may not exist by
- // the time we link. The approach is as follows:
- //
- // 1. When we push a label on the backtrack stack (PushBacktrack),
- // we bind the label's patchOffset_ field to the offset within
- // the code that should be overwritten. This works because each
- // label is only pushed by a single instruction.
- //
- // 2. When we bind a label (Bind), we check to see if it has a
- // bound patchOffset_. If it does, we create a LabelPatch mapping
- // its patch offset to the offset of the label itself.
- //
- // 3. While linking the code, we walk the list of label patches
- // and patch the code accordingly.
- class LabelPatch {
- public:
- LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
- : patchOffset_(patchOffset), labelOffset_(labelOffset) {}
-
- js::jit::CodeOffset patchOffset_;
- size_t labelOffset_ = 0;
- };
-
- js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
- void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
- oomUnsafe.crash("Irregexp label patch");
- }
- }
-
- Mode mode_;
- int num_registers_;
- int num_capture_registers_;
- js::jit::LiveGeneralRegisterSet savedRegisters_;
-
- public:
- using TableVector =
- js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
- TableVector& tables() { return tables_; }
-
- private:
- TableVector tables_;
- void AddTable(PseudoHandle<ByteArrayData> table) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- if (!tables_.append(std::move(table))) {
- oomUnsafe.crash("Irregexp table append");
- }
- }
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // RegexpMacroAssemblerArch_h
diff --git a/js/src/new-regexp/regexp-macro-assembler-tracer.cc b/js/src/new-regexp/regexp-macro-assembler-tracer.cc
deleted file mode 100644
index 8eb587c3c..000000000
--- a/js/src/new-regexp/regexp-macro-assembler-tracer.cc
+++ /dev/null
@@ -1,418 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-macro-assembler-tracer.h"
-
-
-namespace v8 {
-namespace internal {
-
-RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer(
- Isolate* isolate, RegExpMacroAssembler* assembler)
- : RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) {
- IrregexpImplementation type = assembler->Implementation();
- DCHECK_LT(type, 9);
- const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS", "S390",
- "PPC", "X64", "X87", "Bytecode"};
- PrintF("RegExpMacroAssembler%s();\n", impl_names[type]);
-}
-
-RegExpMacroAssemblerTracer::~RegExpMacroAssemblerTracer() = default;
-
-void RegExpMacroAssemblerTracer::AbortedCodeGeneration() {
- PrintF(" AbortedCodeGeneration\n");
- assembler_->AbortedCodeGeneration();
-}
-
-
-// This is used for printing out debugging information. It makes an integer
-// that is closely related to the address of an object.
-static int LabelToInt(Label* label) {
- return static_cast<int>(reinterpret_cast<intptr_t>(label));
-}
-
-
-void RegExpMacroAssemblerTracer::Bind(Label* label) {
- PrintF("label[%08x]: (Bind)\n", LabelToInt(label));
- assembler_->Bind(label);
-}
-
-
-void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
- PrintF(" AdvanceCurrentPosition(by=%d);\n", by);
- assembler_->AdvanceCurrentPosition(by);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) {
- PrintF(" CheckGreedyLoop(label[%08x]);\n\n", LabelToInt(label));
- assembler_->CheckGreedyLoop(label);
-}
-
-
-void RegExpMacroAssemblerTracer::PopCurrentPosition() {
- PrintF(" PopCurrentPosition();\n");
- assembler_->PopCurrentPosition();
-}
-
-
-void RegExpMacroAssemblerTracer::PushCurrentPosition() {
- PrintF(" PushCurrentPosition();\n");
- assembler_->PushCurrentPosition();
-}
-
-
-void RegExpMacroAssemblerTracer::Backtrack() {
- PrintF(" Backtrack();\n");
- assembler_->Backtrack();
-}
-
-
-void RegExpMacroAssemblerTracer::GoTo(Label* label) {
- PrintF(" GoTo(label[%08x]);\n\n", LabelToInt(label));
- assembler_->GoTo(label);
-}
-
-
-void RegExpMacroAssemblerTracer::PushBacktrack(Label* label) {
- PrintF(" PushBacktrack(label[%08x]);\n", LabelToInt(label));
- assembler_->PushBacktrack(label);
-}
-
-
-bool RegExpMacroAssemblerTracer::Succeed() {
- bool restart = assembler_->Succeed();
- PrintF(" Succeed();%s\n", restart ? " [restart for global match]" : "");
- return restart;
-}
-
-
-void RegExpMacroAssemblerTracer::Fail() {
- PrintF(" Fail();");
- assembler_->Fail();
-}
-
-
-void RegExpMacroAssemblerTracer::PopRegister(int register_index) {
- PrintF(" PopRegister(register=%d);\n", register_index);
- assembler_->PopRegister(register_index);
-}
-
-
-void RegExpMacroAssemblerTracer::PushRegister(
- int register_index,
- StackCheckFlag check_stack_limit) {
- PrintF(" PushRegister(register=%d, %s);\n",
- register_index,
- check_stack_limit ? "check stack limit" : "");
- assembler_->PushRegister(register_index, check_stack_limit);
-}
-
-
-void RegExpMacroAssemblerTracer::AdvanceRegister(int reg, int by) {
- PrintF(" AdvanceRegister(register=%d, by=%d);\n", reg, by);
- assembler_->AdvanceRegister(reg, by);
-}
-
-
-void RegExpMacroAssemblerTracer::SetCurrentPositionFromEnd(int by) {
- PrintF(" SetCurrentPositionFromEnd(by=%d);\n", by);
- assembler_->SetCurrentPositionFromEnd(by);
-}
-
-
-void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
- PrintF(" SetRegister(register=%d, to=%d);\n", register_index, to);
- assembler_->SetRegister(register_index, to);
-}
-
-
-void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
- int cp_offset) {
- PrintF(" WriteCurrentPositionToRegister(register=%d,cp_offset=%d);\n",
- reg,
- cp_offset);
- assembler_->WriteCurrentPositionToRegister(reg, cp_offset);
-}
-
-
-void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) {
- PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to);
- assembler_->ClearRegisters(reg_from, reg_to);
-}
-
-
-void RegExpMacroAssemblerTracer::ReadCurrentPositionFromRegister(int reg) {
- PrintF(" ReadCurrentPositionFromRegister(register=%d);\n", reg);
- assembler_->ReadCurrentPositionFromRegister(reg);
-}
-
-
-void RegExpMacroAssemblerTracer::WriteStackPointerToRegister(int reg) {
- PrintF(" WriteStackPointerToRegister(register=%d);\n", reg);
- assembler_->WriteStackPointerToRegister(reg);
-}
-
-
-void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
- PrintF(" ReadStackPointerFromRegister(register=%d);\n", reg);
- assembler_->ReadStackPointerFromRegister(reg);
-}
-
-void RegExpMacroAssemblerTracer::LoadCurrentCharacterImpl(
- int cp_offset, Label* on_end_of_input, bool check_bounds, int characters,
- int eats_at_least) {
- const char* check_msg = check_bounds ? "" : " (unchecked)";
- PrintF(
- " LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars) (eats at "
- "least %d));\n",
- cp_offset, LabelToInt(on_end_of_input), check_msg, characters,
- eats_at_least);
- assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input, check_bounds,
- characters, eats_at_least);
-}
-
-class PrintablePrinter {
- public:
- explicit PrintablePrinter(uc16 character) : character_(character) { }
-
- const char* operator*() {
- if (character_ >= ' ' && character_ <= '~') {
- buffer_[0] = '(';
- buffer_[1] = static_cast<char>(character_);
- buffer_[2] = ')';
- buffer_[3] = '\0';
- } else {
- buffer_[0] = '\0';
- }
- return &buffer_[0];
- }
-
- private:
- uc16 character_;
- char buffer_[4];
-};
-
-
-void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
- PrintablePrinter printable(limit);
- PrintF(" CheckCharacterLT(c=0x%04x%s, label[%08x]);\n",
- limit,
- *printable,
- LabelToInt(on_less));
- assembler_->CheckCharacterLT(limit, on_less);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckCharacterGT(uc16 limit,
- Label* on_greater) {
- PrintablePrinter printable(limit);
- PrintF(" CheckCharacterGT(c=0x%04x%s, label[%08x]);\n",
- limit,
- *printable,
- LabelToInt(on_greater));
- assembler_->CheckCharacterGT(limit, on_greater);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckCharacter(unsigned c, Label* on_equal) {
- PrintablePrinter printable(c);
- PrintF(" CheckCharacter(c=0x%04x%s, label[%08x]);\n",
- c,
- *printable,
- LabelToInt(on_equal));
- assembler_->CheckCharacter(c, on_equal);
-}
-
-void RegExpMacroAssemblerTracer::CheckAtStart(int cp_offset,
- Label* on_at_start) {
- PrintF(" CheckAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
- LabelToInt(on_at_start));
- assembler_->CheckAtStart(cp_offset, on_at_start);
-}
-
-void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset,
- Label* on_not_at_start) {
- PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
- LabelToInt(on_not_at_start));
- assembler_->CheckNotAtStart(cp_offset, on_not_at_start);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckNotCharacter(unsigned c,
- Label* on_not_equal) {
- PrintablePrinter printable(c);
- PrintF(" CheckNotCharacter(c=0x%04x%s, label[%08x]);\n",
- c,
- *printable,
- LabelToInt(on_not_equal));
- assembler_->CheckNotCharacter(c, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckCharacterAfterAnd(
- unsigned c,
- unsigned mask,
- Label* on_equal) {
- PrintablePrinter printable(c);
- PrintF(" CheckCharacterAfterAnd(c=0x%04x%s, mask=0x%04x, label[%08x]);\n",
- c,
- *printable,
- mask,
- LabelToInt(on_equal));
- assembler_->CheckCharacterAfterAnd(c, mask, on_equal);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterAnd(
- unsigned c,
- unsigned mask,
- Label* on_not_equal) {
- PrintablePrinter printable(c);
- PrintF(" CheckNotCharacterAfterAnd(c=0x%04x%s, mask=0x%04x, label[%08x]);\n",
- c,
- *printable,
- mask,
- LabelToInt(on_not_equal));
- assembler_->CheckNotCharacterAfterAnd(c, mask, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
- uc16 c,
- uc16 minus,
- uc16 mask,
- Label* on_not_equal) {
- PrintF(" CheckNotCharacterAfterMinusAnd(c=0x%04x, minus=%04x, mask=0x%04x, "
- "label[%08x]);\n",
- c,
- minus,
- mask,
- LabelToInt(on_not_equal));
- assembler_->CheckNotCharacterAfterMinusAnd(c, minus, mask, on_not_equal);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckCharacterInRange(
- uc16 from,
- uc16 to,
- Label* on_not_in_range) {
- PrintablePrinter printable_from(from);
- PrintablePrinter printable_to(to);
- PrintF(" CheckCharacterInRange(from=0x%04x%s, to=0x%04x%s, label[%08x]);\n",
- from,
- *printable_from,
- to,
- *printable_to,
- LabelToInt(on_not_in_range));
- assembler_->CheckCharacterInRange(from, to, on_not_in_range);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckCharacterNotInRange(
- uc16 from,
- uc16 to,
- Label* on_in_range) {
- PrintablePrinter printable_from(from);
- PrintablePrinter printable_to(to);
- PrintF(
- " CheckCharacterNotInRange(from=0x%04x%s," " to=%04x%s, label[%08x]);\n",
- from,
- *printable_from,
- to,
- *printable_to,
- LabelToInt(on_in_range));
- assembler_->CheckCharacterNotInRange(from, to, on_in_range);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckBitInTable(
- Handle<ByteArray> table, Label* on_bit_set) {
- PrintF(" CheckBitInTable(label[%08x] ", LabelToInt(on_bit_set));
- for (int i = 0; i < kTableSize; i++) {
- PrintF("%c", table->get(i) != 0 ? 'X' : '.');
- if (i % 32 == 31 && i != kTableMask) {
- PrintF("\n ");
- }
- }
- PrintF(");\n");
- assembler_->CheckBitInTable(table, on_bit_set);
-}
-
-
-void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
- bool read_backward,
- Label* on_no_match) {
- PrintF(" CheckNotBackReference(register=%d, %s, label[%08x]);\n", start_reg,
- read_backward ? "backward" : "forward", LabelToInt(on_no_match));
- assembler_->CheckNotBackReference(start_reg, read_backward, on_no_match);
-}
-
-void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
- int start_reg, bool read_backward, Label* on_no_match) {
- PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
- start_reg, read_backward ? "backward" : "forward",
- LabelToInt(on_no_match));
- assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
- on_no_match);
-}
-
-void RegExpMacroAssemblerTracer::CheckPosition(int cp_offset,
- Label* on_outside_input) {
- PrintF(" CheckPosition(cp_offset=%d, label[%08x]);\n", cp_offset,
- LabelToInt(on_outside_input));
- assembler_->CheckPosition(cp_offset, on_outside_input);
-}
-
-
-bool RegExpMacroAssemblerTracer::CheckSpecialCharacterClass(
- uc16 type,
- Label* on_no_match) {
- bool supported = assembler_->CheckSpecialCharacterClass(type,
- on_no_match);
- PrintF(" CheckSpecialCharacterClass(type='%c', label[%08x]): %s;\n",
- type,
- LabelToInt(on_no_match),
- supported ? "true" : "false");
- return supported;
-}
-
-
-void RegExpMacroAssemblerTracer::IfRegisterLT(int register_index,
- int comparand, Label* if_lt) {
- PrintF(" IfRegisterLT(register=%d, number=%d, label[%08x]);\n",
- register_index, comparand, LabelToInt(if_lt));
- assembler_->IfRegisterLT(register_index, comparand, if_lt);
-}
-
-
-void RegExpMacroAssemblerTracer::IfRegisterEqPos(int register_index,
- Label* if_eq) {
- PrintF(" IfRegisterEqPos(register=%d, label[%08x]);\n",
- register_index, LabelToInt(if_eq));
- assembler_->IfRegisterEqPos(register_index, if_eq);
-}
-
-
-void RegExpMacroAssemblerTracer::IfRegisterGE(int register_index,
- int comparand, Label* if_ge) {
- PrintF(" IfRegisterGE(register=%d, number=%d, label[%08x]);\n",
- register_index, comparand, LabelToInt(if_ge));
- assembler_->IfRegisterGE(register_index, comparand, if_ge);
-}
-
-
-RegExpMacroAssembler::IrregexpImplementation
- RegExpMacroAssemblerTracer::Implementation() {
- return assembler_->Implementation();
-}
-
-
-Handle<HeapObject> RegExpMacroAssemblerTracer::GetCode(Handle<String> source) {
- PrintF(" GetCode(%s);\n", source->ToCString().get());
- return assembler_->GetCode(source);
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-macro-assembler-tracer.h b/js/src/new-regexp/regexp-macro-assembler-tracer.h
deleted file mode 100644
index 0596a18ba..000000000
--- a/js/src/new-regexp/regexp-macro-assembler-tracer.h
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_TRACER_H_
-#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_TRACER_H_
-
-#include "new-regexp/regexp-macro-assembler.h"
-
-namespace v8 {
-namespace internal {
-
-// Decorator on a RegExpMacroAssembler that write all calls.
-class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
- public:
- RegExpMacroAssemblerTracer(Isolate* isolate, RegExpMacroAssembler* assembler);
- ~RegExpMacroAssemblerTracer() override;
- void AbortedCodeGeneration() override;
- int stack_limit_slack() override { return assembler_->stack_limit_slack(); }
- bool CanReadUnaligned() override { return assembler_->CanReadUnaligned(); }
- void AdvanceCurrentPosition(int by) override; // Signed cp change.
- void AdvanceRegister(int reg, int by) override; // r[reg] += by.
- void Backtrack() override;
- void Bind(Label* label) override;
- void CheckCharacter(unsigned c, Label* on_equal) override;
- void CheckCharacterAfterAnd(unsigned c, unsigned and_with,
- Label* on_equal) override;
- void CheckCharacterGT(uc16 limit, Label* on_greater) override;
- void CheckCharacterLT(uc16 limit, Label* on_less) override;
- void CheckGreedyLoop(Label* on_tos_equals_current_position) override;
- void CheckAtStart(int cp_offset, Label* on_at_start) override;
- void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override;
- void CheckNotBackReference(int start_reg, bool read_backward,
- Label* on_no_match) override;
- void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- Label* on_no_match) override;
- void CheckNotCharacter(unsigned c, Label* on_not_equal) override;
- void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with,
- Label* on_not_equal) override;
- void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 and_with,
- Label* on_not_equal) override;
- void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range) override;
- void CheckCharacterNotInRange(uc16 from, uc16 to,
- Label* on_not_in_range) override;
- void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
- void CheckPosition(int cp_offset, Label* on_outside_input) override;
- bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match) override;
- void Fail() override;
- Handle<HeapObject> GetCode(Handle<String> source) override;
- void GoTo(Label* label) override;
- void IfRegisterGE(int reg, int comparand, Label* if_ge) override;
- void IfRegisterLT(int reg, int comparand, Label* if_lt) override;
- void IfRegisterEqPos(int reg, Label* if_eq) override;
- IrregexpImplementation Implementation() override;
- void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
- bool check_bounds, int characters,
- int eats_at_least) override;
- void PopCurrentPosition() override;
- void PopRegister(int register_index) override;
- void PushBacktrack(Label* label) override;
- void PushCurrentPosition() override;
- void PushRegister(int register_index,
- StackCheckFlag check_stack_limit) override;
- void ReadCurrentPositionFromRegister(int reg) override;
- void ReadStackPointerFromRegister(int reg) override;
- void SetCurrentPositionFromEnd(int by) override;
- void SetRegister(int register_index, int to) override;
- bool Succeed() override;
- void WriteCurrentPositionToRegister(int reg, int cp_offset) override;
- void ClearRegisters(int reg_from, int reg_to) override;
- void WriteStackPointerToRegister(int reg) override;
-
- private:
- RegExpMacroAssembler* assembler_;
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_MACRO_ASSEMBLER_TRACER_H_
diff --git a/js/src/new-regexp/regexp-macro-assembler.cc b/js/src/new-regexp/regexp-macro-assembler.cc
deleted file mode 100644
index 52c1cb1ba..000000000
--- a/js/src/new-regexp/regexp-macro-assembler.cc
+++ /dev/null
@@ -1,344 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-macro-assembler.h"
-
-#include "new-regexp/regexp-stack.h"
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/uchar.h"
-#include "unicode/unistr.h"
-#endif // V8_INTL_SUPPORT
-
-namespace v8 {
-namespace internal {
-
-RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
- : slow_safe_compiler_(false),
- global_mode_(NOT_GLOBAL),
- isolate_(isolate),
- zone_(zone) {}
-
-RegExpMacroAssembler::~RegExpMacroAssembler() = default;
-
-int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
- Address byte_offset2,
- size_t byte_length,
- Isolate* isolate) {
- // This function is not allowed to cause a garbage collection.
- // A GC might move the calling generated code and invalidate the
- // return address on the stack.
- DCHECK_EQ(0, byte_length % 2);
-
-#ifdef V8_INTL_SUPPORT
- int32_t length = (int32_t)(byte_length >> 1);
- icu::UnicodeString uni_str_1(reinterpret_cast<const char16_t*>(byte_offset1),
- length);
- return uni_str_1.caseCompare(reinterpret_cast<const char16_t*>(byte_offset2),
- length, U_FOLD_CASE_DEFAULT) == 0;
-#else
- uc16* substring1 = reinterpret_cast<uc16*>(byte_offset1);
- uc16* substring2 = reinterpret_cast<uc16*>(byte_offset2);
- size_t length = byte_length >> 1;
- DCHECK_NOT_NULL(isolate);
- unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
- isolate->regexp_macro_assembler_canonicalize();
- for (size_t i = 0; i < length; i++) {
- unibrow::uchar c1 = substring1[i];
- unibrow::uchar c2 = substring2[i];
- if (c1 != c2) {
- unibrow::uchar s1[1] = {c1};
- canonicalize->get(c1, '\0', s1);
- if (s1[0] != c2) {
- unibrow::uchar s2[1] = {c2};
- canonicalize->get(c2, '\0', s2);
- if (s1[0] != s2[0]) {
- return 0;
- }
- }
- }
- }
- return 1;
-#endif // V8_INTL_SUPPORT
-}
-
-
-void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
- Label* on_failure) {
- Label ok;
- // Check that current character is not a trail surrogate.
- LoadCurrentCharacter(cp_offset, &ok);
- CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok);
- // Check that previous character is not a lead surrogate.
- LoadCurrentCharacter(cp_offset - 1, &ok);
- CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure);
- Bind(&ok);
-}
-
-void RegExpMacroAssembler::CheckPosition(int cp_offset,
- Label* on_outside_input) {
- LoadCurrentCharacter(cp_offset, on_outside_input, true);
-}
-
-void RegExpMacroAssembler::LoadCurrentCharacter(int cp_offset,
- Label* on_end_of_input,
- bool check_bounds,
- int characters,
- int eats_at_least) {
- // By default, eats_at_least = characters.
- if (eats_at_least == kUseCharactersValue) {
- eats_at_least = characters;
- }
-
- LoadCurrentCharacterImpl(cp_offset, on_end_of_input, check_bounds, characters,
- eats_at_least);
-}
-
-bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
- Label* on_no_match) {
- return false;
-}
-
-NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
- Zone* zone)
- : RegExpMacroAssembler(isolate, zone) {}
-
-NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() = default;
-
-bool NativeRegExpMacroAssembler::CanReadUnaligned() {
- return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
-}
-
-#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
-
-// This method may only be called after an interrupt.
-int NativeRegExpMacroAssembler::CheckStackGuardState(
- Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
- Address* return_address, Code re_code, Address* subject,
- const byte** input_start, const byte** input_end) {
- DisallowHeapAllocation no_gc;
- Address old_pc = PointerAuthentication::AuthenticatePC(return_address, 0);
- DCHECK_LE(re_code.raw_instruction_start(), old_pc);
- DCHECK_LE(old_pc, re_code.raw_instruction_end());
-
- StackLimitCheck check(isolate);
- bool js_has_overflowed = check.JsHasOverflowed();
-
- if (call_origin == RegExp::CallOrigin::kFromJs) {
- // Direct calls from JavaScript can be interrupted in two ways:
- // 1. A real stack overflow, in which case we let the caller throw the
- // exception.
- // 2. The stack guard was used to interrupt execution for another purpose,
- // forcing the call through the runtime system.
-
- // Bug(v8:9540) Investigate why this method is called from JS although no
- // stackoverflow or interrupt is pending on ARM64. We return 0 in this case
- // to continue execution normally.
- if (js_has_overflowed) {
- return EXCEPTION;
- } else if (check.InterruptRequested()) {
- return RETRY;
- } else {
- return 0;
- }
- }
- DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
-
- // Prepare for possible GC.
- HandleScope handles(isolate);
- Handle<Code> code_handle(re_code, isolate);
- Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
- bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
- int return_value = 0;
-
- if (js_has_overflowed) {
- AllowHeapAllocation yes_gc;
- isolate->StackOverflow();
- return_value = EXCEPTION;
- } else if (check.InterruptRequested()) {
- AllowHeapAllocation yes_gc;
- Object result = isolate->stack_guard()->HandleInterrupts();
- if (result.IsException(isolate)) return_value = EXCEPTION;
- }
-
- if (*code_handle != re_code) { // Return address no longer valid
- // Overwrite the return address on the stack.
- intptr_t delta = code_handle->address() - re_code.address();
- Address new_pc = old_pc + delta;
- // TODO(v8:10026): avoid replacing a signed pointer.
- PointerAuthentication::ReplacePC(return_address, new_pc, 0);
- }
-
- // If we continue, we need to update the subject string addresses.
- if (return_value == 0) {
- // String encoding might have changed.
- if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
- is_one_byte) {
- // If we changed between an LATIN1 and an UC16 string, the specialized
- // code cannot be used, and we need to restart regexp matching from
- // scratch (including, potentially, compiling a new version of the code).
- return_value = RETRY;
- } else {
- *subject = subject_handle->ptr();
- intptr_t byte_length = *input_end - *input_start;
- *input_start = subject_handle->AddressOfCharacterAt(start_index, no_gc);
- *input_end = *input_start + byte_length;
- }
- }
- return return_value;
-}
-
-// Returns a {Result} sentinel, or the number of successful matches.
-int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int* offsets_vector,
- int offsets_vector_length,
- int previous_index, Isolate* isolate) {
- DCHECK(subject->IsFlat());
- DCHECK_LE(0, previous_index);
- DCHECK_LE(previous_index, subject->length());
-
- // No allocations before calling the regexp, but we can't use
- // DisallowHeapAllocation, since regexps might be preempted, and another
- // thread might do allocation anyway.
-
- String subject_ptr = *subject;
- // Character offsets into string.
- int start_offset = previous_index;
- int char_length = subject_ptr.length() - start_offset;
- int slice_offset = 0;
-
- // The string has been flattened, so if it is a cons string it contains the
- // full string in the first part.
- if (StringShape(subject_ptr).IsCons()) {
- DCHECK_EQ(0, ConsString::cast(subject_ptr).second().length());
- subject_ptr = ConsString::cast(subject_ptr).first();
- } else if (StringShape(subject_ptr).IsSliced()) {
- SlicedString slice = SlicedString::cast(subject_ptr);
- subject_ptr = slice.parent();
- slice_offset = slice.offset();
- }
- if (StringShape(subject_ptr).IsThin()) {
- subject_ptr = ThinString::cast(subject_ptr).actual();
- }
- // Ensure that an underlying string has the same representation.
- bool is_one_byte = subject_ptr.IsOneByteRepresentation();
- DCHECK(subject_ptr.IsExternalString() || subject_ptr.IsSeqString());
- // String is now either Sequential or External
- int char_size_shift = is_one_byte ? 0 : 1;
-
- DisallowHeapAllocation no_gc;
- const byte* input_start =
- subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc);
- int byte_length = char_length << char_size_shift;
- const byte* input_end = input_start + byte_length;
- return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
- offsets_vector_length, isolate, *regexp);
-}
-
-// Returns a {Result} sentinel, or the number of successful matches.
-// TODO(pthier): The JSRegExp object is passed to native irregexp code to match
-// the signature of the interpreter. We should get rid of JS objects passed to
-// internal methods.
-int NativeRegExpMacroAssembler::Execute(
- String input, // This needs to be the unpacked (sliced, cons) string.
- int start_offset, const byte* input_start, const byte* input_end,
- int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
- // Ensure that the minimum stack has been allocated.
- RegExpStackScope stack_scope(isolate);
- Address stack_base = stack_scope.stack()->stack_base();
-
- bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
- Code code = Code::cast(regexp.Code(is_one_byte));
- RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
-
- using RegexpMatcherSig = int(
- Address input_string, int start_offset, // NOLINT(readability/casting)
- const byte* input_start, const byte* input_end, int* output,
- int output_size, Address stack_base, int call_origin, Isolate* isolate,
- Address regexp);
-
- auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
- int result =
- fn.Call(input.ptr(), start_offset, input_start, input_end, output,
- output_size, stack_base, call_origin, isolate, regexp.ptr());
- DCHECK(result >= RETRY);
-
- if (result == EXCEPTION && !isolate->has_pending_exception()) {
- // We detected a stack overflow (on the backtrack stack) in RegExp code,
- // but haven't created the exception yet. Additionally, we allow heap
- // allocation because even though it invalidates {input_start} and
- // {input_end}, we are about to return anyway.
- AllowHeapAllocation allow_allocation;
- isolate->StackOverflow();
- }
- return result;
-}
-
-#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
-
-// clang-format off
-const byte NativeRegExpMacroAssembler::word_character_map[] = {
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
-
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // '0' - '7'
- 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // '8' - '9'
-
- 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'A' - 'G'
- 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'H' - 'O'
- 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'P' - 'W'
- 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu, // 'X' - 'Z', '_'
-
- 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'a' - 'g'
- 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'h' - 'o'
- 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, // 'p' - 'w'
- 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, // 'x' - 'z'
- // Latin-1 range
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
-
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
-
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
-
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
- 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
-};
-// clang-format on
-
-Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
- Address* stack_base,
- Isolate* isolate) {
- RegExpStack* regexp_stack = isolate->regexp_stack();
- size_t size = regexp_stack->stack_capacity();
- Address old_stack_base = regexp_stack->stack_base();
- DCHECK(old_stack_base == *stack_base);
- DCHECK(stack_pointer <= old_stack_base);
- DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
- Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
- if (new_stack_base == kNullAddress) {
- return kNullAddress;
- }
- *stack_base = new_stack_base;
- intptr_t stack_content_size = old_stack_base - stack_pointer;
- return new_stack_base - stack_content_size;
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-macro-assembler.h b/js/src/new-regexp/regexp-macro-assembler.h
deleted file mode 100644
index 60d712dfc..000000000
--- a/js/src/new-regexp/regexp-macro-assembler.h
+++ /dev/null
@@ -1,280 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
-#define V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
-
-#include "new-regexp/regexp-ast.h"
-#include "new-regexp/regexp-shim.h"
-#include "new-regexp/regexp.h"
-
-namespace v8 {
-namespace internal {
-
-static const uc32 kLeadSurrogateStart = 0xd800;
-static const uc32 kLeadSurrogateEnd = 0xdbff;
-static const uc32 kTrailSurrogateStart = 0xdc00;
-static const uc32 kTrailSurrogateEnd = 0xdfff;
-static const uc32 kNonBmpStart = 0x10000;
-static const uc32 kNonBmpEnd = 0x10ffff;
-
-struct DisjunctDecisionRow {
- RegExpCharacterClass cc;
- Label* on_match;
-};
-
-
-class RegExpMacroAssembler {
- public:
- // The implementation must be able to handle at least:
- static const int kMaxRegister = (1 << 16) - 1;
- static const int kMaxCPOffset = (1 << 15) - 1;
- static const int kMinCPOffset = -(1 << 15);
-
- static const int kTableSizeBits = 7;
- static const int kTableSize = 1 << kTableSizeBits;
- static const int kTableMask = kTableSize - 1;
-
- static constexpr int kUseCharactersValue = -1;
-
- enum IrregexpImplementation {
- kIA32Implementation,
- kARMImplementation,
- kARM64Implementation,
- kMIPSImplementation,
- kS390Implementation,
- kPPCImplementation,
- kX64Implementation,
- kX87Implementation,
- kBytecodeImplementation
- };
-
- enum StackCheckFlag {
- kNoStackLimitCheck = false,
- kCheckStackLimit = true
- };
-
- RegExpMacroAssembler(Isolate* isolate, Zone* zone);
- virtual ~RegExpMacroAssembler();
- // This function is called when code generation is aborted, so that
- // the assembler could clean up internal data structures.
- virtual void AbortedCodeGeneration() {}
- // The maximal number of pushes between stack checks. Users must supply
- // kCheckStackLimit flag to push operations (instead of kNoStackLimitCheck)
- // at least once for every stack_limit() pushes that are executed.
- virtual int stack_limit_slack() = 0;
- virtual bool CanReadUnaligned() = 0;
- virtual void AdvanceCurrentPosition(int by) = 0; // Signed cp change.
- virtual void AdvanceRegister(int reg, int by) = 0; // r[reg] += by.
- // Continues execution from the position pushed on the top of the backtrack
- // stack by an earlier PushBacktrack(Label*).
- virtual void Backtrack() = 0;
- virtual void Bind(Label* label) = 0;
- // Dispatch after looking the current character up in a 2-bits-per-entry
- // map. The destinations vector has up to 4 labels.
- virtual void CheckCharacter(unsigned c, Label* on_equal) = 0;
- // Bitwise and the current character with the given constant and then
- // check for a match with c.
- virtual void CheckCharacterAfterAnd(unsigned c,
- unsigned and_with,
- Label* on_equal) = 0;
- virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
- virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
- virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
- virtual void CheckAtStart(int cp_offset, Label* on_at_start) = 0;
- virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
- virtual void CheckNotBackReference(int start_reg, bool read_backward,
- Label* on_no_match) = 0;
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
- Label* on_no_match) = 0;
- // Check the current character for a match with a literal character. If we
- // fail to match then goto the on_failure label. End of input always
- // matches. If the label is nullptr then we should pop a backtrack address
- // off the stack and go to that.
- virtual void CheckNotCharacter(unsigned c, Label* on_not_equal) = 0;
- virtual void CheckNotCharacterAfterAnd(unsigned c,
- unsigned and_with,
- Label* on_not_equal) = 0;
- // Subtract a constant from the current character, then and with the given
- // constant and then check for a match with c.
- virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
- uc16 minus,
- uc16 and_with,
- Label* on_not_equal) = 0;
- virtual void CheckCharacterInRange(uc16 from,
- uc16 to, // Both inclusive.
- Label* on_in_range) = 0;
- virtual void CheckCharacterNotInRange(uc16 from,
- uc16 to, // Both inclusive.
- Label* on_not_in_range) = 0;
-
- // The current character (modulus the kTableSize) is looked up in the byte
- // array, and if the found byte is non-zero, we jump to the on_bit_set label.
- virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) = 0;
-
- // Checks whether the given offset from the current position is before
- // the end of the string. May overwrite the current character.
- virtual void CheckPosition(int cp_offset, Label* on_outside_input);
- // Check whether a standard/default character class matches the current
- // character. Returns false if the type of special character class does
- // not have custom support.
- // May clobber the current loaded character.
- virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
-
- // Control-flow integrity:
- // Define a jump target and bind a label.
- virtual void BindJumpTarget(Label* label) { Bind(label); }
-
- virtual void Fail() = 0;
- virtual Handle<HeapObject> GetCode(Handle<String> source) = 0;
- virtual void GoTo(Label* label) = 0;
- // Check whether a register is >= a given constant and go to a label if it
- // is. Backtracks instead if the label is nullptr.
- virtual void IfRegisterGE(int reg, int comparand, Label* if_ge) = 0;
- // Check whether a register is < a given constant and go to a label if it is.
- // Backtracks instead if the label is nullptr.
- virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
- // Check whether a register is == to the current position and go to a
- // label if it is.
- virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
- virtual IrregexpImplementation Implementation() = 0;
- V8_EXPORT_PRIVATE void LoadCurrentCharacter(
- int cp_offset, Label* on_end_of_input, bool check_bounds = true,
- int characters = 1, int eats_at_least = kUseCharactersValue);
- virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
- bool check_bounds, int characters,
- int eats_at_least) = 0;
- virtual void PopCurrentPosition() = 0;
- virtual void PopRegister(int register_index) = 0;
- // Pushes the label on the backtrack stack, so that a following Backtrack
- // will go to this label. Always checks the backtrack stack limit.
- virtual void PushBacktrack(Label* label) = 0;
- virtual void PushCurrentPosition() = 0;
- virtual void PushRegister(int register_index,
- StackCheckFlag check_stack_limit) = 0;
- virtual void ReadCurrentPositionFromRegister(int reg) = 0;
- virtual void ReadStackPointerFromRegister(int reg) = 0;
- virtual void SetCurrentPositionFromEnd(int by) = 0;
- virtual void SetRegister(int register_index, int to) = 0;
- // Return whether the matching (with a global regexp) will be restarted.
- virtual bool Succeed() = 0;
- virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
- virtual void ClearRegisters(int reg_from, int reg_to) = 0;
- virtual void WriteStackPointerToRegister(int reg) = 0;
-
- // Compares two-byte strings case insensitively.
- // Called from generated RegExp code.
- static int CaseInsensitiveCompareUC16(Address byte_offset1,
- Address byte_offset2,
- size_t byte_length, Isolate* isolate);
-
- // Check that we are not in the middle of a surrogate pair.
- void CheckNotInSurrogatePair(int cp_offset, Label* on_failure);
-
- // Controls the generation of large inlined constants in the code.
- void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
- bool slow_safe() { return slow_safe_compiler_; }
-
- void set_backtrack_limit(uint32_t backtrack_limit) {
- backtrack_limit_ = backtrack_limit;
- }
-
- enum GlobalMode {
- NOT_GLOBAL,
- GLOBAL_NO_ZERO_LENGTH_CHECK,
- GLOBAL,
- GLOBAL_UNICODE
- };
- // Set whether the regular expression has the global flag. Exiting due to
- // a failure in a global regexp may still mean success overall.
- inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
- inline bool global() { return global_mode_ != NOT_GLOBAL; }
- inline bool global_with_zero_length_check() {
- return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
- }
- inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
-
- Isolate* isolate() const { return isolate_; }
- Zone* zone() const { return zone_; }
-
- protected:
- bool has_backtrack_limit() const {
- return backtrack_limit_ != JSRegExp::kNoBacktrackLimit;
- }
- uint32_t backtrack_limit() const { return backtrack_limit_; }
-
- private:
- bool slow_safe_compiler_;
- uint32_t backtrack_limit_ = JSRegExp::kNoBacktrackLimit;
- GlobalMode global_mode_;
- Isolate* isolate_;
- Zone* zone_;
-};
-
-class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
- public:
- // Type of input string to generate code for.
- enum Mode { LATIN1 = 1, UC16 = 2 };
-
- // Result of calling generated native RegExp code.
- // RETRY: Something significant changed during execution, and the matching
- // should be retried from scratch.
- // EXCEPTION: Something failed during execution. If no exception has been
- // thrown, it's an internal out-of-memory, and the caller should
- // throw the exception.
- // FAILURE: Matching failed.
- // SUCCESS: Matching succeeded, and the output array has been filled with
- // capture positions.
- enum Result {
- FAILURE = RegExp::kInternalRegExpFailure,
- SUCCESS = RegExp::kInternalRegExpSuccess,
- EXCEPTION = RegExp::kInternalRegExpException,
- RETRY = RegExp::kInternalRegExpRetry,
- };
-
- NativeRegExpMacroAssembler(Isolate* isolate, Zone* zone);
- ~NativeRegExpMacroAssembler() override;
- bool CanReadUnaligned() override;
-
- // Returns a {Result} sentinel, or the number of successful matches.
- static int Match(Handle<JSRegExp> regexp, Handle<String> subject,
- int* offsets_vector, int offsets_vector_length,
- int previous_index, Isolate* isolate);
-
- // Called from RegExp if the backtrack stack limit is hit.
- // Tries to expand the stack. Returns the new stack-pointer if
- // successful, and updates the stack_top address, or returns 0 if unable
- // to grow the stack.
- // This function must not trigger a garbage collection.
- static Address GrowStack(Address stack_pointer, Address* stack_top,
- Isolate* isolate);
-
- static int CheckStackGuardState(Isolate* isolate, int start_index,
- RegExp::CallOrigin call_origin,
- Address* return_address, Code re_code,
- Address* subject, const byte** input_start,
- const byte** input_end);
-
- // Byte map of one byte characters with a 0xff if the character is a word
- // character (digit, letter or underscore) and 0x00 otherwise.
- // Used by generated RegExp code.
- static const byte word_character_map[256];
-
- static Address word_character_map_address() {
- return reinterpret_cast<Address>(&word_character_map[0]);
- }
-
- // Returns a {Result} sentinel, or the number of successful matches.
- V8_EXPORT_PRIVATE static int Execute(String input, int start_offset,
- const byte* input_start,
- const byte* input_end, int* output,
- int output_size, Isolate* isolate,
- JSRegExp regexp);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_MACRO_ASSEMBLER_H_
diff --git a/js/src/new-regexp/regexp-native-macro-assembler.cc b/js/src/new-regexp/regexp-native-macro-assembler.cc
deleted file mode 100644
index 01453a937..000000000
--- a/js/src/new-regexp/regexp-native-macro-assembler.cc
+++ /dev/null
@@ -1,1213 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "jit/Linker.h"
-#include "gc/Zone.h"
-#include "new-regexp/regexp-macro-assembler-arch.h"
-#include "new-regexp/regexp-stack.h"
-#include "vm/MatchPairs.h"
-
-#include "jit/MacroAssembler-inl.h"
-
-using namespace js;
-using namespace js::irregexp;
-using namespace js::jit;
-
-namespace v8 {
-namespace internal {
-
-using js::MatchPairs;
-using js::jit::AbsoluteAddress;
-using js::jit::Address;
-using js::jit::AllocatableGeneralRegisterSet;
-using js::jit::Assembler;
-using js::jit::BaseIndex;
-using js::jit::CodeLocationLabel;
-using js::jit::GeneralRegisterBackwardIterator;
-using js::jit::GeneralRegisterForwardIterator;
-using js::jit::GeneralRegisterSet;
-using js::jit::Imm32;
-using js::jit::ImmPtr;
-using js::jit::ImmWord;
-using js::jit::JitCode;
-using js::jit::Linker;
-using js::jit::LiveGeneralRegisterSet;
-using js::jit::Register;
-using js::jit::Registers;
-using js::jit::StackMacroAssembler;
-
-SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
- StackMacroAssembler& masm,
- Zone* zone, Mode mode,
- uint32_t num_capture_registers)
- : NativeRegExpMacroAssembler(isolate, zone),
- cx_(cx),
- masm_(masm),
- mode_(mode),
- num_registers_(num_capture_registers),
- num_capture_registers_(num_capture_registers) {
- // Each capture has a start and an end register
- MOZ_ASSERT(num_capture_registers_ % 2 == 0);
-
- AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
-
- temp0_ = regs.takeAny();
- temp1_ = regs.takeAny();
- temp2_ = regs.takeAny();
- input_end_pointer_ = regs.takeAny();
- current_character_ = regs.takeAny();
- current_position_ = regs.takeAny();
- backtrack_stack_pointer_ = regs.takeAny();
- savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs);
-
- masm_.jump(&entry_label_); // We'll generate the entry code later
- masm_.bind(&start_label_); // and continue from here.
-}
-
-int SMRegExpMacroAssembler::stack_limit_slack() {
- return RegExpStack::kStackLimitSlack;
-}
-
-void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) {
- if (by != 0) {
- masm_.addPtr(Imm32(by * char_size()), current_position_);
- }
-}
-
-void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) {
- MOZ_ASSERT(reg >= 0 && reg < num_registers_);
- if (by != 0) {
- masm_.addPtr(Imm32(by), register_location(reg));
- }
-}
-
-void SMRegExpMacroAssembler::Backtrack() {
- // Pop code location from backtrack stack and jump to location.
- Pop(temp0_);
- masm_.jump(temp0_);
-}
-
-void SMRegExpMacroAssembler::Bind(Label* label) {
- masm_.bind(label->inner());
- if (label->patchOffset_.bound()) {
- AddLabelPatch(label->patchOffset_, label->pos());
- }
-}
-
-// Check if current_position + cp_offset is the input start
-void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
- Assembler::Condition cond) {
- Address addr(current_position_, cp_offset * char_size());
- masm_.computeEffectiveAddress(addr, temp0_);
-
- masm_.branchPtr(cond, inputStart(), temp0_,
- LabelOrBacktrack(on_cond));
-}
-
-void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) {
- CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal);
-}
-
-void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset,
- Label* on_not_at_start) {
- CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual);
-}
-
-void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond,
- Assembler::Condition cond) {
- masm_.branch32(cond, current_character_, c, LabelOrBacktrack(on_cond));
-}
-
-void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) {
- CheckCharacterImpl(Imm32(c), on_equal, Assembler::Equal);
-}
-
-void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
- Label* on_not_equal) {
- CheckCharacterImpl(Imm32(c), on_not_equal, Assembler::NotEqual);
-}
-
-void SMRegExpMacroAssembler::CheckCharacterGT(uc16 c, Label* on_greater) {
- CheckCharacterImpl(Imm32(c), on_greater, Assembler::GreaterThan);
-}
-
-void SMRegExpMacroAssembler::CheckCharacterLT(uc16 c, Label* on_less) {
- CheckCharacterImpl(Imm32(c), on_less, Assembler::LessThan);
-}
-
-// Bitwise-and the current character with mask and then check for a
-// match with c.
-void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c,
- uint32_t mask,
- Label* on_cond,
- bool is_not) {
- if (c == 0) {
- Assembler::Condition cond = is_not ? Assembler::NonZero : Assembler::Zero;
- masm_.branchTest32(cond, current_character_, Imm32(mask),
- LabelOrBacktrack(on_cond));
- } else {
- Assembler::Condition cond = is_not ? Assembler::NotEqual : Assembler::Equal;
- masm_.move32(Imm32(mask), temp0_);
- masm_.and32(current_character_, temp0_);
- masm_.branch32(cond, temp0_, Imm32(c), LabelOrBacktrack(on_cond));
- }
-}
-
-void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_equal) {
- CheckCharacterAfterAndImpl(c, mask, on_equal, /*is_not =*/false);
-}
-
-void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c,
- uint32_t mask,
- Label* on_not_equal) {
- CheckCharacterAfterAndImpl(c, mask, on_not_equal, /*is_not =*/true);
-}
-
-
-// Subtract minus from the current character, then bitwise-and the
-// result with mask, then check for a match with c.
-void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
- uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
- masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_);
- if (c == 0) {
- masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask),
- LabelOrBacktrack(on_not_equal));
- } else {
- masm_.and32(Imm32(mask), temp0_);
- masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c),
- LabelOrBacktrack(on_not_equal));
- }
-}
-
-// If the current position matches the position stored on top of the backtrack
-// stack, pops the backtrack stack and branches to the given label.
-void SMRegExpMacroAssembler::CheckGreedyLoop(Label* on_equal) {
- js::jit::Label fallthrough;
- masm_.branchPtr(Assembler::NotEqual, Address(backtrack_stack_pointer_, 0),
- current_position_, &fallthrough);
- masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_); // Pop.
- JumpOrBacktrack(on_equal);
- masm_.bind(&fallthrough);
-}
-
-void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
- uc16 from, uc16 to, Label* on_cond, Assembler::Condition cond) {
- // x is in [from,to] if unsigned(x - from) <= to - from
- masm_.computeEffectiveAddress(Address(current_character_, -from), temp0_);
- masm_.branch32(cond, temp0_, Imm32(to - from), LabelOrBacktrack(on_cond));
-}
-
-// Range check using the BelowOrEqual condition; forwards to
-// CheckCharacterInRangeImpl.
-void SMRegExpMacroAssembler::CheckCharacterInRange(uc16 from, uc16 to,
-                                                   Label* on_in_range) {
-  const Assembler::Condition inside = Assembler::BelowOrEqual;
-  CheckCharacterInRangeImpl(from, to, on_in_range, inside);
-}
-
-// Range check using the Above condition; forwards to
-// CheckCharacterInRangeImpl.
-void SMRegExpMacroAssembler::CheckCharacterNotInRange(uc16 from, uc16 to,
-                                                      Label* on_not_in_range) {
-  const Assembler::Condition outside = Assembler::Above;
-  CheckCharacterInRangeImpl(from, to, on_not_in_range, outside);
-}
-
-// Looks up (current_character_ & kTableMask) in |table| and branches to
-// |on_bit_set| (or backtracks) when the byte found there is non-zero.
-void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
-                                             Label* on_bit_set) {
-  // Take the ByteArray away from the current HandleScope. ByteArrays live
-  // on the C++ heap and are (eventually) owned by the RegExpShared.
-  PseudoHandle<ByteArrayData> ownedTable = table->takeOwnership(isolate());
-
-  // temp0_ <- address of the table bytes.
-  masm_.movePtr(ImmPtr(ownedTable->data()), temp0_);
-
-  // temp1_ <- index into the table.
-  masm_.move32(Imm32(kTableMask), temp1_);
-  masm_.and32(current_character_, temp1_);
-
-  // temp0_ <- table[index]; branch when it is non-zero.
-  const BaseIndex entry(temp0_, temp1_, js::jit::TimesOne);
-  masm_.load8ZeroExtend(entry, temp0_);
-  masm_.branchTest32(Assembler::NonZero, temp0_, temp0_,
-                     LabelOrBacktrack(on_bit_set));
-
-  // Hand |ownedTable| over to the |tables_| vector.
-  AddTable(std::move(ownedTable));
-}
-
-// Emits code that compares the input at the current position with the
-// capture held in registers |start_reg| and |start_reg + 1|, branching to
-// |on_no_match| (or backtracking) when they differ or when there is not
-// enough input.
-//
-//   start_reg     - register holding the capture's start; the end is read
-//                   from the following register.
-//   read_backward - match the capture ending at the current position
-//                   instead of starting at it.
-//   on_no_match   - branch target on failure (null means backtrack).
-//   ignore_case   - compare case-insensitively.
-//
-// A zero-length capture (empty or cleared) always matches. On success
-// current_position_ is moved by the capture length (forward, or backward
-// when |read_backward| is set).
-void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg,
-                                                       bool read_backward,
-                                                       Label* on_no_match,
-                                                       bool ignore_case) {
-  js::jit::Label fallthrough;
-
-  // Captures are stored as a sequential pair of registers.
-  // Find the length of the back-referenced capture and load the
-  // capture's start index into current_character_.
-  masm_.loadPtr(register_location(start_reg),  // index of start
-                current_character_);
-  masm_.loadPtr(register_location(start_reg + 1), temp0_);  // index of end
-  masm_.subPtr(current_character_, temp0_);  // length of capture
-
-  // Capture registers are either both set or both cleared.
-  // If the capture length is zero, then the capture is either empty or
-  // cleared. Fall through in both cases.
-  masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough);
-
-  // Check that there are sufficient characters left in the input.
-  if (read_backward) {
-    // If start + len > current, there isn't enough room for a
-    // lookbehind backreference.
-    masm_.loadPtr(inputStart(), temp1_);
-    masm_.addPtr(temp0_, temp1_);
-    masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_,
-                    LabelOrBacktrack(on_no_match));
-  } else {
-    // current_position_ is the negative offset from the end.
-    // If current + len > 0, there isn't enough room for a backreference.
-    masm_.movePtr(current_position_, temp1_);
-    masm_.addPtr(temp0_, temp1_);
-    masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0),
-                    LabelOrBacktrack(on_no_match));
-  }
-
-  if (mode_ == UC16 && ignore_case) {
-    // We call a helper function for case-insensitive non-latin1 strings.
-
-    // Save volatile regs. temp1_ and temp2_ don't need to be saved.
-    // current_position_ is rewritten below into an absolute pointer so it
-    // can be passed as a call argument, yet it is used as an offset again
-    // after the call. Add it to the saved set explicitly so it is restored
-    // by PopRegsInMask even if it is not one of the volatile registers.
-    LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
-    volatileRegs.addUnchecked(current_position_);
-    volatileRegs.takeUnchecked(temp1_);
-    volatileRegs.takeUnchecked(temp2_);
-    masm_.PushRegsInMask(volatileRegs);
-
-    // Parameters are
-    //   Address captured - Address of captured substring's start.
-    //   Address current - Address of current character position.
-    //   size_t byte_length - length of capture (in bytes)
-
-    // Compute |captured|
-    masm_.addPtr(input_end_pointer_, current_character_);
-
-    // Compute |current|
-    masm_.addPtr(input_end_pointer_, current_position_);
-    if (read_backward) {
-      // Offset by length when matching backwards.
-      masm_.subPtr(temp0_, current_position_);
-    }
-
-    masm_.setupUnalignedABICall(temp1_);
-    masm_.passABIArg(current_character_);
-    masm_.passABIArg(current_position_);
-    masm_.passABIArg(temp0_);
-
-    // The non-unicode helper below is currently never selected.
-    bool unicode = true;  // TODO: Fix V8 bug
-    if (unicode) {
-      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
-          CaseInsensitiveCompareUCStrings;
-      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-    } else {
-      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
-          CaseInsensitiveCompareStrings;
-      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-    }
-    masm_.storeCallInt32Result(temp1_);
-    masm_.PopRegsInMask(volatileRegs);
-    // A zero result from the helper means the strings differ.
-    masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
-                       LabelOrBacktrack(on_no_match));
-
-    // On success, advance position by length of capture
-    if (read_backward) {
-      masm_.subPtr(temp0_, current_position_);
-    } else {
-      masm_.addPtr(temp0_, current_position_);
-    }
-
-    masm_.bind(&fallthrough);
-    return;
-  }
-
-  // We will be modifying current_position_. Save it in case the match fails.
-  masm_.push(current_position_);
-
-  // Compute start of capture string
-  masm_.addPtr(input_end_pointer_, current_character_);
-
-  // Compute start of match string
-  masm_.addPtr(input_end_pointer_, current_position_);
-  if (read_backward) {
-    // Offset by length when matching backwards.
-    masm_.subPtr(temp0_, current_position_);
-  }
-
-  // Compute end of match string
-  masm_.addPtr(current_position_, temp0_);
-
-  js::jit::Label success;
-  js::jit::Label fail;
-  js::jit::Label loop;
-  masm_.bind(&loop);
-
-  // Load next character from each string.
-  if (mode_ == LATIN1) {
-    masm_.load8ZeroExtend(Address(current_character_, 0), temp1_);
-    masm_.load8ZeroExtend(Address(current_position_, 0), temp2_);
-  } else {
-    masm_.load16ZeroExtend(Address(current_character_, 0), temp1_);
-    masm_.load16ZeroExtend(Address(current_position_, 0), temp2_);
-  }
-
-  if (ignore_case) {
-    MOZ_ASSERT(mode_ == LATIN1);
-    // Try exact match.
-    js::jit::Label loop_increment;
-    masm_.branch32(Assembler::Equal, temp1_, temp2_, &loop_increment);
-
-    // Mismatch. Try case-insensitive match.
-    // Force the match character to lower case (by setting bit 0x20)
-    // then check to see if it is a letter.
-    js::jit::Label convert_capture;
-    masm_.or32(Imm32(0x20), temp1_);
-
-    // Check if it is in [a,z].
-    masm_.computeEffectiveAddress(Address(temp1_, -'a'), temp2_);
-    masm_.branch32(Assembler::BelowOrEqual, temp2_, Imm32('z' - 'a'),
-                   &convert_capture);
-    // Check for values in range [224,254].
-    // Exclude 247 (U+00F7 DIVISION SIGN).
-    masm_.sub32(Imm32(224 - 'a'), temp2_);
-    masm_.branch32(Assembler::Above, temp2_, Imm32(254 - 224), &fail);
-    masm_.branch32(Assembler::Equal, temp2_, Imm32(247 - 224), &fail);
-
-    // Match character is lower case. Convert capture character
-    // to lower case and compare.
-    masm_.bind(&convert_capture);
-    masm_.load8ZeroExtend(Address(current_character_, 0), temp2_);
-    masm_.or32(Imm32(0x20), temp2_);
-    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
-
-    masm_.bind(&loop_increment);
-  } else {
-    // Fail if characters do not match.
-    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
-  }
-
-  // Increment pointers into match and capture strings.
-  masm_.addPtr(Imm32(char_size()), current_character_);
-  masm_.addPtr(Imm32(char_size()), current_position_);
-
-  // Loop if we have not reached the end of the match string.
-  masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop);
-  masm_.jump(&success);
-
-  // If we fail, restore current_position_ and branch.
-  masm_.bind(&fail);
-  masm_.pop(current_position_);
-  JumpOrBacktrack(on_no_match);
-
-  masm_.bind(&success);
-
-  // current_position_ is a pointer. Convert it back to an offset.
-  masm_.subPtr(input_end_pointer_, current_position_);
-  if (read_backward) {
-    // Subtract match length if we matched backward
-    masm_.addPtr(register_location(start_reg), current_position_);
-    masm_.subPtr(register_location(start_reg + 1), current_position_);
-  }
-
-  // Drop saved value of current_position_
-  masm_.addToStackPtr(Imm32(sizeof(uintptr_t)));
-
-  masm_.bind(&fallthrough);
-}
-
-// Case-sensitive back-reference check: forwards to
-// CheckNotBackReferenceImpl with ignore_case = false.
-void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg,
-                                                   bool read_backward,
-                                                   Label* on_no_match) {
-  const bool foldCase = false;
-  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, foldCase);
-}
-
-// Case-insensitive back-reference check: forwards to
-// CheckNotBackReferenceImpl with ignore_case = true.
-void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
-  const bool foldCase = true;
-  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, foldCase);
-}
-
-// Tests whether (current position + cp_offset) lies inside the input and
-// branches to |on_outside_input| (or backtracks) when it does not.
-// current_position_ is a (negative) byte offset from the end of the input.
-void SMRegExpMacroAssembler::CheckPosition(int cp_offset,
-                                           Label* on_outside_input) {
-  if (cp_offset < 0) {
-    // Looking backwards: form the offset position in temp0_ ...
-    const Address probe(current_position_, cp_offset * char_size());
-    masm_.computeEffectiveAddress(probe, temp0_);
-
-    // ... and compare it with the start of the input.
-    masm_.branchPtr(Assembler::GreaterThanOrEqual, inputStart(), temp0_,
-                    LabelOrBacktrack(on_outside_input));
-    return;
-  }
-
-  // Looking forwards:
-  //       end + current + offset >= end
-  //   <=> current + offset >= 0
-  //   <=> current >= -offset
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
-                  ImmWord(-cp_offset * char_size()),
-                  LabelOrBacktrack(on_outside_input));
-}
-
-// Tries to emit hand-written code for the character class identified by
-// |type|. Returns true when such code was emitted; returns false, having
-// emitted nothing, when the class has no special-case implementation.
-bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
-                                                        Label* on_no_match) {
-  js::jit::Label* mismatch = LabelOrBacktrack(on_no_match);
-
-  // Every range test (c in [min, max]) below is written as the unsigned
-  // comparison (c - min) <= (max - min).
-  switch (type) {
-    case 's': {
-      // Space characters. Only handled for one-byte strings.
-      if (mode_ != LATIN1) {
-        return false;
-      }
-      js::jit::Label is_space;
-      // One-byte spaces are ' ', '\t'..'\r' and '\u00a0' (NBSP).
-
-      // ' '
-      masm_.branch32(Assembler::Equal, current_character_, Imm32(' '),
-                     &is_space);
-
-      // '\t'..'\r'
-      masm_.computeEffectiveAddress(Address(current_character_, -'\t'),
-                                    temp0_);
-      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'),
-                     &is_space);
-
-      // \u00a0, using the value already rebased by '\t'.
-      masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'),
-                     mismatch);
-
-      masm_.bind(&is_space);
-      return true;
-    }
-    case 'S':
-      // The generic character-class code is good enough here.
-      return false;
-    case 'd':
-      // Latin1 digits ('0'-'9').
-      masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
-      masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), mismatch);
-      return true;
-    case 'D':
-      // Everything but latin1 digits ('0'-'9').
-      masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
-      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'),
-                     mismatch);
-      return true;
-    case '.':
-      // Non-newlines: everything except '\n' (0x0a), '\r' (0x0d),
-      // U+2028 LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR.
-      // See https://tc39.es/ecma262/#prod-LineTerminator
-
-      // XOR with 1 maps 0x0a -> 0x0b and 0x0d -> 0x0c, so both can be
-      // tested as the single contiguous range 0x0b..0x0c.
-      masm_.move32(current_character_, temp0_);
-      masm_.xor32(Imm32(0x01), temp0_);
-      masm_.sub32(Imm32(0x0b), temp0_);
-      masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
-                     mismatch);
-
-      if (mode_ == UC16) {
-        // Test for 0x2028 and 0x2029 by reusing the value computed
-        // above (current_char ^ 0x01 - 0x0b), i.e. look for
-        // 0x201d (0x2028 - 0x0b) or 0x201e.
-        masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
-        masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028),
-                       mismatch);
-      }
-      return true;
-    case 'w':
-      // \w is the set of 63 characters from Runtime Semantics:
-      // WordCharacters. Membership comes from a static lookup table
-      // defined in regexp-macro-assembler.cc.
-      // Note: with both Unicode and IgnoreCase set, \w matches a larger
-      // set of characters; that case is handled elsewhere.
-      if (mode_ != LATIN1) {
-        masm_.branch32(Assembler::Above, current_character_, Imm32('z'),
-                       mismatch);
-      }
-      static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar,
-                    "regex: arraysize(word_character_map) > unibrow::Latin1::kMaxChar");
-      masm_.movePtr(ImmPtr(word_character_map), temp0_);
-      masm_.load8ZeroExtend(
-          BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
-      masm_.branchTest32(Assembler::Zero, temp0_, temp0_, mismatch);
-      return true;
-    case 'W': {
-      // Complement of 'w'; see that case.
-      js::jit::Label above_table;
-      if (mode_ != LATIN1) {
-        masm_.branch32(Assembler::Above, current_character_, Imm32('z'),
-                       &above_table);
-      }
-      static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar,
-                    "regex: arraysize(word_character_map) > unibrow::Latin1::kMaxChar");
-      masm_.movePtr(ImmPtr(word_character_map), temp0_);
-      masm_.load8ZeroExtend(
-          BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
-      masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, mismatch);
-      if (mode_ != LATIN1) {
-        masm_.bind(&above_table);
-      }
-      return true;
-    }
-    ////////////////////////////////////////////////////////////////////////
-    // Non-standard classes (with no syntactic shorthand) used internally //
-    ////////////////////////////////////////////////////////////////////////
-    case '*':
-      // Any character: nothing to emit.
-      return true;
-    case 'n':
-      // Newlines, i.e. the opposite of '.'; see that case.
-      masm_.move32(current_character_, temp0_);
-      masm_.xor32(Imm32(0x01), temp0_);
-      masm_.sub32(Imm32(0x0b), temp0_);
-      if (mode_ == LATIN1) {
-        masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), mismatch);
-      } else {
-        MOZ_ASSERT(mode_ == UC16);
-        js::jit::Label is_newline;
-        masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
-                       &is_newline);
-
-        // Test for 0x2028 and 0x2029 by reusing the value computed
-        // above (current_char ^ 0x01 - 0x0b), i.e. look for
-        // 0x201d (0x2028 - 0x0b) or 0x201e.
-        masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
-        masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
-                       mismatch);
-        masm_.bind(&is_newline);
-      }
-      return true;
-
-    // Anything else has no custom implementation.
-    default:
-      return false;
-  }
-}
-
-// Loads RegExpRunStatus_Success_NotFound into temp0_ and jumps to the
-// exit label.
-void SMRegExpMacroAssembler::Fail() {
-  const ImmWord notFound(js::RegExpRunStatus_Success_NotFound);
-  masm_.movePtr(notFound, temp0_);
-  masm_.jump(&exit_label_);
-}
-
-// Unconditional jump to |to|, or to the backtrack path when |to| is null.
-void SMRegExpMacroAssembler::GoTo(Label* to) {
-  js::jit::Label* destination = LabelOrBacktrack(to);
-  masm_.jump(destination);
-}
-
-// Branches to |if_ge| (or backtracks) when register |reg| holds a value
-// greater than or equal to |comparand|.
-void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
-                                          Label* if_ge) {
-  const Assembler::Condition cond = Assembler::GreaterThanOrEqual;
-  masm_.branchPtr(cond, register_location(reg), ImmWord(comparand),
-                  LabelOrBacktrack(if_ge));
-}
-
-// Branches to |if_lt| (or backtracks) when register |reg| holds a value
-// less than |comparand|.
-void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
-                                          Label* if_lt) {
-  const Assembler::Condition cond = Assembler::LessThan;
-  masm_.branchPtr(cond, register_location(reg), ImmWord(comparand),
-                  LabelOrBacktrack(if_lt));
-}
-
-// Branches to |if_eq| (or backtracks) when register |reg| equals
-// current_position_.
-void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
-  const Assembler::Condition cond = Assembler::Equal;
-  masm_.branchPtr(cond, register_location(reg), current_position_,
-                  LabelOrBacktrack(if_eq));
-}
-
-// Optionally bounds-checks, then loads |characters| characters at
-// |cp_offset| from the current position.
-//
-// This mirrors the V8 code word for word. V8 carries the same routine in
-// at least nine places (one per supported architecture), differing only
-// in comments and formatting; it belongs in the superclass. Once upstream
-// hoists it there, this copy can be removed.
-void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
-                                                      Label* on_end_of_input,
-                                                      bool check_bounds,
-                                                      int characters,
-                                                      int eats_at_least) {
-  // Preloading few characters when every success path needs many is fine;
-  // the reverse is not.
-  MOZ_ASSERT(eats_at_least >= characters);
-  MOZ_ASSERT(cp_offset < (1 << 30));  // Be sane! (And ensure negation works)
-
-  if (check_bounds) {
-    // For a non-negative offset, probe the last character that will be
-    // consumed; for a negative offset, probe the offset itself.
-    const int probe =
-        (cp_offset >= 0) ? cp_offset + eats_at_least - 1 : cp_offset;
-    CheckPosition(probe, on_end_of_input);
-  }
-  LoadCurrentCharacterUnchecked(cp_offset, characters);
-}
-
-// Loads one or more characters found |cp_offset| characters from the
-// current position into current_character_, zero-extended to 32 bits.
-// No bounds check is emitted.
-void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
-                                                           int characters) {
-  BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne,
-                    cp_offset * char_size());
-
-  if (mode_ != LATIN1) {
-    MOZ_ASSERT(mode_ == UC16);
-    if (characters != 2) {
-      MOZ_ASSERT(characters == 1);
-      masm_.load16ZeroExtend(address, current_character_);
-      return;
-    }
-    masm_.load32(address, current_character_);
-    return;
-  }
-
-  // One-byte input: 4, 2 or 1 characters.
-  switch (characters) {
-    case 4:
-      masm_.load32(address, current_character_);
-      break;
-    case 2:
-      masm_.load16ZeroExtend(address, current_character_);
-      break;
-    default:
-      MOZ_ASSERT(characters == 1);
-      masm_.load8ZeroExtend(address, current_character_);
-      break;
-  }
-}
-
-// Pops the top backtrack-stack entry into current_position_.
-void SMRegExpMacroAssembler::PopCurrentPosition() {
-  Pop(current_position_);
-}
-
-// Pops the top backtrack-stack entry (through temp0_) into register
-// |register_index|.
-void SMRegExpMacroAssembler::PopRegister(int register_index) {
-  const Register scratch = temp0_;
-  Pop(scratch);
-  masm_.storePtr(scratch, register_location(register_index));
-}
-
-// Pushes a placeholder for |label| on the backtrack stack. The value is
-// emitted as a patchable null pointer whose patch offset is recorded in the
-// label; a backtrack-stack limit check follows the push.
-void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
-  // The label must be neither bound nor already carrying a patch offset.
-  MOZ_ASSERT(!label->is_bound());
-  MOZ_ASSERT(!label->patchOffset_.bound());
-
-  const Register scratch = temp0_;
-  label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), scratch);
-  MOZ_ASSERT(label->patchOffset_.bound());
-
-  Push(scratch);
-
-  CheckBacktrackStackLimit();
-}
-
-// Pushes current_position_ on the backtrack stack.
-void SMRegExpMacroAssembler::PushCurrentPosition() {
-  Push(current_position_);
-}
-
-// Pushes register |register_index| (through temp0_) on the backtrack stack,
-// followed by a stack-limit check when |check_stack_limit| is set.
-void SMRegExpMacroAssembler::PushRegister(int register_index,
-                                          StackCheckFlag check_stack_limit) {
-  const Register scratch = temp0_;
-  masm_.loadPtr(register_location(register_index), scratch);
-  Push(scratch);
-  if (!check_stack_limit) {
-    return;
-  }
-  CheckBacktrackStackLimit();
-}
-
-// Loads current_position_ from register |reg|.
-void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
-  const Address source = register_location(reg);
-  masm_.loadPtr(source, current_position_);
-}
-
-// Stores current_position_, displaced by |cp_offset| characters, into
-// register |reg|. A non-zero displacement is applied in temp0_.
-void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
-                                                            int cp_offset) {
-  if (cp_offset != 0) {
-    Address displaced(current_position_, cp_offset * char_size());
-    masm_.computeEffectiveAddress(displaced, temp0_);
-    masm_.storePtr(temp0_, register_location(reg));
-    return;
-  }
-  masm_.storePtr(current_position_, register_location(reg));
-}
-
-// Restores backtrack_stack_pointer_ from register |reg|.
-// The register holds an offset relative to the backtrack stack base rather
-// than a bare pointer, so the saved value stays valid if the backtrack
-// stack grows (and therefore moves). The base is added back here.
-void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
-  const Register sp = backtrack_stack_pointer_;
-  masm_.loadPtr(register_location(reg), sp);
-  masm_.addPtr(backtrackStackBase(), sp);
-}
-// Saves backtrack_stack_pointer_ into register |reg| as an offset from the
-// backtrack stack base. The subtraction is done in temp0_.
-void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
-  const Register scratch = temp0_;
-  masm_.movePtr(backtrack_stack_pointer_, scratch);
-  masm_.subPtr(backtrackStackBase(), scratch);
-  masm_.storePtr(scratch, register_location(reg));
-}
-
-// Used for regexps anchored at the end, to skip the front of long strings:
-// when the longest possible match is shorter than the input, the first
-// len - max_len bytes cannot take part in a match. If current_position_ is
-// further than |by| characters from the end it is moved to exactly that
-// distance and the preceding character is reloaded.
-void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
-  js::jit::Label already_close_enough;
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
-                  ImmWord(-by * char_size()), &already_close_enough);
-  masm_.movePtr(ImmWord(-by * char_size()), current_position_);
-
-  // RegExp code entry (where this operation is used) expects the character
-  // before the current position to be loaded already. The position was just
-  // advanced, so reading one character backwards is safe.
-  LoadCurrentCharacterUnchecked(-1, 1);
-  masm_.bind(&already_close_enough);
-}
-
-// Stores the constant |to| into register |register_index|, which is
-// asserted not to be a capture register.
-void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
-  MOZ_ASSERT(register_index >= num_capture_registers_);
-  const Address destination = register_location(register_index);
-  masm_.storePtr(ImmWord(to), destination);
-}
-
-// Jumps to the success label. The result is global(), i.e. whether a match
-// can be restarted; no caller uses it, but it is provided to be safe.
-bool SMRegExpMacroAssembler::Succeed() {
-  masm_.jump(&success_label_);
-  const bool restartable = global();
-  return restartable;
-}
-
-// Resets registers |reg_from|..|reg_to| (inclusive) to the cleared value,
-// which is inputStart minus one character, i.e. input[-1].
-void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
-  MOZ_ASSERT(reg_from <= reg_to);
-  const Register cleared = temp0_;
-  masm_.loadPtr(inputStart(), cleared);
-  masm_.subPtr(Imm32(char_size()), cleared);
-  int reg = reg_from;
-  while (reg <= reg_to) {
-    masm_.storePtr(cleared, register_location(reg));
-    reg++;
-  }
-}
-
-// Pushes |source| on the backtrack stack: the stack pointer is lowered by
-// one pointer-sized slot and |source| is stored at the new top.
-void SMRegExpMacroAssembler::Push(Register source) {
-  MOZ_ASSERT(source != backtrack_stack_pointer_);
-
-  const Imm32 slot(sizeof(void*));
-  masm_.subPtr(slot, backtrack_stack_pointer_);
-  masm_.storePtr(source, Address(backtrack_stack_pointer_, 0));
-}
-
-// Pops the top of the backtrack stack into |target|: the top slot is
-// loaded and the stack pointer is raised by one pointer-sized slot.
-void SMRegExpMacroAssembler::Pop(Register target) {
-  MOZ_ASSERT(target != backtrack_stack_pointer_);
-
-  const Imm32 slot(sizeof(void*));
-  masm_.loadPtr(Address(backtrack_stack_pointer_, 0), target);
-  masm_.addPtr(slot, backtrack_stack_pointer_);
-}
-
-// Jumps to |to| when a label is given; with a null label, backtracks.
-void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
-  if (!to) {
-    Backtrack();
-    return;
-  }
-  masm_.jump(to->inner());
-}
-
-// Emits a cheap inline test for backtrack-stack overflow. When the test
-// fails, the out-of-line stack-overflow handler is called to try to grow
-// the stack; a zero result in temp0_ afterwards exits with an exception.
-void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
-  js::jit::Label within_limit;
-  const AbsoluteAddress limit(
-      isolate()->regexp_stack()->limit_address_address());
-  masm_.branchPtr(Assembler::BelowOrEqual, limit, backtrack_stack_pointer_,
-                  &within_limit);
-
-  masm_.call(&stack_overflow_label_);
-
-  // The call failed: leave through the exception exit.
-  masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
-                     &exit_with_exception_label_);
-
-  masm_.bind(&within_limit);
-}
-
-// Placeholder result used to carry an OOM through the V8 layer: a handle
-// built from JS::UndefinedHandleValue.
-static Handle<HeapObject> DummyCode() {
-  const auto undefinedValue = JS::UndefinedHandleValue;
-  return Handle<HeapObject>::fromHandleValue(undefinedValue);
-}
-
-// Finishes code generation and returns the compiled code. This runs last
-// because only then is the number of registers known. On failure (no jit
-// compartment, or the linker returns no code) DummyCode() is returned.
-Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source) {
-  const bool haveJitCompartment =
-      cx_->compartment()->ensureJitCompartmentExists(cx_);
-  if (!haveJitCompartment) {
-    return DummyCode();
-  }
-
-  // Entry sequence: frame creation and initialization, then the regexp body.
-  masm_.bind(&entry_label_);
-
-  createStackFrame();
-  initFrameAndRegs();
-
-  masm_.jump(&start_label_);
-
-  // Shared out-of-line handlers.
-  successHandler();
-  exitHandler();
-  backtrackHandler();
-  stackOverflowHandler();
-
-  Linker linker(masm_);
-  JitCode* jitCode = linker.newCode<NoGC>(cx_, REGEXP_CODE);
-  if (!jitCode) {
-    ReportOutOfMemory(cx_);
-    return DummyCode();
-  }
-
-  // Replace each recorded null placeholder with the final label address.
-  for (LabelPatch& patch : labelPatches_) {
-    Assembler::PatchDataWithValueCheck(
-        CodeLocationLabel(jitCode, patch.patchOffset_),
-        ImmPtr(jitCode->raw() + patch.labelOffset_), ImmPtr(nullptr));
-  }
-
-  return Handle<HeapObject>(JS::PrivateGCThingValue(jitCode), isolate());
-}
-
-/*
- * Stack layout once the frame exists:
- *  sp->    - FrameData
- *            - inputStart
- *            - backtrack stack base
- *            - matches
- *            - numMatches
- *          - Registers
- *            - Capture positions
- *            - Scratch registers
- *          --- frame alignment ---
- *          - Saved register area
- *          - Return address
- *
- * Saves registers, leaves the InputOutputData pointer in temp0_, reserves
- * the frame and exits with RegExpRunStatus_Error if the stack limit test
- * fails.
- */
-void SMRegExpMacroAssembler::createStackFrame() {
-#ifdef JS_CODEGEN_ARM64
-  // On ARM64 the stack address is communicated through SP while addressing
-  // goes through a pseudo-sp (PSP). The PSP register may also be used by
-  // the caller and is nonvolatile, so it has to be saved. That happens
-  // here, ahead of everything else, because the generic save/restore code
-  // needs an initialized PSP.
-  MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
-  masm_.Str(js::jit::PseudoStackPointer64,
-            vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));
-
-  // PSP <- SP.
-  masm_.initPseudoStackPtr();
-#endif
-
-  // Save the non-volatile registers that jitcode may modify.
-  size_t pushedNonVolatileRegisters = 0;
-  GeneralRegisterForwardIterator iter(savedRegisters_);
-  while (iter.more()) {
-    masm_.Push(*iter);
-    pushedNonVolatileRegisters++;
-    ++iter;
-  }
-
-  // The first argument is the pointer to InputOutputData. x86 passes it on
-  // the stack, so it is loaded into temp0_ from there; on other platforms
-  // it already sits in a register.
-#ifdef JS_CODEGEN_X86
-  Address ioDataAddr(masm_.getStackPointer(),
-                     (pushedNonVolatileRegisters + 1) * sizeof(void*));
-  masm_.loadPtr(ioDataAddr, temp0_);
-#else
-  if (js::jit::IntArgReg0 != temp0_) {
-    masm_.movePtr(js::jit::IntArgReg0, temp0_);
-  }
-#endif
-
-  // Reserve the new frame: FrameData plus one word per register.
-  size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);
-  frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
-                                              masm_.framePushed(), frameBytes);
-  masm_.reserveStack(frameSize_);
-  masm_.checkStackAlignment();
-
-  // Make sure there is stack space left. The *NoInterrupt limit is used so
-  // that regex code called from Ion JIT code does not fail repeatedly.
-  // (See bug 1208819)
-  js::jit::Label have_stack;
-  AbsoluteAddress stackLimit(cx_->addressOfJitStackLimitNoInterrupt());
-  masm_.branchStackPtrRhs(Assembler::Below, stackLimit, &have_stack);
-
-  // Out of stack: leave with an error status.
-  masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
-  masm_.jump(&exit_label_);
-
-  masm_.bind(&have_stack);
-}
-
-// Fills in the freshly reserved (still uninitialized) frame and the fixed
-// registers. On entry temp0_ holds the address of the InputOutputData.
-void SMRegExpMacroAssembler::initFrameAndRegs() {
-  Register ioData = temp0_;
-
-  // Output area: copy the pairs pointer and the pair count into the frame.
-  Register matchPairs = temp1_;
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, matches)),
-                matchPairs);
-
-  masm_.loadPtr(Address(matchPairs, MatchPairs::offsetOfPairs()), temp2_);
-  masm_.storePtr(temp2_, matches());
-  masm_.load32(Address(matchPairs, MatchPairs::offsetOfPairCount()), temp2_);
-  masm_.store32(temp2_, numMatches());
-
-#ifdef DEBUG
-  // The pair count must cover every capture.
-  js::jit::Label pairsSuffice;
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, temp2_,
-                  ImmWord(num_capture_registers_ / 2), &pairsSuffice);
-  masm_.assumeUnreachable("Not enough output pairs for RegExp");
-  masm_.bind(&pairsSuffice);
-#endif
-
-  // current_position_ <- input start pointer.
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, inputStart)),
-                current_position_);
-
-  // input_end_pointer_ <- input end pointer.
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, inputEnd)),
-                input_end_pointer_);
-
-  // Positions are negative offsets from the end of the string.
-  masm_.subPtr(input_end_pointer_, current_position_);
-
-  // Remember the offset of the input start in the frame.
-  masm_.storePtr(current_position_, inputStart());
-
-  // Advance the position by the start index.
-  Register startIndex = temp1_;
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, startIndex)),
-                startIndex);
-  masm_.computeEffectiveAddress(
-      BaseIndex(current_position_, startIndex, factor()), current_position_);
-
-  // current_character_ starts out as a newline when the start index is
-  // zero, and as the previous character otherwise.
-  js::jit::Label character_ready;
-  js::jit::Label read_previous;
-  masm_.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0),
-                  &read_previous);
-  masm_.movePtr(ImmWord('\n'), current_character_);
-  masm_.jump(&character_ready);
-
-  masm_.bind(&read_previous);
-  LoadCurrentCharacterUnchecked(-1, 1);
-  masm_.bind(&character_ready);
-
-  // Every capture register starts out as inputStart - 1.
-  MOZ_ASSERT(num_capture_registers_ > 0);
-  Register clearedValue = temp2_;
-  masm_.loadPtr(inputStart(), clearedValue);
-  masm_.subPtr(Imm32(char_size()), clearedValue);
-  if (num_capture_registers_ <= 8) {
-    // Few registers: emit one store per register.
-    for (int reg = 0; reg < num_capture_registers_; reg++) {
-      masm_.storePtr(clearedValue, register_location(reg));
-    }
-  } else {
-    // Many registers: emit a loop over stack offsets held in temp1_.
-    masm_.movePtr(ImmWord(register_offset(0)), temp1_);
-    js::jit::Label fill_next;
-    masm_.bind(&fill_next);
-    masm_.storePtr(clearedValue, BaseIndex(masm_.getStackPointer(), temp1_,
-                                           js::jit::TimesOne));
-    masm_.addPtr(ImmWord(sizeof(void*)), temp1_);
-    masm_.branchPtr(Assembler::LessThan, temp1_,
-                    ImmWord(register_offset(num_capture_registers_)),
-                    &fill_next);
-  }
-
-  // Backtrack stack pointer: load the top of the regexp stack and record
-  // it as the backtrack stack base.
-  masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()),
-                backtrack_stack_pointer_);
-  masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
-}
-
-// Emits the success path: writes every capture register to the output
-// pairs as an index, then loads RegExpRunStatus_Success into temp0_.
-void SMRegExpMacroAssembler::successHandler() {
-  MOZ_ASSERT(success_label_.used());
-  masm_.bind(&success_label_);
-
-  // Captures are kept as positions, i.e. negative byte offsets from the
-  // end of the string, and have to be turned into real indices for the
-  // MatchPairs that the InputOutputData points to.
-  //
-  // Index:        [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
-  // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ] // IS = -6
-  // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ] // IS = -12
-  //
-  // index = (position - InputStart) / char_size.
-  Register pairs = temp1_;
-  masm_.loadPtr(matches(), pairs);
-
-  Register inputStartOffset = temp2_;
-  masm_.loadPtr(inputStart(), inputStartOffset);
-
-  for (int reg = 0; reg < num_capture_registers_; reg++) {
-    masm_.loadPtr(register_location(reg), temp0_);
-    masm_.subPtr(inputStartOffset, temp0_);
-    if (mode_ == UC16) {
-      // Two bytes per character: halve the byte distance.
-      masm_.rshiftPtrArithmetic(Imm32(1), temp0_);
-    }
-    masm_.store32(temp0_, Address(pairs, reg * sizeof(int32_t)));
-  }
-
-  masm_.movePtr(ImmWord(js::RegExpRunStatus_Success), temp0_);
-  // Execution continues straight into the exit handler.
-}
-
-// Emits the common exit: moves the status in temp0_ to the return register,
-// frees the frame, restores the saved registers and returns. If the
-// exception exit label was used, its code is emitted after the return.
-void SMRegExpMacroAssembler::exitHandler() {
-  masm_.bind(&exit_label_);
-
-  if (temp0_ != js::jit::ReturnReg) {
-    masm_.movePtr(temp0_, js::jit::ReturnReg);
-  }
-
-  masm_.freeStack(frameSize_);
-
-  // Pop the registers saved on entry, in reverse order.
-  GeneralRegisterBackwardIterator iter(savedRegisters_);
-  while (iter.more()) {
-    masm_.Pop(*iter);
-    ++iter;
-  }
-
-#ifdef JS_CODEGEN_ARM64
-  // Put back the value the PSP register held on entry, then return.
-
-  // SP <- PSP, which holds the correct stack address.
-  masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);
-
-  // Reload the saved PSP register. That value is whatever the caller kept
-  // in the register, not an actual SP value, and nothing may overwrite it
-  // from here on.
-  masm_.Ldr(js::jit::PseudoStackPointer64,
-            vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));
-
-  // A plain Ret() is required: abiret() would move SP <- PSP, which is wrong.
-  masm_.Ret(vixl::lr);
-#else
-  masm_.abiret();
-#endif
-
-  if (!exit_with_exception_label_.used()) {
-    return;
-  }
-  masm_.bind(&exit_with_exception_label_);
-
-  // A thrown exception is signalled with an error status.
-  masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
-  masm_.jump(&exit_label_);
-}
-
-// Emits the shared backtrack entry point, but only if something refers to
-// backtrack_label_.
-void SMRegExpMacroAssembler::backtrackHandler() {
-  if (backtrack_label_.used()) {
-    masm_.bind(&backtrack_label_);
-    Backtrack();
-  }
-}
-
-// Emits the out-of-line handler that is called when the backtrack-stack
-// limit has been hit. It calls GrowBacktrackStack and leaves the boolean
-// result in temp0_. On success it also rebases backtrack_stack_pointer_
-// onto the new stack and updates the stored backtrack stack base. Nothing
-// is emitted if stack_overflow_label_ is unused.
-void SMRegExpMacroAssembler::stackOverflowHandler() {
-  if (!stack_overflow_label_.used()) {
-    return;
-  }
-
-  // Called if the backtrack-stack limit has been hit.
-  // NOTE: depending on architecture, the call may have
-  // changed the stack pointer. We adjust for that below.
-  masm_.bind(&stack_overflow_label_);
-
-  // Load argument
-  masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);
-
-  // Save registers before calling C function
-  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
-
-#ifdef JS_USE_LINK_REGISTER
-  // The assembler member is masm_; there is no |masm| in this class.
-  masm_.pushReturnAddress();
-#endif
-
-  // Adjust for the return address on the stack.
-  size_t frameOffset = sizeof(void*);
-
-  // temp0_ receives the result and temp1_ carries the argument, so neither
-  // is saved and restored around the call.
-  volatileRegs.takeUnchecked(temp0_);
-  volatileRegs.takeUnchecked(temp1_);
-  masm_.PushRegsInMask(volatileRegs);
-
-  masm_.setupUnalignedABICall(temp0_);
-  masm_.passABIArg(temp1_);
-  masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, GrowBacktrackStack));
-  masm_.storeCallBoolResult(temp0_);
-
-  masm_.PopRegsInMask(volatileRegs);
-
-  // If GrowBacktrackStack returned false, we have failed to grow the
-  // stack, and must exit with a stack-overflow exception. Do this in
-  // the caller so that the stack is adjusted by our return instruction.
-  js::jit::Label overflow_return;
-  masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);
-
-  // Otherwise, store the new backtrack stack base and recompute the new
-  // top of the stack: turn the pointer into an offset from the old base,
-  // then add the new base.
-  Address bsbAddress(masm_.getStackPointer(),
-                     offsetof(FrameData, backtrackStackBase) + frameOffset);
-  masm_.subPtr(bsbAddress, backtrack_stack_pointer_);
-
-  masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()), temp1_);
-  masm_.storePtr(temp1_, bsbAddress);
-  masm_.addPtr(temp1_, backtrack_stack_pointer_);
-
-  // Resume execution in calling code.
-  masm_.bind(&overflow_return);
-  masm_.ret();
-}
-
-// Only tracing code asks for this, and the value it gets is irrelevant.
-RegExpMacroAssembler::IrregexpImplementation
-SMRegExpMacroAssembler::Implementation() {
-  const IrregexpImplementation reported = kBytecodeImplementation;
-  return reported;
-}
-
-/*static */
-// Compares two UTF-16 runs of |byteLength| bytes each, treating code units
-// as equal when they are identical or have the same
-// js::unicode::ToUpperCase mapping. Returns 1 if every position matches
-// and 0 at the first position that does not.
-uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareStrings(
-    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
-  JS::AutoCheckCannotGC nogc;
-
-  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
-  const size_t unitCount = byteLength / sizeof(char16_t);
-
-  for (size_t idx = 0; idx < unitCount; idx++) {
-    const char16_t left = substring1[idx];
-    const char16_t right = substring2[idx];
-    if (left == right) {
-      continue;
-    }
-    const char16_t leftUpper = js::unicode::ToUpperCase(left);
-    const char16_t rightUpper = js::unicode::ToUpperCase(right);
-    if (leftUpper != rightUpper) {
-      return 0;
-    }
-  }
-
-  return 1;
-}
-
-/*static */
-// Compares two UTF-16 runs of |byteLength| bytes each, treating code units
-// as equal when they are identical or have the same js::unicode::FoldCase
-// mapping. Returns 1 if every position matches and 0 at the first position
-// that does not.
-uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUCStrings(
-    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
-  JS::AutoCheckCannotGC nogc;
-
-  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
-  const size_t unitCount = byteLength / sizeof(char16_t);
-
-  for (size_t idx = 0; idx < unitCount; idx++) {
-    const char16_t left = substring1[idx];
-    const char16_t right = substring2[idx];
-    if (left == right) {
-      continue;
-    }
-    const char16_t leftFolded = js::unicode::FoldCase(left);
-    const char16_t rightFolded = js::unicode::FoldCase(right);
-    if (leftFolded != rightFolded) {
-      return 0;
-    }
-  }
-
-  return 1;
-}
-
-/* static */
-// Asks |regexp_stack| to ensure twice its current capacity. Returns
-// whether EnsureCapacity gave back a non-null / non-zero result.
-bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) {
-  JS::AutoCheckCannotGC nogc;
-  const size_t currentCapacity = regexp_stack->stack_capacity();
-  const size_t wantedCapacity = currentCapacity * 2;
-  return !!regexp_stack->EnsureCapacity(wantedCapacity);
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-nodes.h b/js/src/new-regexp/regexp-nodes.h
deleted file mode 100644
index 099687c25..000000000
--- a/js/src/new-regexp/regexp-nodes.h
+++ /dev/null
@@ -1,750 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_NODES_H_
-#define V8_REGEXP_REGEXP_NODES_H_
-
-#include "new-regexp/regexp-macro-assembler.h"
-
-namespace v8 {
-namespace internal {
-
-class AlternativeGenerationList;
-class BoyerMooreLookahead;
-class GreedyLoopState;
-class Label;
-class NodeVisitor;
-class QuickCheckDetails;
-class RegExpCompiler;
-class Trace;
-struct PreloadState;
-class ChoiceNode;
-
-#define FOR_EACH_NODE_TYPE(VISIT) \
- VISIT(End) \
- VISIT(Action) \
- VISIT(Choice) \
- VISIT(LoopChoice) \
- VISIT(NegativeLookaroundChoice) \
- VISIT(BackReference) \
- VISIT(Assertion) \
- VISIT(Text)
-
-struct NodeInfo final {
- NodeInfo()
- : being_analyzed(false),
- been_analyzed(false),
- follows_word_interest(false),
- follows_newline_interest(false),
- follows_start_interest(false),
- at_end(false),
- visited(false),
- replacement_calculated(false) {}
-
- // Returns true if the interests and assumptions of this node
- // matches the given one.
- bool Matches(NodeInfo* that) {
- return (at_end == that->at_end) &&
- (follows_word_interest == that->follows_word_interest) &&
- (follows_newline_interest == that->follows_newline_interest) &&
- (follows_start_interest == that->follows_start_interest);
- }
-
- // Updates the interests of this node given the interests of the
- // node preceding it.
- void AddFromPreceding(NodeInfo* that) {
- at_end |= that->at_end;
- follows_word_interest |= that->follows_word_interest;
- follows_newline_interest |= that->follows_newline_interest;
- follows_start_interest |= that->follows_start_interest;
- }
-
- bool HasLookbehind() {
- return follows_word_interest || follows_newline_interest ||
- follows_start_interest;
- }
-
- // Sets the interests of this node to include the interests of the
- // following node.
- void AddFromFollowing(NodeInfo* that) {
- follows_word_interest |= that->follows_word_interest;
- follows_newline_interest |= that->follows_newline_interest;
- follows_start_interest |= that->follows_start_interest;
- }
-
- void ResetCompilationState() {
- being_analyzed = false;
- been_analyzed = false;
- }
-
- bool being_analyzed : 1;
- bool been_analyzed : 1;
-
- // These bits are set of this node has to know what the preceding
- // character was.
- bool follows_word_interest : 1;
- bool follows_newline_interest : 1;
- bool follows_start_interest : 1;
-
- bool at_end : 1;
- bool visited : 1;
- bool replacement_calculated : 1;
-};
-
-struct EatsAtLeastInfo final {
- EatsAtLeastInfo() : EatsAtLeastInfo(0) {}
- explicit EatsAtLeastInfo(uint8_t eats)
- : eats_at_least_from_possibly_start(eats),
- eats_at_least_from_not_start(eats) {}
- void SetMin(const EatsAtLeastInfo& other) {
- if (other.eats_at_least_from_possibly_start <
- eats_at_least_from_possibly_start) {
- eats_at_least_from_possibly_start =
- other.eats_at_least_from_possibly_start;
- }
- if (other.eats_at_least_from_not_start < eats_at_least_from_not_start) {
- eats_at_least_from_not_start = other.eats_at_least_from_not_start;
- }
- }
-
- // Any successful match starting from the current node will consume at least
- // this many characters. This does not necessarily mean that there is a
- // possible match with exactly this many characters, but we generally try to
- // get this number as high as possible to allow for early exit on failure.
- uint8_t eats_at_least_from_possibly_start;
-
- // Like eats_at_least_from_possibly_start, but with the additional assumption
- // that start-of-string assertions (^) can't match. This value is greater than
- // or equal to eats_at_least_from_possibly_start.
- uint8_t eats_at_least_from_not_start;
-};
-
-class RegExpNode : public ZoneObject {
- public:
- explicit RegExpNode(Zone* zone)
- : replacement_(nullptr),
- on_work_list_(false),
- trace_count_(0),
- zone_(zone) {
- bm_info_[0] = bm_info_[1] = nullptr;
- }
- virtual ~RegExpNode();
- virtual void Accept(NodeVisitor* visitor) = 0;
- // Generates a goto to this node or actually generates the code at this point.
- virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
- // How many characters must this node consume at a minimum in order to
- // succeed. The not_at_start argument is used to indicate that we know we are
- // not at the start of the input. In this case anchored branches will always
- // fail and can be ignored when determining how many characters are consumed
- // on success. If this node has not been analyzed yet, EatsAtLeast returns 0.
- int EatsAtLeast(bool not_at_start);
- // Returns how many characters this node must consume in order to succeed,
- // given that this is a LoopChoiceNode whose counter register is in a
- // newly-initialized state at the current position in the generated code. For
- // example, consider /a{6,8}/. Absent any extra information, the
- // LoopChoiceNode for the repetition must report that it consumes at least
- // zero characters, because it may have already looped several times. However,
- // with a newly-initialized counter, it can report that it consumes at least
- // six characters.
- virtual EatsAtLeastInfo EatsAtLeastFromLoopEntry();
- // Emits some quick code that checks whether the preloaded characters match.
- // Falls through on certain failure, jumps to the label on possible success.
- // If the node cannot make a quick check it does nothing and returns false.
- bool EmitQuickCheck(RegExpCompiler* compiler, Trace* bounds_check_trace,
- Trace* trace, bool preload_has_checked_bounds,
- Label* on_possible_success,
- QuickCheckDetails* details_return,
- bool fall_through_on_failure, ChoiceNode* predecessor);
- // For a given number of characters this returns a mask and a value. The
- // next n characters are anded with the mask and compared with the value.
- // A comparison failure indicates the node cannot match the next n characters.
- // A comparison success indicates the node may match.
- virtual void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) = 0;
- // Fills in quick check details for this node, given that this is a
- // LoopChoiceNode whose counter register is in a newly-initialized state at
- // the current position in the generated code. For example, consider /a{6,8}/.
- // Absent any extra information, the LoopChoiceNode for the repetition cannot
- // generate any useful quick check because a match might be the (empty)
- // continuation node. However, with a newly-initialized counter, it can
- // generate a quick check for several 'a' characters at once.
- virtual void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start);
- static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
- virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
- // Only returns the successor for a text node of length 1 that matches any
- // character and that has no guards on it.
- virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
- RegExpCompiler* compiler) {
- return nullptr;
- }
-
- // Collects information on the possible code units (mod 128) that can match if
- // we look forward. This is used for a Boyer-Moore-like string searching
- // implementation. TODO(erikcorry): This should share more code with
- // EatsAtLeast, GetQuickCheckDetails. The budget argument is used to limit
- // the number of nodes we are willing to look at in order to create this data.
- static const int kRecursionBudget = 200;
- bool KeepRecursing(RegExpCompiler* compiler);
- virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) {
- UNREACHABLE();
- }
-
- // If we know that the input is one-byte then there are some nodes that can
- // never match. This method returns a node that can be substituted for
- // itself, or nullptr if the node can never match.
- virtual RegExpNode* FilterOneByte(int depth) { return this; }
- // Helper for FilterOneByte.
- RegExpNode* replacement() {
- DCHECK(info()->replacement_calculated);
- return replacement_;
- }
- RegExpNode* set_replacement(RegExpNode* replacement) {
- info()->replacement_calculated = true;
- replacement_ = replacement;
- return replacement; // For convenience.
- }
-
- // We want to avoid recalculating the lookahead info, so we store it on the
- // node. Only info that is for this node is stored. We can tell that the
- // info is for this node when offset == 0, so the information is calculated
- // relative to this node.
- void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
- if (offset == 0) set_bm_info(not_at_start, bm);
- }
-
- Label* label() { return &label_; }
- // If non-generic code is generated for a node (i.e. the node is not at the
- // start of the trace) then it cannot be reused. This variable sets a limit
- // on how often we allow that to happen before we insist on starting a new
- // trace and generating generic code for a node that can be reused by flushing
- // the deferred actions in the current trace and generating a goto.
- static const int kMaxCopiesCodeGenerated = 10;
-
- bool on_work_list() { return on_work_list_; }
- void set_on_work_list(bool value) { on_work_list_ = value; }
-
- NodeInfo* info() { return &info_; }
- const EatsAtLeastInfo* eats_at_least_info() const { return &eats_at_least_; }
- void set_eats_at_least_info(const EatsAtLeastInfo& eats_at_least) {
- eats_at_least_ = eats_at_least;
- }
-
- BoyerMooreLookahead* bm_info(bool not_at_start) {
- return bm_info_[not_at_start ? 1 : 0];
- }
-
- Zone* zone() const { return zone_; }
-
- protected:
- enum LimitResult { DONE, CONTINUE };
- RegExpNode* replacement_;
-
- LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
-
- void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
- bm_info_[not_at_start ? 1 : 0] = bm;
- }
-
- private:
- static const int kFirstCharBudget = 10;
- Label label_;
- bool on_work_list_;
- NodeInfo info_;
-
- // Saved values for EatsAtLeast results, to avoid recomputation. Filled in
- // during analysis (valid if info_.been_analyzed is true).
- EatsAtLeastInfo eats_at_least_;
-
- // This variable keeps track of how many times code has been generated for
- // this node (in different traces). We don't keep track of where the
- // generated code is located unless the code is generated at the start of
- // a trace, in which case it is generic and can be reused by flushing the
- // deferred operations in the current trace and generating a goto.
- int trace_count_;
- BoyerMooreLookahead* bm_info_[2];
-
- Zone* zone_;
-};
-
-class SeqRegExpNode : public RegExpNode {
- public:
- explicit SeqRegExpNode(RegExpNode* on_success)
- : RegExpNode(on_success->zone()), on_success_(on_success) {}
- RegExpNode* on_success() { return on_success_; }
- void set_on_success(RegExpNode* node) { on_success_ = node; }
- RegExpNode* FilterOneByte(int depth) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override {
- on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
- if (offset == 0) set_bm_info(not_at_start, bm);
- }
-
- protected:
- RegExpNode* FilterSuccessor(int depth);
-
- private:
- RegExpNode* on_success_;
-};
-
-class ActionNode : public SeqRegExpNode {
- public:
- enum ActionType {
- SET_REGISTER_FOR_LOOP,
- INCREMENT_REGISTER,
- STORE_POSITION,
- BEGIN_SUBMATCH,
- POSITIVE_SUBMATCH_SUCCESS,
- EMPTY_MATCH_CHECK,
- CLEAR_CAPTURES
- };
- static ActionNode* SetRegisterForLoop(int reg, int val,
- RegExpNode* on_success);
- static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
- static ActionNode* StorePosition(int reg, bool is_capture,
- RegExpNode* on_success);
- static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
- static ActionNode* BeginSubmatch(int stack_pointer_reg, int position_reg,
- RegExpNode* on_success);
- static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
- int restore_reg,
- int clear_capture_count,
- int clear_capture_from,
- RegExpNode* on_success);
- static ActionNode* EmptyMatchCheck(int start_register,
- int repetition_register,
- int repetition_limit,
- RegExpNode* on_success);
- void Accept(NodeVisitor* visitor) override;
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int filled_in,
- bool not_at_start) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
- ActionType action_type() { return action_type_; }
- // TODO(erikcorry): We should allow some action nodes in greedy loops.
- int GreedyLoopTextLength() override {
- return kNodeIsTooComplexForGreedyLoops;
- }
-
- private:
- union {
- struct {
- int reg;
- int value;
- } u_store_register;
- struct {
- int reg;
- } u_increment_register;
- struct {
- int reg;
- bool is_capture;
- } u_position_register;
- struct {
- int stack_pointer_register;
- int current_position_register;
- int clear_register_count;
- int clear_register_from;
- } u_submatch;
- struct {
- int start_register;
- int repetition_register;
- int repetition_limit;
- } u_empty_match_check;
- struct {
- int range_from;
- int range_to;
- } u_clear_captures;
- } data_;
- ActionNode(ActionType action_type, RegExpNode* on_success)
- : SeqRegExpNode(on_success), action_type_(action_type) {}
- ActionType action_type_;
- friend class DotPrinterImpl;
-};
-
-class TextNode : public SeqRegExpNode {
- public:
- TextNode(ZoneList<TextElement>* elms, bool read_backward,
- RegExpNode* on_success)
- : SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
- TextNode(RegExpCharacterClass* that, bool read_backward,
- RegExpNode* on_success)
- : SeqRegExpNode(on_success),
- elms_(new (zone()) ZoneList<TextElement>(1, zone())),
- read_backward_(read_backward) {
- elms_->Add(TextElement::CharClass(that), zone());
- }
- // Create TextNode for a single character class for the given ranges.
- static TextNode* CreateForCharacterRanges(Zone* zone,
- ZoneList<CharacterRange>* ranges,
- bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags);
- // Create TextNode for a surrogate pair with a range given for the
- // lead and the trail surrogate each.
- static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
- CharacterRange trail,
- bool read_backward,
- RegExpNode* on_success,
- JSRegExp::Flags flags);
- void Accept(NodeVisitor* visitor) override;
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override;
- ZoneList<TextElement>* elements() { return elms_; }
- bool read_backward() { return read_backward_; }
- void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
- int GreedyLoopTextLength() override;
- RegExpNode* GetSuccessorOfOmnivorousTextNode(
- RegExpCompiler* compiler) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
- void CalculateOffsets();
- RegExpNode* FilterOneByte(int depth) override;
- int Length();
-
- private:
- enum TextEmitPassType {
- NON_LATIN1_MATCH, // Check for characters that can't match.
- SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
- NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
- CASE_CHARACTER_MATCH, // Case-independent single character check.
- CHARACTER_CLASS_MATCH // Character class.
- };
- static bool SkipPass(TextEmitPassType pass, bool ignore_case);
- static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
- static const int kLastPass = CHARACTER_CLASS_MATCH;
- void TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
- bool preloaded, Trace* trace, bool first_element_checked,
- int* checked_up_to);
- ZoneList<TextElement>* elms_;
- bool read_backward_;
-};
-
-class AssertionNode : public SeqRegExpNode {
- public:
- enum AssertionType {
- AT_END,
- AT_START,
- AT_BOUNDARY,
- AT_NON_BOUNDARY,
- AFTER_NEWLINE
- };
- static AssertionNode* AtEnd(RegExpNode* on_success) {
- return new (on_success->zone()) AssertionNode(AT_END, on_success);
- }
- static AssertionNode* AtStart(RegExpNode* on_success) {
- return new (on_success->zone()) AssertionNode(AT_START, on_success);
- }
- static AssertionNode* AtBoundary(RegExpNode* on_success) {
- return new (on_success->zone()) AssertionNode(AT_BOUNDARY, on_success);
- }
- static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
- return new (on_success->zone()) AssertionNode(AT_NON_BOUNDARY, on_success);
- }
- static AssertionNode* AfterNewline(RegExpNode* on_success) {
- return new (on_success->zone()) AssertionNode(AFTER_NEWLINE, on_success);
- }
- void Accept(NodeVisitor* visitor) override;
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int filled_in,
- bool not_at_start) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
- AssertionType assertion_type() { return assertion_type_; }
-
- private:
- void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
- enum IfPrevious { kIsNonWord, kIsWord };
- void BacktrackIfPrevious(RegExpCompiler* compiler, Trace* trace,
- IfPrevious backtrack_if_previous);
- AssertionNode(AssertionType t, RegExpNode* on_success)
- : SeqRegExpNode(on_success), assertion_type_(t) {}
- AssertionType assertion_type_;
-};
-
-class BackReferenceNode : public SeqRegExpNode {
- public:
- BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
- bool read_backward, RegExpNode* on_success)
- : SeqRegExpNode(on_success),
- start_reg_(start_reg),
- end_reg_(end_reg),
- flags_(flags),
- read_backward_(read_backward) {}
- void Accept(NodeVisitor* visitor) override;
- int start_register() { return start_reg_; }
- int end_register() { return end_reg_; }
- bool read_backward() { return read_backward_; }
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override {
- return;
- }
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
-
- private:
- int start_reg_;
- int end_reg_;
- JSRegExp::Flags flags_;
- bool read_backward_;
-};
-
-class EndNode : public RegExpNode {
- public:
- enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
- EndNode(Action action, Zone* zone) : RegExpNode(zone), action_(action) {}
- void Accept(NodeVisitor* visitor) override;
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override {
- // Returning 0 from EatsAtLeast should ensure we never get here.
- UNREACHABLE();
- }
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override {
- // Returning 0 from EatsAtLeast should ensure we never get here.
- UNREACHABLE();
- }
-
- private:
- Action action_;
-};
-
-class NegativeSubmatchSuccess : public EndNode {
- public:
- NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg,
- int clear_capture_count, int clear_capture_start,
- Zone* zone)
- : EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
- stack_pointer_register_(stack_pointer_reg),
- current_position_register_(position_reg),
- clear_capture_count_(clear_capture_count),
- clear_capture_start_(clear_capture_start) {}
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
-
- private:
- int stack_pointer_register_;
- int current_position_register_;
- int clear_capture_count_;
- int clear_capture_start_;
-};
-
-class Guard : public ZoneObject {
- public:
- enum Relation { LT, GEQ };
- Guard(int reg, Relation op, int value) : reg_(reg), op_(op), value_(value) {}
- int reg() { return reg_; }
- Relation op() { return op_; }
- int value() { return value_; }
-
- private:
- int reg_;
- Relation op_;
- int value_;
-};
-
-class GuardedAlternative {
- public:
- explicit GuardedAlternative(RegExpNode* node)
- : node_(node), guards_(nullptr) {}
- void AddGuard(Guard* guard, Zone* zone);
- RegExpNode* node() { return node_; }
- void set_node(RegExpNode* node) { node_ = node; }
- ZoneList<Guard*>* guards() { return guards_; }
-
- private:
- RegExpNode* node_;
- ZoneList<Guard*>* guards_;
-};
-
-class AlternativeGeneration;
-
-class ChoiceNode : public RegExpNode {
- public:
- explicit ChoiceNode(int expected_size, Zone* zone)
- : RegExpNode(zone),
- alternatives_(new (zone)
- ZoneList<GuardedAlternative>(expected_size, zone)),
- not_at_start_(false),
- being_calculated_(false) {}
- void Accept(NodeVisitor* visitor) override;
- void AddAlternative(GuardedAlternative node) {
- alternatives()->Add(node, zone());
- }
- ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
-
- bool being_calculated() { return being_calculated_; }
- bool not_at_start() { return not_at_start_; }
- void set_not_at_start() { not_at_start_ = true; }
- void set_being_calculated(bool b) { being_calculated_ = b; }
- virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
- return true;
- }
- RegExpNode* FilterOneByte(int depth) override;
- virtual bool read_backward() { return false; }
-
- protected:
- int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
- ZoneList<GuardedAlternative>* alternatives_;
-
- private:
- template <typename...>
- friend class Analysis;
-
- void GenerateGuard(RegExpMacroAssembler* macro_assembler, Guard* guard,
- Trace* trace);
- int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
- void EmitOutOfLineContinuation(RegExpCompiler* compiler, Trace* trace,
- GuardedAlternative alternative,
- AlternativeGeneration* alt_gen,
- int preload_characters,
- bool next_expects_preload);
- void SetUpPreLoad(RegExpCompiler* compiler, Trace* current_trace,
- PreloadState* preloads);
- void AssertGuardsMentionRegisters(Trace* trace);
- int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
- Trace* EmitGreedyLoop(RegExpCompiler* compiler, Trace* trace,
- AlternativeGenerationList* alt_gens,
- PreloadState* preloads,
- GreedyLoopState* greedy_loop_state, int text_length);
- void EmitChoices(RegExpCompiler* compiler,
- AlternativeGenerationList* alt_gens, int first_choice,
- Trace* trace, PreloadState* preloads);
-
- // If true, this node is never checked at the start of the input.
- // Allows a new trace to start with at_start() set to false.
- bool not_at_start_;
- bool being_calculated_;
-};
-
-class NegativeLookaroundChoiceNode : public ChoiceNode {
- public:
- explicit NegativeLookaroundChoiceNode(GuardedAlternative this_must_fail,
- GuardedAlternative then_do_this,
- Zone* zone)
- : ChoiceNode(2, zone) {
- AddAlternative(this_must_fail);
- AddAlternative(then_do_this);
- }
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override {
- continue_node()->FillInBMInfo(isolate, offset, budget - 1, bm,
- not_at_start);
- if (offset == 0) set_bm_info(not_at_start, bm);
- }
- static constexpr int kLookaroundIndex = 0;
- static constexpr int kContinueIndex = 1;
- RegExpNode* lookaround_node() {
- return alternatives()->at(kLookaroundIndex).node();
- }
- RegExpNode* continue_node() {
- return alternatives()->at(kContinueIndex).node();
- }
- // For a negative lookahead we don't emit the quick check for the
- // alternative that is expected to fail. This is because quick check code
- // starts by loading enough characters for the alternative that takes fewest
- // characters, but on a negative lookahead the negative branch did not take
- // part in that calculation (EatsAtLeast) so the assumptions don't hold.
- bool try_to_emit_quick_check_for_alternative(bool is_first) override {
- return !is_first;
- }
- void Accept(NodeVisitor* visitor) override;
- RegExpNode* FilterOneByte(int depth) override;
-};
-
-class LoopChoiceNode : public ChoiceNode {
- public:
- LoopChoiceNode(bool body_can_be_zero_length, bool read_backward,
- int min_loop_iterations, Zone* zone)
- : ChoiceNode(2, zone),
- loop_node_(nullptr),
- continue_node_(nullptr),
- body_can_be_zero_length_(body_can_be_zero_length),
- read_backward_(read_backward),
- traversed_loop_initialization_node_(false),
- min_loop_iterations_(min_loop_iterations) {}
- void AddLoopAlternative(GuardedAlternative alt);
- void AddContinueAlternative(GuardedAlternative alt);
- void Emit(RegExpCompiler* compiler, Trace* trace) override;
- void GetQuickCheckDetails(QuickCheckDetails* details,
- RegExpCompiler* compiler, int characters_filled_in,
- bool not_at_start) override;
- void GetQuickCheckDetailsFromLoopEntry(QuickCheckDetails* details,
- RegExpCompiler* compiler,
- int characters_filled_in,
- bool not_at_start) override;
- void FillInBMInfo(Isolate* isolate, int offset, int budget,
- BoyerMooreLookahead* bm, bool not_at_start) override;
- EatsAtLeastInfo EatsAtLeastFromLoopEntry() override;
- RegExpNode* loop_node() { return loop_node_; }
- RegExpNode* continue_node() { return continue_node_; }
- bool body_can_be_zero_length() { return body_can_be_zero_length_; }
- int min_loop_iterations() const { return min_loop_iterations_; }
- bool read_backward() override { return read_backward_; }
- void Accept(NodeVisitor* visitor) override;
- RegExpNode* FilterOneByte(int depth) override;
-
- private:
- // AddAlternative is made private for loop nodes because alternatives
- // should not be added freely, we need to keep track of which node
- // goes back to the node itself.
- void AddAlternative(GuardedAlternative node) {
- ChoiceNode::AddAlternative(node);
- }
-
- RegExpNode* loop_node_;
- RegExpNode* continue_node_;
- bool body_can_be_zero_length_;
- bool read_backward_;
-
- // Temporary marker set only while generating quick check details. Represents
- // whether GetQuickCheckDetails traversed the initialization node for this
- // loop's counter. If so, we may be able to generate stricter quick checks
- // because we know the loop node must match at least min_loop_iterations_
- // times before the continuation node can match.
- bool traversed_loop_initialization_node_;
-
- // The minimum number of times the loop_node_ must match before the
- // continue_node_ might be considered. This value can be temporarily decreased
- // while generating quick check details, to represent the remaining iterations
- // after the completed portion of the quick check details.
- int min_loop_iterations_;
-
- friend class IterationDecrementer;
- friend class LoopInitializationMarker;
-};
-
-class NodeVisitor {
- public:
- virtual ~NodeVisitor() = default;
-#define DECLARE_VISIT(Type) virtual void Visit##Type(Type##Node* that) = 0;
- FOR_EACH_NODE_TYPE(DECLARE_VISIT)
-#undef DECLARE_VISIT
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_NODES_H_
diff --git a/js/src/new-regexp/regexp-parser.cc b/js/src/new-regexp/regexp-parser.cc
deleted file mode 100644
index a26e35438..000000000
--- a/js/src/new-regexp/regexp-parser.cc
+++ /dev/null
@@ -1,2109 +0,0 @@
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-parser.h"
-
-#include <vector>
-
-#include "new-regexp/property-sequences.h"
-#include "new-regexp/regexp-macro-assembler.h"
-#include "new-regexp/regexp.h"
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/uniset.h"
-#endif // V8_INTL_SUPPORT
-
-namespace v8 {
-namespace internal {
-
-RegExpParser::RegExpParser(FlatStringReader* in, JSRegExp::Flags flags,
- Isolate* isolate, Zone* zone)
- : isolate_(isolate),
- zone_(zone),
- captures_(nullptr),
- named_captures_(nullptr),
- named_back_references_(nullptr),
- in_(in),
- current_(kEndMarker),
- top_level_flags_(flags),
- next_pos_(0),
- captures_started_(0),
- capture_count_(0),
- has_more_(true),
- simple_(false),
- contains_anchor_(false),
- is_scanned_for_captures_(false),
- has_named_captures_(false),
- failed_(false) {
- Advance();
-}
-
-template <bool update_position>
-inline uc32 RegExpParser::ReadNext() {
- int position = next_pos_;
- uc32 c0 = in()->Get(position);
- position++;
- // Read the whole surrogate pair in case of unicode flag, if possible.
- if (unicode() && position < in()->length() &&
- unibrow::Utf16::IsLeadSurrogate(static_cast<uc16>(c0))) {
- uc16 c1 = in()->Get(position);
- if (unibrow::Utf16::IsTrailSurrogate(c1)) {
- c0 = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(c0), c1);
- position++;
- }
- }
- if (update_position) next_pos_ = position;
- return c0;
-}
-
-
-uc32 RegExpParser::Next() {
- if (has_next()) {
- return ReadNext<false>();
- } else {
- return kEndMarker;
- }
-}
-
-void RegExpParser::Advance() {
- if (has_next()) {
- StackLimitCheck check(isolate());
- if (check.HasOverflowed()) {
- if (FLAG_correctness_fuzzer_suppressions) {
- FATAL("Aborting on stack overflow");
- }
- ReportError(RegExpError::kStackOverflow);
- } else if (zone()->excess_allocation()) {
- if (FLAG_correctness_fuzzer_suppressions) {
- FATAL("Aborting on excess zone allocation");
- }
- ReportError(RegExpError::kTooLarge);
- } else {
- current_ = ReadNext<true>();
- }
- } else {
- current_ = kEndMarker;
- // Advance so that position() points to 1-after-the-last-character. This is
- // important so that Reset() to this position works correctly.
- next_pos_ = in()->length() + 1;
- has_more_ = false;
- }
-}
-
-
-void RegExpParser::Reset(int pos) {
- next_pos_ = pos;
- has_more_ = (pos < in()->length());
- Advance();
-}
-
-void RegExpParser::Advance(int dist) {
- next_pos_ += dist - 1;
- Advance();
-}
-
-
-bool RegExpParser::simple() { return simple_; }
-
-bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {
- switch (c) {
- case '^':
- case '$':
- case '\\':
- case '.':
- case '*':
- case '+':
- case '?':
- case '(':
- case ')':
- case '[':
- case ']':
- case '{':
- case '}':
- case '|':
- case '/':
- return true;
- default:
- break;
- }
- return false;
-}
-
-RegExpTree* RegExpParser::ReportError(RegExpError error) {
- if (failed_) return nullptr; // Do not overwrite any existing error.
- failed_ = true;
- error_ = error;
- error_pos_ = position();
- // Zip to the end to make sure no more input is read.
- current_ = kEndMarker;
- next_pos_ = in()->length();
- return nullptr;
-}
-
-#define CHECK_FAILED /**/); \
- if (failed_) return nullptr; \
- ((void)0
-
-// Pattern ::
-// Disjunction
-RegExpTree* RegExpParser::ParsePattern() {
- RegExpTree* result = ParseDisjunction(CHECK_FAILED);
- PatchNamedBackReferences(CHECK_FAILED);
- DCHECK(!has_more());
- // If the result of parsing is a literal string atom, and it has the
- // same length as the input, then the atom is identical to the input.
- if (result->IsAtom() && result->AsAtom()->length() == in()->length()) {
- simple_ = true;
- }
- return result;
-}
-
-
-// Disjunction ::
-// Alternative
-// Alternative | Disjunction
-// Alternative ::
-// [empty]
-// Term Alternative
-// Term ::
-// Assertion
-// Atom
-// Atom Quantifier
-RegExpTree* RegExpParser::ParseDisjunction() {
- // Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(nullptr, INITIAL, RegExpLookaround::LOOKAHEAD,
- 0, nullptr, top_level_flags_, zone());
- RegExpParserState* state = &initial_state;
- // Cache the builder in a local variable for quick access.
- RegExpBuilder* builder = initial_state.builder();
- while (true) {
- switch (current()) {
- case kEndMarker:
- if (state->IsSubexpression()) {
- // Inside a parenthesized group when hitting end of input.
- return ReportError(RegExpError::kUnterminatedGroup);
- }
- DCHECK_EQ(INITIAL, state->group_type());
- // Parsing completed successfully.
- return builder->ToRegExp();
- case ')': {
- if (!state->IsSubexpression()) {
- return ReportError(RegExpError::kUnmatchedParen);
- }
- DCHECK_NE(INITIAL, state->group_type());
-
- Advance();
- // End disjunction parsing and convert builder content to new single
- // regexp atom.
- RegExpTree* body = builder->ToRegExp();
-
- int end_capture_index = captures_started();
-
- int capture_index = state->capture_index();
- SubexpressionType group_type = state->group_type();
-
- // Build result of subexpression.
- if (group_type == CAPTURE) {
- if (state->IsNamedCapture()) {
- CreateNamedCaptureAtIndex(state->capture_name(),
- capture_index CHECK_FAILED);
- }
- RegExpCapture* capture = GetCapture(capture_index);
- capture->set_body(body);
- body = capture;
- } else if (group_type == GROUPING) {
- body = new (zone()) RegExpGroup(body);
- } else {
- DCHECK(group_type == POSITIVE_LOOKAROUND ||
- group_type == NEGATIVE_LOOKAROUND);
- bool is_positive = (group_type == POSITIVE_LOOKAROUND);
- body = new (zone()) RegExpLookaround(
- body, is_positive, end_capture_index - capture_index,
- capture_index, state->lookaround_type());
- }
-
- // Restore previous state.
- state = state->previous_state();
- builder = state->builder();
-
- builder->AddAtom(body);
- // For compatibility with JSC and ES3, we allow quantifiers after
- // lookaheads, and break in all cases.
- break;
- }
- case '|': {
- Advance();
- builder->NewAlternative();
- continue;
- }
- case '*':
- case '+':
- case '?':
- return ReportError(RegExpError::kNothingToRepeat);
- case '^': {
- Advance();
- if (builder->multiline()) {
- builder->AddAssertion(new (zone()) RegExpAssertion(
- RegExpAssertion::START_OF_LINE, builder->flags()));
- } else {
- builder->AddAssertion(new (zone()) RegExpAssertion(
- RegExpAssertion::START_OF_INPUT, builder->flags()));
- set_contains_anchor();
- }
- continue;
- }
- case '$': {
- Advance();
- RegExpAssertion::AssertionType assertion_type =
- builder->multiline() ? RegExpAssertion::END_OF_LINE
- : RegExpAssertion::END_OF_INPUT;
- builder->AddAssertion(
- new (zone()) RegExpAssertion(assertion_type, builder->flags()));
- continue;
- }
- case '.': {
- Advance();
- ZoneList<CharacterRange>* ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
-
- if (builder->dotall()) {
- // Everything.
- CharacterRange::AddClassEscape('*', ranges, false, zone());
- } else {
- // Everything except \x0A, \x0D, \u2028 and \u2029
- CharacterRange::AddClassEscape('.', ranges, false, zone());
- }
-
- RegExpCharacterClass* cc =
- new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags());
- builder->AddCharacterClass(cc);
- break;
- }
- case '(': {
- state = ParseOpenParenthesis(state CHECK_FAILED);
- builder = state->builder();
- continue;
- }
- case '[': {
- RegExpTree* cc = ParseCharacterClass(builder CHECK_FAILED);
- builder->AddCharacterClass(cc->AsCharacterClass());
- break;
- }
- // Atom ::
- // \ AtomEscape
- case '\\':
- switch (Next()) {
- case kEndMarker:
- return ReportError(RegExpError::kEscapeAtEndOfPattern);
- case 'b':
- Advance(2);
- builder->AddAssertion(new (zone()) RegExpAssertion(
- RegExpAssertion::BOUNDARY, builder->flags()));
- continue;
- case 'B':
- Advance(2);
- builder->AddAssertion(new (zone()) RegExpAssertion(
- RegExpAssertion::NON_BOUNDARY, builder->flags()));
- continue;
- // AtomEscape ::
- // CharacterClassEscape
- //
- // CharacterClassEscape :: one of
- // d D s S w W
- case 'd':
- case 'D':
- case 's':
- case 'S':
- case 'w':
- case 'W': {
- uc32 c = Next();
- Advance(2);
- ZoneList<CharacterRange>* ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- CharacterRange::AddClassEscape(
- c, ranges, unicode() && builder->ignore_case(), zone());
- RegExpCharacterClass* cc = new (zone())
- RegExpCharacterClass(zone(), ranges, builder->flags());
- builder->AddCharacterClass(cc);
- break;
- }
- case 'p':
- case 'P': {
- uc32 p = Next();
- Advance(2);
- if (unicode()) {
- ZoneList<CharacterRange>* ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- ZoneVector<char> name_1(zone());
- ZoneVector<char> name_2(zone());
- if (ParsePropertyClassName(&name_1, &name_2)) {
- if (AddPropertyClassRange(ranges, p == 'P', name_1, name_2)) {
- RegExpCharacterClass* cc = new (zone())
- RegExpCharacterClass(zone(), ranges, builder->flags());
- builder->AddCharacterClass(cc);
- break;
- }
- if (p == 'p' && name_2.empty()) {
- RegExpTree* sequence = GetPropertySequence(name_1);
- if (sequence != nullptr) {
- builder->AddAtom(sequence);
- break;
- }
- }
- }
- return ReportError(RegExpError::kInvalidPropertyName);
- } else {
- builder->AddCharacter(p);
- }
- break;
- }
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9': {
- int index = 0;
- bool is_backref = ParseBackReferenceIndex(&index CHECK_FAILED);
- if (is_backref) {
- if (state->IsInsideCaptureGroup(index)) {
- // The back reference is inside the capture group it refers to.
- // Nothing can possibly have been captured yet, so we use empty
- // instead. This ensures that, when checking a back reference,
- // the capture registers of the referenced capture are either
- // both set or both cleared.
- builder->AddEmpty();
- } else {
- RegExpCapture* capture = GetCapture(index);
- RegExpTree* atom =
- new (zone()) RegExpBackReference(capture, builder->flags());
- builder->AddAtom(atom);
- }
- break;
- }
- // With /u, no identity escapes except for syntax characters
- // are allowed. Otherwise, all identity escapes are allowed.
- if (unicode()) {
- return ReportError(RegExpError::kInvalidEscape);
- }
- uc32 first_digit = Next();
- if (first_digit == '8' || first_digit == '9') {
- builder->AddCharacter(first_digit);
- Advance(2);
- break;
- }
- V8_FALLTHROUGH;
- }
- case '0': {
- Advance();
- if (unicode() && Next() >= '0' && Next() <= '9') {
- // With /u, decimal escape with leading 0 are not parsed as octal.
- return ReportError(RegExpError::kInvalidDecimalEscape);
- }
- uc32 octal = ParseOctalLiteral();
- builder->AddCharacter(octal);
- break;
- }
- // ControlEscape :: one of
- // f n r t v
- case 'f':
- Advance(2);
- builder->AddCharacter('\f');
- break;
- case 'n':
- Advance(2);
- builder->AddCharacter('\n');
- break;
- case 'r':
- Advance(2);
- builder->AddCharacter('\r');
- break;
- case 't':
- Advance(2);
- builder->AddCharacter('\t');
- break;
- case 'v':
- Advance(2);
- builder->AddCharacter('\v');
- break;
- case 'c': {
- Advance();
- uc32 controlLetter = Next();
- // Special case if it is an ASCII letter.
- // Convert lower case letters to uppercase.
- uc32 letter = controlLetter & ~('a' ^ 'A');
- if (letter < 'A' || 'Z' < letter) {
- // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
- // Read the backslash as a literal character instead of as
- // starting an escape.
- // ES#prod-annexB-ExtendedPatternCharacter
- if (unicode()) {
- // With /u, invalid escapes are not treated as identity escapes.
- return ReportError(RegExpError::kInvalidUnicodeEscape);
- }
- builder->AddCharacter('\\');
- } else {
- Advance(2);
- builder->AddCharacter(controlLetter & 0x1F);
- }
- break;
- }
- case 'x': {
- Advance(2);
- uc32 value;
- if (ParseHexEscape(2, &value)) {
- builder->AddCharacter(value);
- } else if (!unicode()) {
- builder->AddCharacter('x');
- } else {
- // With /u, invalid escapes are not treated as identity escapes.
- return ReportError(RegExpError::kInvalidEscape);
- }
- break;
- }
- case 'u': {
- Advance(2);
- uc32 value;
- if (ParseUnicodeEscape(&value)) {
- builder->AddEscapedUnicodeCharacter(value);
- } else if (!unicode()) {
- builder->AddCharacter('u');
- } else {
- // With /u, invalid escapes are not treated as identity escapes.
- return ReportError(RegExpError::kInvalidUnicodeEscape);
- }
- break;
- }
- case 'k':
- // Either an identity escape or a named back-reference. The two
- // interpretations are mutually exclusive: '\k' is interpreted as
- // an identity escape for non-Unicode patterns without named
- // capture groups, and as the beginning of a named back-reference
- // in all other cases.
- if (unicode() || HasNamedCaptures()) {
- Advance(2);
- ParseNamedBackReference(builder, state CHECK_FAILED);
- break;
- }
- V8_FALLTHROUGH;
- default:
- Advance();
- // With /u, no identity escapes except for syntax characters
- // are allowed. Otherwise, all identity escapes are allowed.
- if (!unicode() || IsSyntaxCharacterOrSlash(current())) {
- builder->AddCharacter(current());
- Advance();
- } else {
- return ReportError(RegExpError::kInvalidEscape);
- }
- break;
- }
- break;
- case '{': {
- int dummy;
- bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
- if (parsed) return ReportError(RegExpError::kNothingToRepeat);
- V8_FALLTHROUGH;
- }
- case '}':
- case ']':
- if (unicode()) {
- return ReportError(RegExpError::kLoneQuantifierBrackets);
- }
- V8_FALLTHROUGH;
- default:
- builder->AddUnicodeCharacter(current());
- Advance();
- break;
- } // end switch(current())
-
- int min;
- int max;
- switch (current()) {
- // QuantifierPrefix ::
- // *
- // +
- // ?
- // {
- case '*':
- min = 0;
- max = RegExpTree::kInfinity;
- Advance();
- break;
- case '+':
- min = 1;
- max = RegExpTree::kInfinity;
- Advance();
- break;
- case '?':
- min = 0;
- max = 1;
- Advance();
- break;
- case '{':
- if (ParseIntervalQuantifier(&min, &max)) {
- if (max < min) {
- return ReportError(RegExpError::kRangeOutOfOrder);
- }
- break;
- } else if (unicode()) {
- // With /u, incomplete quantifiers are not allowed.
- return ReportError(RegExpError::kIncompleteQuantifier);
- }
- continue;
- default:
- continue;
- }
- RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
- if (current() == '?') {
- quantifier_type = RegExpQuantifier::NON_GREEDY;
- Advance();
- } else if (FLAG_regexp_possessive_quantifier && current() == '+') {
- // FLAG_regexp_possessive_quantifier is a debug-only flag.
- quantifier_type = RegExpQuantifier::POSSESSIVE;
- Advance();
- }
- if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
- return ReportError(RegExpError::kInvalidQuantifier);
- }
- }
-}
-
-RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
- RegExpParserState* state) {
- RegExpLookaround::Type lookaround_type = state->lookaround_type();
- bool is_named_capture = false;
- JSRegExp::Flags switch_on = JSRegExp::kNone;
- JSRegExp::Flags switch_off = JSRegExp::kNone;
- const ZoneVector<uc16>* capture_name = nullptr;
- SubexpressionType subexpr_type = CAPTURE;
- Advance();
- if (current() == '?') {
- switch (Next()) {
- case ':':
- Advance(2);
- subexpr_type = GROUPING;
- break;
- case '=':
- Advance(2);
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = POSITIVE_LOOKAROUND;
- break;
- case '!':
- Advance(2);
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = NEGATIVE_LOOKAROUND;
- break;
- case '-':
- case 'i':
- case 's':
- case 'm': {
- if (!FLAG_regexp_mode_modifiers) {
- ReportError(RegExpError::kInvalidGroup);
- return nullptr;
- }
- Advance();
- bool flags_sense = true; // Switching on flags.
- while (subexpr_type != GROUPING) {
- switch (current()) {
- case '-':
- if (!flags_sense) {
- ReportError(RegExpError::kMultipleFlagDashes);
- return nullptr;
- }
- flags_sense = false;
- Advance();
- continue;
- case 's':
- case 'i':
- case 'm': {
- JSRegExp::Flags bit = JSRegExp::kUnicode;
- if (current() == 'i') bit = JSRegExp::kIgnoreCase;
- if (current() == 'm') bit = JSRegExp::kMultiline;
- if (current() == 's') bit = JSRegExp::kDotAll;
- if (((switch_on | switch_off) & bit) != 0) {
- ReportError(RegExpError::kRepeatedFlag);
- return nullptr;
- }
- if (flags_sense) {
- switch_on |= bit;
- } else {
- switch_off |= bit;
- }
- Advance();
- continue;
- }
- case ')': {
- Advance();
- state->builder()
- ->FlushText(); // Flush pending text using old flags.
- // These (?i)-style flag switches don't put us in a subexpression
- // at all, they just modify the flags in the rest of the current
- // subexpression.
- JSRegExp::Flags flags =
- (state->builder()->flags() | switch_on) & ~switch_off;
- state->builder()->set_flags(flags);
- return state;
- }
- case ':':
- Advance();
- subexpr_type = GROUPING; // Will break us out of the outer loop.
- continue;
- default:
- ReportError(RegExpError::kInvalidFlagGroup);
- return nullptr;
- }
- }
- break;
- }
- case '<':
- Advance();
- if (Next() == '=') {
- Advance(2);
- lookaround_type = RegExpLookaround::LOOKBEHIND;
- subexpr_type = POSITIVE_LOOKAROUND;
- break;
- } else if (Next() == '!') {
- Advance(2);
- lookaround_type = RegExpLookaround::LOOKBEHIND;
- subexpr_type = NEGATIVE_LOOKAROUND;
- break;
- }
- is_named_capture = true;
- has_named_captures_ = true;
- Advance();
- break;
- default:
- ReportError(RegExpError::kInvalidGroup);
- return nullptr;
- }
- }
- if (subexpr_type == CAPTURE) {
- if (captures_started_ >= JSRegExp::kMaxCaptures) {
- ReportError(RegExpError::kTooManyCaptures);
- return nullptr;
- }
- captures_started_++;
-
- if (is_named_capture) {
- capture_name = ParseCaptureGroupName(CHECK_FAILED);
- }
- }
- JSRegExp::Flags flags = (state->builder()->flags() | switch_on) & ~switch_off;
- // Store current state and begin new disjunction parsing.
- return new (zone())
- RegExpParserState(state, subexpr_type, lookaround_type, captures_started_,
- capture_name, flags, zone());
-}
-
-#ifdef DEBUG
-// Currently only used in an DCHECK.
-static bool IsSpecialClassEscape(uc32 c) {
- switch (c) {
- case 'd':
- case 'D':
- case 's':
- case 'S':
- case 'w':
- case 'W':
- return true;
- default:
- return false;
- }
-}
-#endif
-
-
-// In order to know whether an escape is a backreference or not we have to scan
-// the entire regexp and find the number of capturing parentheses. However we
-// don't want to scan the regexp twice unless it is necessary. This mini-parser
-// is called when needed. It can see the difference between capturing and
-// noncapturing parentheses and can skip character classes and backslash-escaped
-// characters.
-void RegExpParser::ScanForCaptures() {
- DCHECK(!is_scanned_for_captures_);
- const int saved_position = position();
- // Start with captures started previous to current position
- int capture_count = captures_started();
- // Add count of captures after this position.
- int n;
- while ((n = current()) != kEndMarker) {
- Advance();
- switch (n) {
- case '\\':
- Advance();
- break;
- case '[': {
- int c;
- while ((c = current()) != kEndMarker) {
- Advance();
- if (c == '\\') {
- Advance();
- } else {
- if (c == ']') break;
- }
- }
- break;
- }
- case '(':
- if (current() == '?') {
- // At this point we could be in
- // * a non-capturing group '(:',
- // * a lookbehind assertion '(?<=' '(?<!'
- // * or a named capture '(?<'.
- //
- // Of these, only named captures are capturing groups.
-
- Advance();
- if (current() != '<') break;
-
- Advance();
- if (current() == '=' || current() == '!') break;
-
- // Found a possible named capture. It could turn out to be a syntax
- // error (e.g. an unterminated or invalid name), but that distinction
- // does not matter for our purposes.
- has_named_captures_ = true;
- }
- capture_count++;
- break;
- }
- }
- capture_count_ = capture_count;
- is_scanned_for_captures_ = true;
- Reset(saved_position);
-}
-
-
-bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
- DCHECK_EQ('\\', current());
- DCHECK('1' <= Next() && Next() <= '9');
- // Try to parse a decimal literal that is no greater than the total number
- // of left capturing parentheses in the input.
- int start = position();
- int value = Next() - '0';
- Advance(2);
- while (true) {
- uc32 c = current();
- if (IsDecimalDigit(c)) {
- value = 10 * value + (c - '0');
- if (value > JSRegExp::kMaxCaptures) {
- Reset(start);
- return false;
- }
- Advance();
- } else {
- break;
- }
- }
- if (value > captures_started()) {
- if (!is_scanned_for_captures_) ScanForCaptures();
- if (value > capture_count_) {
- Reset(start);
- return false;
- }
- }
- *index_out = value;
- return true;
-}
-
-static void push_code_unit(ZoneVector<uc16>* v, uint32_t code_unit) {
- if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
- v->push_back(code_unit);
- } else {
- v->push_back(unibrow::Utf16::LeadSurrogate(code_unit));
- v->push_back(unibrow::Utf16::TrailSurrogate(code_unit));
- }
-}
-
-const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
- ZoneVector<uc16>* name =
- new (zone()->New(sizeof(ZoneVector<uc16>))) ZoneVector<uc16>(zone());
-
- bool at_start = true;
- while (true) {
- uc32 c = current();
- Advance();
-
- // Convert unicode escapes.
- if (c == '\\' && current() == 'u') {
- Advance();
- if (!ParseUnicodeEscape(&c)) {
- ReportError(RegExpError::kInvalidUnicodeEscape);
- return nullptr;
- }
- }
-
- // The backslash char is misclassified as both ID_Start and ID_Continue.
- if (c == '\\') {
- ReportError(RegExpError::kInvalidCaptureGroupName);
- return nullptr;
- }
-
- if (at_start) {
- if (!IsIdentifierStart(c)) {
- ReportError(RegExpError::kInvalidCaptureGroupName);
- return nullptr;
- }
- push_code_unit(name, c);
- at_start = false;
- } else {
- if (c == '>') {
- break;
- } else if (IsIdentifierPart(c)) {
- push_code_unit(name, c);
- } else {
- ReportError(RegExpError::kInvalidCaptureGroupName);
- return nullptr;
- }
- }
- }
-
- return name;
-}
-
-bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
- int index) {
- DCHECK(0 < index && index <= captures_started_);
- DCHECK_NOT_NULL(name);
-
- RegExpCapture* capture = GetCapture(index);
- DCHECK_NULL(capture->name());
-
- capture->set_name(name);
-
- if (named_captures_ == nullptr) {
- named_captures_ = new (zone_->New(sizeof(*named_captures_)))
- ZoneSet<RegExpCapture*, RegExpCaptureNameLess>(zone());
- } else {
- // Check for duplicates and bail if we find any.
-
- const auto& named_capture_it = named_captures_->find(capture);
- if (named_capture_it != named_captures_->end()) {
- ReportError(RegExpError::kDuplicateCaptureGroupName);
- return false;
- }
- }
-
- named_captures_->emplace(capture);
-
- return true;
-}
-
-bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
- RegExpParserState* state) {
- // The parser is assumed to be on the '<' in \k<name>.
- if (current() != '<') {
- ReportError(RegExpError::kInvalidNamedReference);
- return false;
- }
-
- Advance();
- const ZoneVector<uc16>* name = ParseCaptureGroupName();
- if (name == nullptr) {
- return false;
- }
-
- if (state->IsInsideCaptureGroup(name)) {
- builder->AddEmpty();
- } else {
- RegExpBackReference* atom =
- new (zone()) RegExpBackReference(builder->flags());
- atom->set_name(name);
-
- builder->AddAtom(atom);
-
- if (named_back_references_ == nullptr) {
- named_back_references_ =
- new (zone()) ZoneList<RegExpBackReference*>(1, zone());
- }
- named_back_references_->Add(atom, zone());
- }
-
- return true;
-}
-
-void RegExpParser::PatchNamedBackReferences() {
- if (named_back_references_ == nullptr) return;
-
- if (named_captures_ == nullptr) {
- ReportError(RegExpError::kInvalidNamedCaptureReference);
- return;
- }
-
- // Look up and patch the actual capture for each named back reference.
-
- for (int i = 0; i < named_back_references_->length(); i++) {
- RegExpBackReference* ref = named_back_references_->at(i);
-
- // Capture used to search the named_captures_ by name, index of the
- // capture is never used.
- static const int kInvalidIndex = 0;
- RegExpCapture* search_capture = new (zone()) RegExpCapture(kInvalidIndex);
- DCHECK_NULL(search_capture->name());
- search_capture->set_name(ref->name());
-
- int index = -1;
- const auto& capture_it = named_captures_->find(search_capture);
- if (capture_it != named_captures_->end()) {
- index = (*capture_it)->index();
- } else {
- ReportError(RegExpError::kInvalidNamedCaptureReference);
- return;
- }
-
- ref->set_capture(GetCapture(index));
- }
-}
-
-RegExpCapture* RegExpParser::GetCapture(int index) {
- // The index for the capture groups are one-based. Its index in the list is
- // zero-based.
- int know_captures =
- is_scanned_for_captures_ ? capture_count_ : captures_started_;
- DCHECK(index <= know_captures);
- if (captures_ == nullptr) {
- captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
- }
- while (captures_->length() < know_captures) {
- captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
- }
- return captures_->at(index - 1);
-}
-
-namespace {
-
-struct RegExpCaptureIndexLess {
- bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
- DCHECK_NOT_NULL(lhs);
- DCHECK_NOT_NULL(rhs);
- return lhs->index() < rhs->index();
- }
-};
-
-} // namespace
-
-Handle<FixedArray> RegExpParser::CreateCaptureNameMap() {
- if (named_captures_ == nullptr || named_captures_->empty()) {
- return Handle<FixedArray>();
- }
-
- // Named captures are sorted by name (because the set is used to ensure
- // name uniqueness). But the capture name map must to be sorted by index.
-
- ZoneVector<RegExpCapture*> sorted_named_captures(
- named_captures_->begin(), named_captures_->end(), zone());
- std::sort(sorted_named_captures.begin(), sorted_named_captures.end(),
- RegExpCaptureIndexLess{});
- DCHECK_EQ(sorted_named_captures.size(), named_captures_->size());
-
- Factory* factory = isolate()->factory();
-
- int len = static_cast<int>(sorted_named_captures.size()) * 2;
- Handle<FixedArray> array = factory->NewFixedArray(len);
-
- int i = 0;
- for (const auto& capture : sorted_named_captures) {
- Vector<const uc16> capture_name(capture->name()->data(),
- capture->name()->size());
- // CSA code in ConstructNewResultFromMatchInfo requires these strings to be
- // internalized so they can be used as property names in the 'exec' results.
- Handle<String> name = factory->InternalizeString(capture_name);
- array->set(i * 2, *name);
- array->set(i * 2 + 1, Smi::FromInt(capture->index()));
-
- i++;
- }
- DCHECK_EQ(i * 2, len);
-
- return array;
-}
-
-bool RegExpParser::HasNamedCaptures() {
- if (has_named_captures_ || is_scanned_for_captures_) {
- return has_named_captures_;
- }
-
- ScanForCaptures();
- DCHECK(is_scanned_for_captures_);
- return has_named_captures_;
-}
-
-bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
- for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {
- if (s->group_type() != CAPTURE) continue;
- // Return true if we found the matching capture index.
- if (index == s->capture_index()) return true;
- // Abort if index is larger than what has been parsed up till this state.
- if (index > s->capture_index()) return false;
- }
- return false;
-}
-
-bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(
- const ZoneVector<uc16>* name) {
- DCHECK_NOT_NULL(name);
- for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {
- if (s->capture_name() == nullptr) continue;
- if (*s->capture_name() == *name) return true;
- }
- return false;
-}
-
-// QuantifierPrefix ::
-// { DecimalDigits }
-// { DecimalDigits , }
-// { DecimalDigits , DecimalDigits }
-//
-// Returns true if parsing succeeds, and set the min_out and max_out
-// values. Values are truncated to RegExpTree::kInfinity if they overflow.
-bool RegExpParser::ParseIntervalQuantifier(int* min_out, int* max_out) {
- DCHECK_EQ(current(), '{');
- int start = position();
- Advance();
- int min = 0;
- if (!IsDecimalDigit(current())) {
- Reset(start);
- return false;
- }
- while (IsDecimalDigit(current())) {
- int next = current() - '0';
- if (min > (RegExpTree::kInfinity - next) / 10) {
- // Overflow. Skip past remaining decimal digits and return -1.
- do {
- Advance();
- } while (IsDecimalDigit(current()));
- min = RegExpTree::kInfinity;
- break;
- }
- min = 10 * min + next;
- Advance();
- }
- int max = 0;
- if (current() == '}') {
- max = min;
- Advance();
- } else if (current() == ',') {
- Advance();
- if (current() == '}') {
- max = RegExpTree::kInfinity;
- Advance();
- } else {
- while (IsDecimalDigit(current())) {
- int next = current() - '0';
- if (max > (RegExpTree::kInfinity - next) / 10) {
- do {
- Advance();
- } while (IsDecimalDigit(current()));
- max = RegExpTree::kInfinity;
- break;
- }
- max = 10 * max + next;
- Advance();
- }
- if (current() != '}') {
- Reset(start);
- return false;
- }
- Advance();
- }
- } else {
- Reset(start);
- return false;
- }
- *min_out = min;
- *max_out = max;
- return true;
-}
-
-
-uc32 RegExpParser::ParseOctalLiteral() {
- DCHECK(('0' <= current() && current() <= '7') || current() == kEndMarker);
- // For compatibility with some other browsers (not all), we parse
- // up to three octal digits with a value below 256.
- // ES#prod-annexB-LegacyOctalEscapeSequence
- uc32 value = current() - '0';
- Advance();
- if ('0' <= current() && current() <= '7') {
- value = value * 8 + current() - '0';
- Advance();
- if (value < 32 && '0' <= current() && current() <= '7') {
- value = value * 8 + current() - '0';
- Advance();
- }
- }
- return value;
-}
-
-
-bool RegExpParser::ParseHexEscape(int length, uc32* value) {
- int start = position();
- uc32 val = 0;
- for (int i = 0; i < length; ++i) {
- uc32 c = current();
- int d = HexValue(c);
- if (d < 0) {
- Reset(start);
- return false;
- }
- val = val * 16 + d;
- Advance();
- }
- *value = val;
- return true;
-}
-
-// This parses RegExpUnicodeEscapeSequence as described in ECMA262.
-bool RegExpParser::ParseUnicodeEscape(uc32* value) {
- // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
- // allowed). In the latter case, the number of hex digits between { } is
- // arbitrary. \ and u have already been read.
- if (current() == '{' && unicode()) {
- int start = position();
- Advance();
- if (ParseUnlimitedLengthHexNumber(0x10FFFF, value)) {
- if (current() == '}') {
- Advance();
- return true;
- }
- }
- Reset(start);
- return false;
- }
- // \u but no {, or \u{...} escapes not allowed.
- bool result = ParseHexEscape(4, value);
- if (result && unicode() && unibrow::Utf16::IsLeadSurrogate(*value) &&
- current() == '\\') {
- // Attempt to read trail surrogate.
- int start = position();
- if (Next() == 'u') {
- Advance(2);
- uc32 trail;
- if (ParseHexEscape(4, &trail) &&
- unibrow::Utf16::IsTrailSurrogate(trail)) {
- *value = unibrow::Utf16::CombineSurrogatePair(static_cast<uc16>(*value),
- static_cast<uc16>(trail));
- return true;
- }
- }
- Reset(start);
- }
- return result;
-}
-
-#ifdef V8_INTL_SUPPORT
-
-namespace {
-
-bool IsExactPropertyAlias(const char* property_name, UProperty property) {
- const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
- if (short_name != nullptr && strcmp(property_name, short_name) == 0)
- return true;
- for (int i = 0;; i++) {
- const char* long_name = u_getPropertyName(
- property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == nullptr) break;
- if (strcmp(property_name, long_name) == 0) return true;
- }
- return false;
-}
-
-bool IsExactPropertyValueAlias(const char* property_value_name,
- UProperty property, int32_t property_value) {
- const char* short_name =
- u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
- if (short_name != nullptr && strcmp(property_value_name, short_name) == 0) {
- return true;
- }
- for (int i = 0;; i++) {
- const char* long_name = u_getPropertyValueName(
- property, property_value,
- static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i));
- if (long_name == nullptr) break;
- if (strcmp(property_value_name, long_name) == 0) return true;
- }
- return false;
-}
-
-bool LookupPropertyValueName(UProperty property,
- const char* property_value_name, bool negate,
- ZoneList<CharacterRange>* result, Zone* zone) {
- UProperty property_for_lookup = property;
- if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) {
- // For the property Script_Extensions, we have to do the property value
- // name lookup as if the property is Script.
- property_for_lookup = UCHAR_SCRIPT;
- }
- int32_t property_value =
- u_getPropertyValueEnum(property_for_lookup, property_value_name);
- if (property_value == UCHAR_INVALID_CODE) return false;
-
- // We require the property name to match exactly to one of the property value
- // aliases. However, u_getPropertyValueEnum uses loose matching.
- if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup,
- property_value)) {
- return false;
- }
-
- UErrorCode ec = U_ZERO_ERROR;
- icu::UnicodeSet set;
- set.applyIntPropertyValue(property, property_value, ec);
- bool success = ec == U_ZERO_ERROR && !set.isEmpty();
-
- if (success) {
- set.removeAllStrings();
- if (negate) set.complement();
- for (int i = 0; i < set.getRangeCount(); i++) {
- result->Add(
- CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
- zone);
- }
- }
- return success;
-}
-
-template <size_t N>
-inline bool NameEquals(const char* name, const char (&literal)[N]) {
- return strncmp(name, literal, N + 1) == 0;
-}
-
-bool LookupSpecialPropertyValueName(const char* name,
- ZoneList<CharacterRange>* result,
- bool negate, Zone* zone) {
- if (NameEquals(name, "Any")) {
- if (negate) {
- // Leave the list of character ranges empty, since the negation of 'Any'
- // is the empty set.
- } else {
- result->Add(CharacterRange::Everything(), zone);
- }
- } else if (NameEquals(name, "ASCII")) {
- result->Add(negate ? CharacterRange::Range(0x80, String::kMaxCodePoint)
- : CharacterRange::Range(0x0, 0x7F),
- zone);
- } else if (NameEquals(name, "Assigned")) {
- return LookupPropertyValueName(UCHAR_GENERAL_CATEGORY, "Unassigned",
- !negate, result, zone);
- } else {
- return false;
- }
- return true;
-}
-
-// Explicitly whitelist supported binary properties. The spec forbids supporting
-// properties outside of this set to ensure interoperability.
-bool IsSupportedBinaryProperty(UProperty property) {
- switch (property) {
- case UCHAR_ALPHABETIC:
- // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName.
- // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName.
- case UCHAR_ASCII_HEX_DIGIT:
- // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName.
- case UCHAR_BIDI_CONTROL:
- case UCHAR_BIDI_MIRRORED:
- case UCHAR_CASE_IGNORABLE:
- case UCHAR_CASED:
- case UCHAR_CHANGES_WHEN_CASEFOLDED:
- case UCHAR_CHANGES_WHEN_CASEMAPPED:
- case UCHAR_CHANGES_WHEN_LOWERCASED:
- case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
- case UCHAR_CHANGES_WHEN_TITLECASED:
- case UCHAR_CHANGES_WHEN_UPPERCASED:
- case UCHAR_DASH:
- case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
- case UCHAR_DEPRECATED:
- case UCHAR_DIACRITIC:
- case UCHAR_EMOJI:
- case UCHAR_EMOJI_COMPONENT:
- case UCHAR_EMOJI_MODIFIER_BASE:
- case UCHAR_EMOJI_MODIFIER:
- case UCHAR_EMOJI_PRESENTATION:
- case UCHAR_EXTENDED_PICTOGRAPHIC:
- case UCHAR_EXTENDER:
- case UCHAR_GRAPHEME_BASE:
- case UCHAR_GRAPHEME_EXTEND:
- case UCHAR_HEX_DIGIT:
- case UCHAR_ID_CONTINUE:
- case UCHAR_ID_START:
- case UCHAR_IDEOGRAPHIC:
- case UCHAR_IDS_BINARY_OPERATOR:
- case UCHAR_IDS_TRINARY_OPERATOR:
- case UCHAR_JOIN_CONTROL:
- case UCHAR_LOGICAL_ORDER_EXCEPTION:
- case UCHAR_LOWERCASE:
- case UCHAR_MATH:
- case UCHAR_NONCHARACTER_CODE_POINT:
- case UCHAR_PATTERN_SYNTAX:
- case UCHAR_PATTERN_WHITE_SPACE:
- case UCHAR_QUOTATION_MARK:
- case UCHAR_RADICAL:
- case UCHAR_REGIONAL_INDICATOR:
- case UCHAR_S_TERM:
- case UCHAR_SOFT_DOTTED:
- case UCHAR_TERMINAL_PUNCTUATION:
- case UCHAR_UNIFIED_IDEOGRAPH:
- case UCHAR_UPPERCASE:
- case UCHAR_VARIATION_SELECTOR:
- case UCHAR_WHITE_SPACE:
- case UCHAR_XID_CONTINUE:
- case UCHAR_XID_START:
- return true;
- default:
- break;
- }
- return false;
-}
-
-bool IsUnicodePropertyValueCharacter(char c) {
- // https://tc39.github.io/proposal-regexp-unicode-property-escapes/
- //
- // Note that using this to validate each parsed char is quite conservative.
- // A possible alternative solution would be to only ensure the parsed
- // property name/value candidate string does not contain '\0' characters and
- // let ICU lookups trigger the final failure.
- if ('a' <= c && c <= 'z') return true;
- if ('A' <= c && c <= 'Z') return true;
- if ('0' <= c && c <= '9') return true;
- return (c == '_');
-}
-
-} // anonymous namespace
-
-bool RegExpParser::ParsePropertyClassName(ZoneVector<char>* name_1,
- ZoneVector<char>* name_2) {
- DCHECK(name_1->empty());
- DCHECK(name_2->empty());
- // Parse the property class as follows:
- // - In \p{name}, 'name' is interpreted
- // - either as a general category property value name.
- // - or as a binary property name.
- // - In \p{name=value}, 'name' is interpreted as an enumerated property name,
- // and 'value' is interpreted as one of the available property value names.
- // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
- // - Loose matching is not applied.
- if (current() == '{') {
- // Parse \p{[PropertyName=]PropertyNameValue}
- for (Advance(); current() != '}' && current() != '='; Advance()) {
- if (!IsUnicodePropertyValueCharacter(current())) return false;
- if (!has_next()) return false;
- name_1->push_back(static_cast<char>(current()));
- }
- if (current() == '=') {
- for (Advance(); current() != '}'; Advance()) {
- if (!IsUnicodePropertyValueCharacter(current())) return false;
- if (!has_next()) return false;
- name_2->push_back(static_cast<char>(current()));
- }
- name_2->push_back(0); // null-terminate string.
- }
- } else {
- return false;
- }
- Advance();
- name_1->push_back(0); // null-terminate string.
-
- DCHECK(name_1->size() - 1 == std::strlen(name_1->data()));
- DCHECK(name_2->empty() || name_2->size() - 1 == std::strlen(name_2->data()));
- return true;
-}
-
-bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
- bool negate,
- const ZoneVector<char>& name_1,
- const ZoneVector<char>& name_2) {
- if (name_2.empty()) {
- // First attempt to interpret as general category property value name.
- const char* name = name_1.data();
- if (LookupPropertyValueName(UCHAR_GENERAL_CATEGORY_MASK, name, negate,
- add_to, zone())) {
- return true;
- }
- // Interpret "Any", "ASCII", and "Assigned".
- if (LookupSpecialPropertyValueName(name, add_to, negate, zone())) {
- return true;
- }
- // Then attempt to interpret as binary property name with value name 'Y'.
- UProperty property = u_getPropertyEnum(name);
- if (!IsSupportedBinaryProperty(property)) return false;
- if (!IsExactPropertyAlias(name, property)) return false;
- return LookupPropertyValueName(property, negate ? "N" : "Y", false, add_to,
- zone());
- } else {
- // Both property name and value name are specified. Attempt to interpret
- // the property name as enumerated property.
- const char* property_name = name_1.data();
- const char* value_name = name_2.data();
- UProperty property = u_getPropertyEnum(property_name);
- if (!IsExactPropertyAlias(property_name, property)) return false;
- if (property == UCHAR_GENERAL_CATEGORY) {
- // We want to allow aggregate value names such as "Letter".
- property = UCHAR_GENERAL_CATEGORY_MASK;
- } else if (property != UCHAR_SCRIPT &&
- property != UCHAR_SCRIPT_EXTENSIONS) {
- return false;
- }
- return LookupPropertyValueName(property, value_name, negate, add_to,
- zone());
- }
-}
-
-RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name_1) {
- if (!FLAG_harmony_regexp_sequence) return nullptr;
- const char* name = name_1.data();
- const uc32* sequence_list = nullptr;
- JSRegExp::Flags flags = JSRegExp::kUnicode;
- if (NameEquals(name, "Emoji_Flag_Sequence")) {
- sequence_list = UnicodePropertySequences::kEmojiFlagSequences;
- } else if (NameEquals(name, "Emoji_Tag_Sequence")) {
- sequence_list = UnicodePropertySequences::kEmojiTagSequences;
- } else if (NameEquals(name, "Emoji_ZWJ_Sequence")) {
- sequence_list = UnicodePropertySequences::kEmojiZWJSequences;
- }
- if (sequence_list != nullptr) {
- // TODO(yangguo): this creates huge regexp code. Alternative to this is
- // to create a new operator that checks for these sequences at runtime.
- RegExpBuilder builder(zone(), flags);
- while (true) { // Iterate through list of sequences.
- while (*sequence_list != 0) { // Iterate through sequence.
- builder.AddUnicodeCharacter(*sequence_list);
- sequence_list++;
- }
- sequence_list++;
- if (*sequence_list == 0) break;
- builder.NewAlternative();
- }
- return builder.ToRegExp();
- }
-
- if (NameEquals(name, "Emoji_Keycap_Sequence")) {
- // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
- // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
- RegExpBuilder builder(zone(), flags);
- ZoneList<CharacterRange>* prefix_ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- prefix_ranges->Add(CharacterRange::Range('0', '9'), zone());
- prefix_ranges->Add(CharacterRange::Singleton('#'), zone());
- prefix_ranges->Add(CharacterRange::Singleton('*'), zone());
- builder.AddCharacterClass(
- new (zone()) RegExpCharacterClass(zone(), prefix_ranges, flags));
- builder.AddCharacter(0xFE0F);
- builder.AddCharacter(0x20E3);
- return builder.ToRegExp();
- } else if (NameEquals(name, "Emoji_Modifier_Sequence")) {
- // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
- // emoji_modifier_sequence := emoji_modifier_base emoji_modifier
- RegExpBuilder builder(zone(), flags);
- ZoneList<CharacterRange>* modifier_base_ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- LookupPropertyValueName(UCHAR_EMOJI_MODIFIER_BASE, "Y", false,
- modifier_base_ranges, zone());
- builder.AddCharacterClass(
- new (zone()) RegExpCharacterClass(zone(), modifier_base_ranges, flags));
- ZoneList<CharacterRange>* modifier_ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- LookupPropertyValueName(UCHAR_EMOJI_MODIFIER, "Y", false, modifier_ranges,
- zone());
- builder.AddCharacterClass(
- new (zone()) RegExpCharacterClass(zone(), modifier_ranges, flags));
- return builder.ToRegExp();
- }
-
- return nullptr;
-}
-
-#else // V8_INTL_SUPPORT
-
-bool RegExpParser::ParsePropertyClassName(ZoneVector<char>* name_1,
- ZoneVector<char>* name_2) {
- return false;
-}
-
-bool RegExpParser::AddPropertyClassRange(ZoneList<CharacterRange>* add_to,
- bool negate,
- const ZoneVector<char>& name_1,
- const ZoneVector<char>& name_2) {
- return false;
-}
-
-RegExpTree* RegExpParser::GetPropertySequence(const ZoneVector<char>& name) {
- return nullptr;
-}
-
-#endif // V8_INTL_SUPPORT
-
-bool RegExpParser::ParseUnlimitedLengthHexNumber(int max_value, uc32* value) {
- uc32 x = 0;
- int d = HexValue(current());
- if (d < 0) {
- return false;
- }
- while (d >= 0) {
- x = x * 16 + d;
- if (x > max_value) {
- return false;
- }
- Advance();
- d = HexValue(current());
- }
- *value = x;
- return true;
-}
-
-
-uc32 RegExpParser::ParseClassCharacterEscape() {
- DCHECK_EQ('\\', current());
- DCHECK(has_next() && !IsSpecialClassEscape(Next()));
- Advance();
- switch (current()) {
- case 'b':
- Advance();
- return '\b';
- // ControlEscape :: one of
- // f n r t v
- case 'f':
- Advance();
- return '\f';
- case 'n':
- Advance();
- return '\n';
- case 'r':
- Advance();
- return '\r';
- case 't':
- Advance();
- return '\t';
- case 'v':
- Advance();
- return '\v';
- case 'c': {
- uc32 controlLetter = Next();
- uc32 letter = controlLetter & ~('A' ^ 'a');
- // Inside a character class, we also accept digits and underscore as
- // control characters, unless with /u. See Annex B:
- // ES#prod-annexB-ClassControlLetter
- if (letter >= 'A' && letter <= 'Z') {
- Advance(2);
- // Control letters mapped to ASCII control characters in the range
- // 0x00-0x1F.
- return controlLetter & 0x1F;
- }
- if (unicode()) {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(RegExpError::kInvalidClassEscape);
- return 0;
- }
- if ((controlLetter >= '0' && controlLetter <= '9') ||
- controlLetter == '_') {
- Advance(2);
- return controlLetter & 0x1F;
- }
- // We match JSC in reading the backslash as a literal
- // character instead of as starting an escape.
- // TODO(v8:6201): Not yet covered by the spec.
- return '\\';
- }
- case '0':
- // With /u, \0 is interpreted as NUL if not followed by another digit.
- if (unicode() && !(Next() >= '0' && Next() <= '9')) {
- Advance();
- return 0;
- }
- V8_FALLTHROUGH;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- // For compatibility, we interpret a decimal escape that isn't
- // a back reference (and therefore either \0 or not valid according
- // to the specification) as a 1..3 digit octal character code.
- // ES#prod-annexB-LegacyOctalEscapeSequence
- if (unicode()) {
- // With /u, decimal escape is not interpreted as octal character code.
- ReportError(RegExpError::kInvalidClassEscape);
- return 0;
- }
- return ParseOctalLiteral();
- case 'x': {
- Advance();
- uc32 value;
- if (ParseHexEscape(2, &value)) return value;
- if (unicode()) {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(RegExpError::kInvalidEscape);
- return 0;
- }
- // If \x is not followed by a two-digit hexadecimal, treat it
- // as an identity escape.
- return 'x';
- }
- case 'u': {
- Advance();
- uc32 value;
- if (ParseUnicodeEscape(&value)) return value;
- if (unicode()) {
- // With /u, invalid escapes are not treated as identity escapes.
- ReportError(RegExpError::kInvalidUnicodeEscape);
- return 0;
- }
- // If \u is not followed by a two-digit hexadecimal, treat it
- // as an identity escape.
- return 'u';
- }
- default: {
- uc32 result = current();
- // With /u, no identity escapes except for syntax characters and '-' are
- // allowed. Otherwise, all identity escapes are allowed.
- if (!unicode() || IsSyntaxCharacterOrSlash(result) || result == '-') {
- Advance();
- return result;
- }
- ReportError(RegExpError::kInvalidEscape);
- return 0;
- }
- }
- UNREACHABLE();
-}
-
-void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
- Zone* zone,
- bool add_unicode_case_equivalents,
- uc32* char_out, bool* is_class_escape) {
- uc32 current_char = current();
- if (current_char == '\\') {
- switch (Next()) {
- case 'w':
- case 'W':
- case 'd':
- case 'D':
- case 's':
- case 'S': {
- CharacterRange::AddClassEscape(static_cast<char>(Next()), ranges,
- add_unicode_case_equivalents, zone);
- Advance(2);
- *is_class_escape = true;
- return;
- }
- case kEndMarker:
- ReportError(RegExpError::kEscapeAtEndOfPattern);
- return;
- case 'p':
- case 'P':
- if (unicode()) {
- bool negate = Next() == 'P';
- Advance(2);
- ZoneVector<char> name_1(zone);
- ZoneVector<char> name_2(zone);
- if (!ParsePropertyClassName(&name_1, &name_2) ||
- !AddPropertyClassRange(ranges, negate, name_1, name_2)) {
- ReportError(RegExpError::kInvalidClassPropertyName);
- }
- *is_class_escape = true;
- return;
- }
- break;
- default:
- break;
- }
- *char_out = ParseClassCharacterEscape();
- *is_class_escape = false;
- } else {
- Advance();
- *char_out = current_char;
- *is_class_escape = false;
- }
-}
-
-RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
- DCHECK_EQ(current(), '[');
- Advance();
- bool is_negated = false;
- if (current() == '^') {
- is_negated = true;
- Advance();
- }
- ZoneList<CharacterRange>* ranges =
- new (zone()) ZoneList<CharacterRange>(2, zone());
- bool add_unicode_case_equivalents = unicode() && builder->ignore_case();
- while (has_more() && current() != ']') {
- uc32 char_1, char_2;
- bool is_class_1, is_class_2;
- ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_1,
- &is_class_1 CHECK_FAILED);
- if (current() == '-') {
- Advance();
- if (current() == kEndMarker) {
- // If we reach the end we break out of the loop and let the
- // following code report an error.
- break;
- } else if (current() == ']') {
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
- ranges->Add(CharacterRange::Singleton('-'), zone());
- break;
- }
- ParseClassEscape(ranges, zone(), add_unicode_case_equivalents, &char_2,
- &is_class_2 CHECK_FAILED);
- if (is_class_1 || is_class_2) {
- // Either end is an escaped character class. Treat the '-' verbatim.
- if (unicode()) {
- // ES2015 21.2.2.15.1 step 1.
- return ReportError(RegExpError::kInvalidCharacterClass);
- }
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
- ranges->Add(CharacterRange::Singleton('-'), zone());
- if (!is_class_2) ranges->Add(CharacterRange::Singleton(char_2), zone());
- continue;
- }
- // ES2015 21.2.2.15.1 step 6.
- if (char_1 > char_2) {
- return ReportError(RegExpError::kOutOfOrderCharacterClass);
- }
- ranges->Add(CharacterRange::Range(char_1, char_2), zone());
- } else {
- if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
- }
- }
- if (!has_more()) {
- return ReportError(RegExpError::kUnterminatedCharacterClass);
- }
- Advance();
- RegExpCharacterClass::CharacterClassFlags character_class_flags;
- if (is_negated) character_class_flags = RegExpCharacterClass::NEGATED;
- return new (zone()) RegExpCharacterClass(zone(), ranges, builder->flags(),
- character_class_flags);
-}
-
-
-#undef CHECK_FAILED
-
-
-bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
- FlatStringReader* input, JSRegExp::Flags flags,
- RegExpCompileData* result) {
- DCHECK(result != nullptr);
- RegExpParser parser(input, flags, isolate, zone);
- RegExpTree* tree = parser.ParsePattern();
- if (parser.failed()) {
- DCHECK(tree == nullptr);
- DCHECK(parser.error_ != RegExpError::kNone);
- result->error = parser.error_;
- result->error_pos = parser.error_pos_;
- } else {
- DCHECK(tree != nullptr);
- DCHECK(parser.error_ == RegExpError::kNone);
- if (FLAG_trace_regexp_parser) {
- StdoutStream os;
- tree->Print(os, zone);
- os << "\n";
- }
- result->tree = tree;
- int capture_count = parser.captures_started();
- result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
- result->contains_anchor = parser.contains_anchor();
- result->capture_name_map = parser.CreateCaptureNameMap();
- result->capture_count = capture_count;
- }
- return !parser.failed();
-}
-
-RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags)
- : zone_(zone),
- pending_empty_(false),
- flags_(flags),
- characters_(nullptr),
- pending_surrogate_(kNoPendingSurrogate),
- terms_(),
- alternatives_()
-#ifdef DEBUG
- ,
- last_added_(ADD_NONE)
-#endif
-{
-}
-
-
-void RegExpBuilder::AddLeadSurrogate(uc16 lead_surrogate) {
- DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
- FlushPendingSurrogate();
- // Hold onto the lead surrogate, waiting for a trail surrogate to follow.
- pending_surrogate_ = lead_surrogate;
-}
-
-
-void RegExpBuilder::AddTrailSurrogate(uc16 trail_surrogate) {
- DCHECK(unibrow::Utf16::IsTrailSurrogate(trail_surrogate));
- if (pending_surrogate_ != kNoPendingSurrogate) {
- uc16 lead_surrogate = pending_surrogate_;
- pending_surrogate_ = kNoPendingSurrogate;
- DCHECK(unibrow::Utf16::IsLeadSurrogate(lead_surrogate));
- uc32 combined =
- unibrow::Utf16::CombineSurrogatePair(lead_surrogate, trail_surrogate);
- if (NeedsDesugaringForIgnoreCase(combined)) {
- AddCharacterClassForDesugaring(combined);
- } else {
- ZoneList<uc16> surrogate_pair(2, zone());
- surrogate_pair.Add(lead_surrogate, zone());
- surrogate_pair.Add(trail_surrogate, zone());
- RegExpAtom* atom =
- new (zone()) RegExpAtom(surrogate_pair.ToConstVector(), flags_);
- AddAtom(atom);
- }
- } else {
- pending_surrogate_ = trail_surrogate;
- FlushPendingSurrogate();
- }
-}
-
-
-void RegExpBuilder::FlushPendingSurrogate() {
- if (pending_surrogate_ != kNoPendingSurrogate) {
- DCHECK(unicode());
- uc32 c = pending_surrogate_;
- pending_surrogate_ = kNoPendingSurrogate;
- AddCharacterClassForDesugaring(c);
- }
-}
-
-
-void RegExpBuilder::FlushCharacters() {
- FlushPendingSurrogate();
- pending_empty_ = false;
- if (characters_ != nullptr) {
- RegExpTree* atom =
- new (zone()) RegExpAtom(characters_->ToConstVector(), flags_);
- characters_ = nullptr;
- text_.Add(atom, zone());
- LAST(ADD_ATOM);
- }
-}
-
-
-void RegExpBuilder::FlushText() {
- FlushCharacters();
- int num_text = text_.length();
- if (num_text == 0) {
- return;
- } else if (num_text == 1) {
- terms_.Add(text_.last(), zone());
- } else {
- RegExpText* text = new (zone()) RegExpText(zone());
- for (int i = 0; i < num_text; i++) text_.Get(i)->AppendToText(text, zone());
- terms_.Add(text, zone());
- }
- text_.Clear();
-}
-
-
-void RegExpBuilder::AddCharacter(uc16 c) {
- FlushPendingSurrogate();
- pending_empty_ = false;
- if (NeedsDesugaringForIgnoreCase(c)) {
- AddCharacterClassForDesugaring(c);
- } else {
- if (characters_ == nullptr) {
- characters_ = new (zone()) ZoneList<uc16>(4, zone());
- }
- characters_->Add(c, zone());
- LAST(ADD_CHAR);
- }
-}
-
-
-void RegExpBuilder::AddUnicodeCharacter(uc32 c) {
- if (c > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
- DCHECK(unicode());
- AddLeadSurrogate(unibrow::Utf16::LeadSurrogate(c));
- AddTrailSurrogate(unibrow::Utf16::TrailSurrogate(c));
- } else if (unicode() && unibrow::Utf16::IsLeadSurrogate(c)) {
- AddLeadSurrogate(c);
- } else if (unicode() && unibrow::Utf16::IsTrailSurrogate(c)) {
- AddTrailSurrogate(c);
- } else {
- AddCharacter(static_cast<uc16>(c));
- }
-}
-
-void RegExpBuilder::AddEscapedUnicodeCharacter(uc32 character) {
- // A lead or trail surrogate parsed via escape sequence will not
- // pair up with any preceding lead or following trail surrogate.
- FlushPendingSurrogate();
- AddUnicodeCharacter(character);
- FlushPendingSurrogate();
-}
-
-void RegExpBuilder::AddEmpty() { pending_empty_ = true; }
-
-
-void RegExpBuilder::AddCharacterClass(RegExpCharacterClass* cc) {
- if (NeedsDesugaringForUnicode(cc)) {
- // With /u, character class needs to be desugared, so it
- // must be a standalone term instead of being part of a RegExpText.
- AddTerm(cc);
- } else {
- AddAtom(cc);
- }
-}
-
-void RegExpBuilder::AddCharacterClassForDesugaring(uc32 c) {
- AddTerm(new (zone()) RegExpCharacterClass(
- zone(), CharacterRange::List(zone(), CharacterRange::Singleton(c)),
- flags_));
-}
-
-
-void RegExpBuilder::AddAtom(RegExpTree* term) {
- if (term->IsEmpty()) {
- AddEmpty();
- return;
- }
- if (term->IsTextElement()) {
- FlushCharacters();
- text_.Add(term, zone());
- } else {
- FlushText();
- terms_.Add(term, zone());
- }
- LAST(ADD_ATOM);
-}
-
-
-void RegExpBuilder::AddTerm(RegExpTree* term) {
- FlushText();
- terms_.Add(term, zone());
- LAST(ADD_ATOM);
-}
-
-
-void RegExpBuilder::AddAssertion(RegExpTree* assert) {
- FlushText();
- terms_.Add(assert, zone());
- LAST(ADD_ASSERT);
-}
-
-
-void RegExpBuilder::NewAlternative() { FlushTerms(); }
-
-
-void RegExpBuilder::FlushTerms() {
- FlushText();
- int num_terms = terms_.length();
- RegExpTree* alternative;
- if (num_terms == 0) {
- alternative = new (zone()) RegExpEmpty();
- } else if (num_terms == 1) {
- alternative = terms_.last();
- } else {
- alternative = new (zone()) RegExpAlternative(terms_.GetList(zone()));
- }
- alternatives_.Add(alternative, zone());
- terms_.Clear();
- LAST(ADD_NONE);
-}
-
-
-bool RegExpBuilder::NeedsDesugaringForUnicode(RegExpCharacterClass* cc) {
- if (!unicode()) return false;
- // TODO(yangguo): we could be smarter than this. Case-insensitivity does not
- // necessarily mean that we need to desugar. It's probably nicer to have a
- // separate pass to figure out unicode desugarings.
- if (ignore_case()) return true;
- ZoneList<CharacterRange>* ranges = cc->ranges(zone());
- CharacterRange::Canonicalize(ranges);
- for (int i = ranges->length() - 1; i >= 0; i--) {
- uc32 from = ranges->at(i).from();
- uc32 to = ranges->at(i).to();
- // Check for non-BMP characters.
- if (to >= kNonBmpStart) return true;
- // Check for lone surrogates.
- if (from <= kTrailSurrogateEnd && to >= kLeadSurrogateStart) return true;
- }
- return false;
-}
-
-
-bool RegExpBuilder::NeedsDesugaringForIgnoreCase(uc32 c) {
-#ifdef V8_INTL_SUPPORT
- if (unicode() && ignore_case()) {
- icu::UnicodeSet set(c, c);
- set.closeOver(USET_CASE_INSENSITIVE);
- set.removeAllStrings();
- return set.size() > 1;
- }
- // In the case where ICU is not included, we act as if the unicode flag is
- // not set, and do not desugar.
-#endif // V8_INTL_SUPPORT
- return false;
-}
-
-
-RegExpTree* RegExpBuilder::ToRegExp() {
- FlushTerms();
- int num_alternatives = alternatives_.length();
- if (num_alternatives == 0) return new (zone()) RegExpEmpty();
- if (num_alternatives == 1) return alternatives_.last();
- return new (zone()) RegExpDisjunction(alternatives_.GetList(zone()));
-}
-
-bool RegExpBuilder::AddQuantifierToAtom(
- int min, int max, RegExpQuantifier::QuantifierType quantifier_type) {
- FlushPendingSurrogate();
- if (pending_empty_) {
- pending_empty_ = false;
- return true;
- }
- RegExpTree* atom;
- if (characters_ != nullptr) {
- DCHECK(last_added_ == ADD_CHAR);
- // Last atom was character.
- Vector<const uc16> char_vector = characters_->ToConstVector();
- int num_chars = char_vector.length();
- if (num_chars > 1) {
- Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
- text_.Add(new (zone()) RegExpAtom(prefix, flags_), zone());
- char_vector = char_vector.SubVector(num_chars - 1, num_chars);
- }
- characters_ = nullptr;
- atom = new (zone()) RegExpAtom(char_vector, flags_);
- FlushText();
- } else if (text_.length() > 0) {
- DCHECK(last_added_ == ADD_ATOM);
- atom = text_.RemoveLast();
- FlushText();
- } else if (terms_.length() > 0) {
- DCHECK(last_added_ == ADD_ATOM);
- atom = terms_.RemoveLast();
- if (atom->IsLookaround()) {
- // With /u, lookarounds are not quantifiable.
- if (unicode()) return false;
- // Lookbehinds are not quantifiable.
- if (atom->AsLookaround()->type() == RegExpLookaround::LOOKBEHIND) {
- return false;
- }
- }
- if (atom->max_match() == 0) {
- // Guaranteed to only match an empty string.
- LAST(ADD_TERM);
- if (min == 0) {
- return true;
- }
- terms_.Add(atom, zone());
- return true;
- }
- } else {
- // Only call immediately after adding an atom or character!
- UNREACHABLE();
- }
- terms_.Add(new (zone()) RegExpQuantifier(min, max, quantifier_type, atom),
- zone());
- LAST(ADD_TERM);
- return true;
-}
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-parser.h b/js/src/new-regexp/regexp-parser.h
deleted file mode 100644
index 1b2a9fe18..000000000
--- a/js/src/new-regexp/regexp-parser.h
+++ /dev/null
@@ -1,363 +0,0 @@
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_PARSER_H_
-#define V8_REGEXP_REGEXP_PARSER_H_
-
-#include "new-regexp/regexp-ast.h"
-#include "new-regexp/regexp-error.h"
-
-namespace v8 {
-namespace internal {
-
-struct RegExpCompileData;
-
-// A BufferedZoneList is an automatically growing list, just like (and backed
-// by) a ZoneList, that is optimized for the case of adding and removing
-// a single element. The last element added is stored outside the backing list,
-// and if no more than one element is ever added, the ZoneList isn't even
-// allocated.
-// Elements must not be nullptr pointers.
-template <typename T, int initial_size>
-class BufferedZoneList {
- public:
- BufferedZoneList() : list_(nullptr), last_(nullptr) {}
-
- // Adds element at end of list. This element is buffered and can
- // be read using last() or removed using RemoveLast until a new Add or until
- // RemoveLast or GetList has been called.
- void Add(T* value, Zone* zone) {
- if (last_ != nullptr) {
- if (list_ == nullptr) {
- list_ = new (zone) ZoneList<T*>(initial_size, zone);
- }
- list_->Add(last_, zone);
- }
- last_ = value;
- }
-
- T* last() {
- DCHECK(last_ != nullptr);
- return last_;
- }
-
- T* RemoveLast() {
- DCHECK(last_ != nullptr);
- T* result = last_;
- if ((list_ != nullptr) && (list_->length() > 0))
- last_ = list_->RemoveLast();
- else
- last_ = nullptr;
- return result;
- }
-
- T* Get(int i) {
- DCHECK((0 <= i) && (i < length()));
- if (list_ == nullptr) {
- DCHECK_EQ(0, i);
- return last_;
- } else {
- if (i == list_->length()) {
- DCHECK(last_ != nullptr);
- return last_;
- } else {
- return list_->at(i);
- }
- }
- }
-
- void Clear() {
- list_ = nullptr;
- last_ = nullptr;
- }
-
- int length() {
- int length = (list_ == nullptr) ? 0 : list_->length();
- return length + ((last_ == nullptr) ? 0 : 1);
- }
-
- ZoneList<T*>* GetList(Zone* zone) {
- if (list_ == nullptr) {
- list_ = new (zone) ZoneList<T*>(initial_size, zone);
- }
- if (last_ != nullptr) {
- list_->Add(last_, zone);
- last_ = nullptr;
- }
- return list_;
- }
-
- private:
- ZoneList<T*>* list_;
- T* last_;
-};
-
-
-// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
-class RegExpBuilder : public ZoneObject {
- public:
- RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
- void AddCharacter(uc16 character);
- void AddUnicodeCharacter(uc32 character);
- void AddEscapedUnicodeCharacter(uc32 character);
- // "Adds" an empty expression. Does nothing except consume a
- // following quantifier
- void AddEmpty();
- void AddCharacterClass(RegExpCharacterClass* cc);
- void AddCharacterClassForDesugaring(uc32 c);
- void AddAtom(RegExpTree* tree);
- void AddTerm(RegExpTree* tree);
- void AddAssertion(RegExpTree* tree);
- void NewAlternative(); // '|'
- bool AddQuantifierToAtom(int min, int max,
- RegExpQuantifier::QuantifierType type);
- void FlushText();
- RegExpTree* ToRegExp();
- JSRegExp::Flags flags() const { return flags_; }
- void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
-
- bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
- bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
- bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
-
- private:
- static const uc16 kNoPendingSurrogate = 0;
- void AddLeadSurrogate(uc16 lead_surrogate);
- void AddTrailSurrogate(uc16 trail_surrogate);
- void FlushPendingSurrogate();
- void FlushCharacters();
- void FlushTerms();
- bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
- bool NeedsDesugaringForIgnoreCase(uc32 c);
- Zone* zone() const { return zone_; }
- bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
-
- Zone* zone_;
- bool pending_empty_;
- JSRegExp::Flags flags_;
- ZoneList<uc16>* characters_;
- uc16 pending_surrogate_;
- BufferedZoneList<RegExpTree, 2> terms_;
- BufferedZoneList<RegExpTree, 2> text_;
- BufferedZoneList<RegExpTree, 2> alternatives_;
-#ifdef DEBUG
- enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
-#define LAST(x) last_added_ = x;
-#else
-#define LAST(x)
-#endif
-};
-
-class V8_EXPORT_PRIVATE RegExpParser {
- public:
- RegExpParser(FlatStringReader* in, JSRegExp::Flags flags, Isolate* isolate,
- Zone* zone);
-
- static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
- JSRegExp::Flags flags, RegExpCompileData* result);
-
- RegExpTree* ParsePattern();
- RegExpTree* ParseDisjunction();
- RegExpTree* ParseGroup();
-
- // Parses a {...,...} quantifier and stores the range in the given
- // out parameters.
- bool ParseIntervalQuantifier(int* min_out, int* max_out);
-
- // Parses and returns a single escaped character. The character
- // must not be 'b' or 'B' since they are usually handle specially.
- uc32 ParseClassCharacterEscape();
-
- // Checks whether the following is a length-digit hexadecimal number,
- // and sets the value if it is.
- bool ParseHexEscape(int length, uc32* value);
- bool ParseUnicodeEscape(uc32* value);
- bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
-
- bool ParsePropertyClassName(ZoneVector<char>* name_1,
- ZoneVector<char>* name_2);
- bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate,
- const ZoneVector<char>& name_1,
- const ZoneVector<char>& name_2);
-
- RegExpTree* GetPropertySequence(const ZoneVector<char>& name_1);
- RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
-
- uc32 ParseOctalLiteral();
-
- // Tries to parse the input as a back reference. If successful it
- // stores the result in the output parameter and returns true. If
- // it fails it will push back the characters read so the same characters
- // can be reparsed.
- bool ParseBackReferenceIndex(int* index_out);
-
- // Parse inside a class. Either add escaped class to the range, or return
- // false and pass parsed single character through |char_out|.
- void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
- bool add_unicode_case_equivalents, uc32* char_out,
- bool* is_class_escape);
-
- char ParseClassEscape();
-
- RegExpTree* ReportError(RegExpError error);
- void Advance();
- void Advance(int dist);
- void Reset(int pos);
-
- // Reports whether the pattern might be used as a literal search string.
- // Only use if the result of the parse is a single atom node.
- bool simple();
- bool contains_anchor() { return contains_anchor_; }
- void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_started_; }
- int position() { return next_pos_ - 1; }
- bool failed() { return failed_; }
- // The Unicode flag can't be changed using in-regexp syntax, so it's OK to
- // just read the initial flag value here.
- bool unicode() const { return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
-
- static bool IsSyntaxCharacterOrSlash(uc32 c);
-
- static const uc32 kEndMarker = (1 << 21);
-
- private:
- enum SubexpressionType {
- INITIAL,
- CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAROUND,
- NEGATIVE_LOOKAROUND,
- GROUPING
- };
-
- class RegExpParserState : public ZoneObject {
- public:
- // Push a state on the stack.
- RegExpParserState(RegExpParserState* previous_state,
- SubexpressionType group_type,
- RegExpLookaround::Type lookaround_type,
- int disjunction_capture_index,
- const ZoneVector<uc16>* capture_name,
- JSRegExp::Flags flags, Zone* zone)
- : previous_state_(previous_state),
- builder_(new (zone) RegExpBuilder(zone, flags)),
- group_type_(group_type),
- lookaround_type_(lookaround_type),
- disjunction_capture_index_(disjunction_capture_index),
- capture_name_(capture_name) {}
- // Parser state of containing expression, if any.
- RegExpParserState* previous_state() const { return previous_state_; }
- bool IsSubexpression() { return previous_state_ != nullptr; }
- // RegExpBuilder building this regexp's AST.
- RegExpBuilder* builder() const { return builder_; }
- // Type of regexp being parsed (parenthesized group or entire regexp).
- SubexpressionType group_type() const { return group_type_; }
- // Lookahead or Lookbehind.
- RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
- // Index in captures array of first capture in this sub-expression, if any.
- // Also the capture index of this sub-expression itself, if group_type
- // is CAPTURE.
- int capture_index() const { return disjunction_capture_index_; }
- // The name of the current sub-expression, if group_type is CAPTURE. Only
- // used for named captures.
- const ZoneVector<uc16>* capture_name() const { return capture_name_; }
-
- bool IsNamedCapture() const { return capture_name_ != nullptr; }
-
- // Check whether the parser is inside a capture group with the given index.
- bool IsInsideCaptureGroup(int index);
- // Check whether the parser is inside a capture group with the given name.
- bool IsInsideCaptureGroup(const ZoneVector<uc16>* name);
-
- private:
- // Linked list implementation of stack of states.
- RegExpParserState* const previous_state_;
- // Builder for the stored disjunction.
- RegExpBuilder* const builder_;
- // Stored disjunction type (capture, look-ahead or grouping), if any.
- const SubexpressionType group_type_;
- // Stored read direction.
- const RegExpLookaround::Type lookaround_type_;
- // Stored disjunction's capture index (if any).
- const int disjunction_capture_index_;
- // Stored capture name (if any).
- const ZoneVector<uc16>* const capture_name_;
- };
-
- // Return the 1-indexed RegExpCapture object, allocate if necessary.
- RegExpCapture* GetCapture(int index);
-
- // Creates a new named capture at the specified index. Must be called exactly
- // once for each named capture. Fails if a capture with the same name is
- // encountered.
- bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index);
-
- // Parses the name of a capture group (?<name>pattern). The name must adhere
- // to IdentifierName in the ECMAScript standard.
- const ZoneVector<uc16>* ParseCaptureGroupName();
-
- bool ParseNamedBackReference(RegExpBuilder* builder,
- RegExpParserState* state);
- RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
-
- // After the initial parsing pass, patch corresponding RegExpCapture objects
- // into all RegExpBackReferences. This is done after initial parsing in order
- // to avoid complicating cases in which references comes before the capture.
- void PatchNamedBackReferences();
-
- Handle<FixedArray> CreateCaptureNameMap();
-
- // Returns true iff the pattern contains named captures. May call
- // ScanForCaptures to look ahead at the remaining pattern.
- bool HasNamedCaptures();
-
- Isolate* isolate() { return isolate_; }
- Zone* zone() const { return zone_; }
-
- uc32 current() { return current_; }
- bool has_more() { return has_more_; }
- bool has_next() { return next_pos_ < in()->length(); }
- uc32 Next();
- template <bool update_position>
- uc32 ReadNext();
- FlatStringReader* in() { return in_; }
- void ScanForCaptures();
-
- struct RegExpCaptureNameLess {
- bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
- DCHECK_NOT_NULL(lhs);
- DCHECK_NOT_NULL(rhs);
- ZoneVector<uc16> lhname = *lhs->name();
- ZoneVector<uc16> rhname = *rhs->name();
- return lhname < rhname;
- }
- };
-
- Isolate* isolate_;
- Zone* zone_;
- RegExpError error_ = RegExpError::kNone;
- int error_pos_ = 0;
- ZoneList<RegExpCapture*>* captures_;
- ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
- ZoneList<RegExpBackReference*>* named_back_references_;
- FlatStringReader* in_;
- uc32 current_;
- // These are the flags specified outside the regexp syntax ie after the
- // terminating '/' or in the second argument to the constructor. The current
- // flags are stored on the RegExpBuilder.
- JSRegExp::Flags top_level_flags_;
- int next_pos_;
- int captures_started_;
- int capture_count_; // Only valid after we have scanned for captures.
- bool has_more_;
- bool simple_;
- bool contains_anchor_;
- bool is_scanned_for_captures_;
- bool has_named_captures_; // Only valid after we have scanned for captures.
- bool failed_;
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_PARSER_H_
diff --git a/js/src/new-regexp/regexp-shim.cc b/js/src/new-regexp/regexp-shim.cc
deleted file mode 100644
index 51a9e2d83..000000000
--- a/js/src/new-regexp/regexp-shim.cc
+++ /dev/null
@@ -1,212 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <iostream>
-
-#include "new-regexp/regexp-shim.h"
-#include "new-regexp/regexp-stack.h"
-
-#include "mozilla/Sprintf.h" // for SprintfLiteral
-
-namespace v8 {
-namespace internal {
-
-void PrintF(const char* format, ...) {
- va_list arguments;
- va_start(arguments, format);
- vprintf(format, arguments);
- va_end(arguments);
-}
-
-void PrintF(FILE* out, const char* format, ...) {
- va_list arguments;
- va_start(arguments, format);
- vfprintf(out, format, arguments);
- va_end(arguments);
-}
-
-StdoutStream::operator std::ostream&() const { return std::cerr; }
-
-template <typename T>
-std::ostream& StdoutStream::operator<<(T t) { return std::cerr << t; }
-
-template std::ostream& StdoutStream::operator<<(char const* c);
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/ostreams.cc#L120-L169
-// (This is a hand-simplified version.)
-// Writes the given character to the output escaping everything outside
-// of printable ASCII range.
-std::ostream& operator<<(std::ostream& os, const AsUC16& c) {
- uc16 v = c.value;
- bool isPrint = 0x20 < v && v <= 0x7e;
- char buf[10];
- const char* format = isPrint ? "%c" : (v <= 0xFF) ? "\\x%02x" : "\\u%04x";
- SprintfLiteral(buf, format, v);
- return os << buf;
-}
-std::ostream& operator<<(std::ostream& os, const AsUC32& c) {
- int32_t v = c.value;
- if (v <= String::kMaxUtf16CodeUnit) {
- return os << AsUC16(v);
- }
- char buf[13];
- SprintfLiteral(buf, "\\u{%06x}", v);
- return os << buf;
-}
-
-HandleScope::HandleScope(Isolate* isolate)
- : isolate_(isolate) {
- isolate->openHandleScope(*this);
-}
-
-HandleScope::~HandleScope() {
- isolate_->closeHandleScope(level_, non_gc_level_);
-}
-
-template <typename T>
-Handle<T>::Handle(T object, Isolate* isolate)
- : location_(isolate->getHandleLocation(JS::Value(object))) {}
-
-template Handle<ByteArray>::Handle(ByteArray b, Isolate* isolate);
-template Handle<HeapObject>::Handle(JS::Value v, Isolate* isolate);
-template Handle<JSRegExp>::Handle(JSRegExp re, Isolate* isolate);
-template Handle<String>::Handle(String s, Isolate* isolate);
-
-template <typename T>
-Handle<T>::Handle(JS::Value value, Isolate* isolate)
- : location_(isolate->getHandleLocation(value)) {
- T::cast(Object(value)); // Assert that value has the correct type.
-}
-
-JS::Value* Isolate::getHandleLocation(JS::Value value) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- if (!handleArena_.Append(value)) {
- oomUnsafe.crash("Irregexp handle allocation");
- }
- return &handleArena_.GetLast();
-}
-
-void* Isolate::allocatePseudoHandle(size_t bytes) {
- PseudoHandle<void> ptr;
- ptr.reset(js_malloc(bytes));
- if (!ptr) {
- return nullptr;
- }
- if (!uniquePtrArena_.Append(std::move(ptr))) {
- return nullptr;
- }
- return uniquePtrArena_.GetLast().get();
-}
-
-template <typename T>
-PseudoHandle<T> Isolate::takeOwnership(void* ptr) {
- for (auto iter = uniquePtrArena_.IterFromLast(); !iter.Done(); iter.Prev()) {
- auto& entry = iter.Get();
- if (entry.get() == ptr) {
- PseudoHandle<T> result;
- result.reset(static_cast<T*>(entry.release()));
- return result;
- }
- }
- MOZ_CRASH("Tried to take ownership of pseudohandle that is not in the arena");
-}
-
-PseudoHandle<ByteArrayData> ByteArray::takeOwnership(Isolate* isolate) {
- PseudoHandle<ByteArrayData> result =
- isolate->takeOwnership<ByteArrayData>(value_.toPrivate());
- value_ = JS::PrivateValue(nullptr);
- return result;
-}
-
-void Isolate::trace(JSTracer* trc) {
- for (auto iter = handleArena_.Iter(); !iter.Done(); iter.Next()) {
- auto& elem = iter.Get();
- JS::GCPolicy<JS::Value>::trace(trc, &elem, "Isolate handle arena");
- }
-}
-
-/*static*/ Handle<String> String::Flatten(Isolate* isolate,
- Handle<String> string) {
- if (string->IsFlat()) {
- return string;
- }
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- JSLinearString* linear = string->str()->ensureLinear(isolate->cx());
- if (!linear) {
- oomUnsafe.crash("Irregexp String::Flatten");
- }
- return Handle<String>(JS::StringValue(linear), isolate);
-}
-
-// This is only used for trace messages printing the source of a
-// regular expression. To keep things simple, we just return an
-// empty string and don't print anything.
-std::unique_ptr<char[]> String::ToCString() {
- return std::unique_ptr<char[]>();
-}
-
-byte* Isolate::top_of_regexp_stack() const {
- return reinterpret_cast<byte*>(regexpStack_->memory_top_address_address());
-}
-
-Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) {
- MOZ_RELEASE_ASSERT(length >= 0);
-
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
-
- size_t alloc_size = sizeof(uint32_t) + length;
- ByteArrayData* data =
- static_cast<ByteArrayData*>(allocatePseudoHandle(alloc_size));
- if (!data) {
- oomUnsafe.crash("Irregexp NewByteArray");
- }
- data->length = length;
-
- return Handle<ByteArray>(JS::PrivateValue(data), this);
-}
-
-Handle<FixedArray> Isolate::NewFixedArray(int length) {
- MOZ_RELEASE_ASSERT(length >= 0);
- MOZ_CRASH("TODO");
-}
-
-template <typename CharT>
-Handle<String> Isolate::InternalizeString(const Vector<const CharT>& str) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- JSAtom* atom = js::AtomizeChars(cx(), str.begin(), str.length());
- if (!atom) {
- oomUnsafe.crash("Irregexp InternalizeString");
- }
- return Handle<String>(JS::StringValue(atom), this);
-}
-
-template Handle<String>
-Isolate::InternalizeString(const Vector<const uint8_t>& str);
-template Handle<String>
-Isolate::InternalizeString(const Vector<const char16_t>& str);
-
-// TODO: Map flags to jitoptions
-bool FLAG_correctness_fuzzer_suppressions = false;
-bool FLAG_enable_regexp_unaligned_accesses = false;
-bool FLAG_harmony_regexp_sequence = false;
-bool FLAG_regexp_interpret_all = false;
-bool FLAG_regexp_mode_modifiers = false;
-bool FLAG_regexp_optimization = true;
-bool FLAG_regexp_peephole_optimization = true;
-bool FLAG_regexp_possessive_quantifier = false;
-bool FLAG_regexp_tier_up = false;
-bool FLAG_trace_regexp_assembler = false;
-bool FLAG_trace_regexp_bytecodes = false;
-bool FLAG_trace_regexp_parser = false;
-bool FLAG_trace_regexp_peephole_optimization = false;
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-shim.h b/js/src/new-regexp/regexp-shim.h
deleted file mode 100644
index c49c25ff1..000000000
--- a/js/src/new-regexp/regexp-shim.h
+++ /dev/null
@@ -1,1181 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef RegexpShim_h
-#define RegexpShim_h
-
-#include "mozilla/Assertions.h"
-#include "mozilla/Attributes.h"
-#include "mozilla/MathAlgorithms.h"
-#include "mozilla/Maybe.h"
-#include "mozilla/SegmentedVector.h"
-#include "mozilla/Types.h"
-
-#include <algorithm>
-#include <cctype>
-#include <iostream> // needed for gcc 10
-
-#include "jit/Label.h"
-#include "jit/shared/Assembler-shared.h"
-#include "js/Value.h"
-#include "new-regexp/RegExpTypes.h"
-#include "new-regexp/util/flags.h"
-#include "new-regexp/util/vector.h"
-#include "new-regexp/util/zone.h"
-#include "vm/NativeObject.h"
-
-// Forward declaration of classes
-namespace v8 {
-namespace internal {
-
-class Heap;
-class Isolate;
-class RegExpMatchInfo;
-class RegExpStack;
-
-} // namespace internal
-} // namespace v8
-
-#define V8_WARN_UNUSED_RESULT MOZ_MUST_USE
-#define V8_EXPORT_PRIVATE MOZ_EXPORT
-#define V8_FALLTHROUGH MOZ_FALLTHROUGH
-
-#define FATAL(x) MOZ_CRASH(x)
-#define UNREACHABLE() MOZ_CRASH("unreachable code")
-#define UNIMPLEMENTED() MOZ_CRASH("unimplemented code")
-#define STATIC_ASSERT(exp) static_assert(exp, #exp)
-
-#define DCHECK MOZ_ASSERT
-#define DCHECK_EQ(lhs, rhs) MOZ_ASSERT((lhs) == (rhs))
-#define DCHECK_NE(lhs, rhs) MOZ_ASSERT((lhs) != (rhs))
-#define DCHECK_GT(lhs, rhs) MOZ_ASSERT((lhs) > (rhs))
-#define DCHECK_GE(lhs, rhs) MOZ_ASSERT((lhs) >= (rhs))
-#define DCHECK_LT(lhs, rhs) MOZ_ASSERT((lhs) < (rhs))
-#define DCHECK_LE(lhs, rhs) MOZ_ASSERT((lhs) <= (rhs))
-#define DCHECK_NULL(val) MOZ_ASSERT((val) == nullptr)
-#define DCHECK_NOT_NULL(val) MOZ_ASSERT((val) != nullptr)
-#define DCHECK_IMPLIES(lhs, rhs) MOZ_ASSERT_IF(lhs, rhs)
-#define CHECK MOZ_RELEASE_ASSERT
-#define CHECK_LE(lhs, rhs) MOZ_RELEASE_ASSERT((lhs) <= (rhs))
-
-template <class T>
-static constexpr inline T Min(T t1, T t2) {
- return t1 < t2 ? t1 : t2;
-}
-
-template <class T>
-static constexpr inline T Max(T t1, T t2) {
- return t1 > t2 ? t1 : t2;
-}
-#define MemCopy memcpy
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L310-L319
-// ptrdiff_t is 't' according to the standard, but MSVC uses 'I'.
-#ifdef _MSC_VER
-# define V8PRIxPTRDIFF "Ix"
-# define V8PRIdPTRDIFF "Id"
-# define V8PRIuPTRDIFF "Iu"
-#else
-# define V8PRIxPTRDIFF "tx"
-# define V8PRIdPTRDIFF "td"
-# define V8PRIuPTRDIFF "tu"
-#endif
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L27-L38
-// The arraysize(arr) macro returns the # of elements in an array arr.
-// The expression is a compile-time constant, and therefore can be
-// used in defining new arrays, for example. If you use arraysize on
-// a pointer by mistake, you will get a compile-time error.
-#define arraysize(array) (sizeof(ArraySizeHelper(array)))
-
-// This template function declaration is used in defining arraysize.
-// Note that the function doesn't need an implementation, as we only
-// use its type.
-template <typename T, size_t N>
-char (&ArraySizeHelper(T (&array)[N]))[N];
-
-// Explicitly declare the assignment operator as deleted.
-#define DISALLOW_ASSIGN(TypeName) TypeName& operator=(const TypeName&) = delete
-
-// Explicitly declare the copy constructor and assignment operator as deleted.
-// This also deletes the implicit move constructor and implicit move assignment
-// operator, but still allows to manually define them.
-#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
- TypeName(const TypeName&) = delete; \
- DISALLOW_ASSIGN(TypeName)
-
-// Explicitly declare all implicit constructors as deleted, namely the
-// default constructor, copy constructor and operator= functions.
-// This is especially useful for classes containing only static methods.
-#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
- TypeName() = delete; \
- DISALLOW_COPY_AND_ASSIGN(TypeName)
-
-namespace v8 {
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L364-L367
-template <typename T, typename U>
-constexpr inline bool IsAligned(T value, U alignment) {
- return (value & (alignment - 1)) == 0;
-}
-
-using byte = uint8_t;
-using Address = uintptr_t;
-static const Address kNullAddress = 0;
-
-// Latin1/UTF-16 constants
-// Code-point values in Unicode 4.0 are 21 bits wide.
-// Code units in UTF-16 are 16 bits wide.
-using uc16 = char16_t;
-using uc32 = int32_t;
-
-namespace base {
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/macros.h#L247-L258
-// The USE(x, ...) template is used to silence C++ compiler warnings
-// issued for (yet) unused variables (typically parameters).
-// The arguments are guaranteed to be evaluated from left to right.
-struct Use {
- template <typename T>
- Use(T&&) {} // NOLINT(runtime/explicit)
-};
-#define USE(...) \
- do { \
- ::v8::base::Use unused_tmp_array_for_use_macro[]{__VA_ARGS__}; \
- (void)unused_tmp_array_for_use_macro; \
- } while (false)
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/base/safe_conversions.h#L35-L39
-// saturated_cast<> is analogous to static_cast<> for numeric types, except
-// that the specified numeric conversion will saturate rather than overflow or
-// underflow.
-template <typename Dst, typename Src>
-inline Dst saturated_cast(Src value);
-
-// This is the only specialization that is needed for regexp code.
-// Instead of pulling in dozens of lines of template goo
-// to derive it, I used the implementation from uint8_clamped in
-// ArrayBufferObject.h.
-template <>
-inline uint8_t saturated_cast<uint8_t, int>(int x) {
- return (x >= 0) ? ((x < 255) ? uint8_t(x) : 255) : 0;
-}
-
-#define LAZY_INSTANCE_INITIALIZER { mozilla::Nothing() }
-
-template <typename T>
-struct LazyInstanceImpl {
- mozilla::Maybe<T> value_;
- T* Pointer() {
- if (value_.isNothing()) {
- value_.emplace();
- }
- return value_.ptr();
- }
-};
-
-template <typename T>
-class LazyInstance {
-public:
- using type = LazyInstanceImpl<T>;
-};
-
-
-namespace bits {
-
-inline uint64_t CountTrailingZeros(uint64_t value) {
- return mozilla::CountTrailingZeroes64(value);
-}
-
-inline size_t RoundUpToPowerOfTwo32(size_t value) {
- return mozilla::RoundUpPow2(value);
-}
-
-} // namespace bits
-} // namespace base
-
-namespace unibrow {
-
-using uchar = unsigned int;
-
-// Origin:
-// https://github.com/v8/v8/blob/1f1e4cdb04c75eab77adbecd5f5514ddc3eb56cf/src/strings/unicode.h#L133-L150
-class Latin1 {
- public:
- static const uc16 kMaxChar = 0xff;
-
- // Convert the character to Latin-1 case equivalent if possible.
- static inline uc16 TryConvertToLatin1(uc16 c) {
- // "GREEK CAPITAL LETTER MU" case maps to "MICRO SIGN".
- // "GREEK SMALL LETTER MU" case maps to "MICRO SIGN".
- if (c == 0x039C || c == 0x03BC) {
- return 0xB5;
- }
- // "LATIN CAPITAL LETTER Y WITH DIAERESIS" case maps to "LATIN SMALL LETTER
- // Y WITH DIAERESIS".
- if (c == 0x0178) {
- return 0xFF;
- }
- return c;
- }
-};
-
-// Origin:
-// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L99-L131
-class Utf16 {
- public:
- static inline bool IsLeadSurrogate(int code) {
- return js::unicode::IsLeadSurrogate(code);
- }
- static inline bool IsTrailSurrogate(int code) {
- return js::unicode::IsTrailSurrogate(code);
- }
- static inline uc16 LeadSurrogate(uint32_t char_code) {
- return js::unicode::LeadSurrogate(char_code);
- }
- static inline uc16 TrailSurrogate(uint32_t char_code) {
- return js::unicode::TrailSurrogate(char_code);
- }
- static inline uint32_t CombineSurrogatePair(char16_t lead, char16_t trail) {
- return js::unicode::UTF16Decode(lead, trail);
- }
- static const uchar kMaxNonSurrogateCharCode = 0xffff;
-};
-
-#ifndef V8_INTL_SUPPORT
-
-// A cache used in case conversion. It caches the value for characters
-// that either have no mapping or map to a single character independent
-// of context. Characters that map to more than one character or that
-// map differently depending on context are always looked up.
-// Origin:
-// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L64-L88
-template <class T, int size = 256>
-class Mapping {
- public:
- inline Mapping() = default;
- inline int get(uchar c, uchar n, uchar* result) {
- CacheEntry entry = entries_[c & kMask];
- if (entry.code_point_ == c) {
- if (entry.offset_ == 0) {
- return 0;
- } else {
- result[0] = c + entry.offset_;
- return 1;
- }
- } else {
- return CalculateValue(c, n, result);
- }
- }
-
- private:
- int CalculateValue(uchar c, uchar n, uchar* result) {
- bool allow_caching = true;
- int length = T::Convert(c, n, result, &allow_caching);
- if (allow_caching) {
- if (length == 1) {
- entries_[c & kMask] = CacheEntry(c, result[0] - c);
- return 1;
- } else {
- entries_[c & kMask] = CacheEntry(c, 0);
- return 0;
- }
- } else {
- return length;
- }
- }
-
- struct CacheEntry {
- inline CacheEntry() : code_point_(kNoChar), offset_(0) {}
- inline CacheEntry(uchar code_point, signed offset)
- : code_point_(code_point), offset_(offset) {}
- uchar code_point_;
- signed offset_;
- static const int kNoChar = (1 << 21) - 1;
- };
- static const int kSize = size;
- static const int kMask = kSize - 1;
- CacheEntry entries_[kSize];
-};
-
-// Origin:
-// https://github.com/v8/v8/blob/b4bfbce6f91fc2cc72178af42bb3172c5f5eaebb/src/strings/unicode.h#L241-L252
-struct Ecma262Canonicalize {
- static const int kMaxWidth = 1;
- static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
-};
-struct Ecma262UnCanonicalize {
- static const int kMaxWidth = 4;
- static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
-};
-struct CanonicalizationRange {
- static const int kMaxWidth = 1;
- static int Convert(uchar c, uchar n, uchar* result, bool* allow_caching_ptr);
-};
-
-#endif // !V8_INTL_SUPPORT
-
-struct Letter {
- static bool Is(uchar c);
-};
-
-} // namespace unibrow
-
-namespace internal {
-
-#define PRINTF_FORMAT(x, y) MOZ_FORMAT_PRINTF(x, y)
-void PRINTF_FORMAT(1, 2) PrintF(const char* format, ...);
-void PRINTF_FORMAT(2, 3) PrintF(FILE* out, const char* format, ...);
-
-// Superclass for classes only using static method functions.
-// The subclass of AllStatic cannot be instantiated at all.
-class AllStatic {
-#ifdef DEBUG
- public:
- AllStatic() = delete;
-#endif
-};
-
-// Superclass for classes managed with new and delete.
-// In irregexp, this is only AlternativeGeneration (in regexp-compiler.cc)
-// Compare:
-// https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/utils/allocation.cc#L88-L96
-class Malloced {
- public:
- static void* operator new(size_t size) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- void* result = js_malloc(size);
- if (!result) {
- oomUnsafe.crash("Irregexp Malloced shim");
- }
- return result;
- }
- static void operator delete(void* p) { js_free(p); }
-};
-
-constexpr int32_t KB = 1024;
-constexpr int32_t MB = 1024 * 1024;
-
-#define kMaxInt JSVAL_INT_MAX
-#define kMinInt JSVAL_INT_MIN
-constexpr int kSystemPointerSize = sizeof(void*);
-
-// The largest integer n such that n and n + 1 are both exactly
-// representable as a Number value. ES6 section 20.1.2.6
-constexpr double kMaxSafeInteger = 9007199254740991.0; // 2^53-1
-
-constexpr int kBitsPerByte = 8;
-constexpr int kBitsPerByteLog2 = 3;
-constexpr int kUInt32Size = sizeof(uint32_t);
-constexpr int kInt64Size = sizeof(int64_t);
-constexpr int kUC16Size = sizeof(uc16);
-
-inline constexpr bool IsDecimalDigit(uc32 c) { return c >= '0' && c <= '9'; }
-inline bool is_uint24(int val) { return (val & 0x00ffffff) == val; }
-
-inline bool IsIdentifierStart(uc32 c) {
- return js::unicode::IsIdentifierStart(uint32_t(c));
-}
-inline bool IsIdentifierPart(uc32 c) {
- return js::unicode::IsIdentifierPart(uint32_t(c));
-}
-
-// Wrappers to disambiguate char16_t and uc16.
-struct AsUC16 {
- explicit AsUC16(char16_t v) : value(v) {}
- char16_t value;
-};
-
-struct AsUC32 {
- explicit AsUC32(int32_t v) : value(v) {}
- int32_t value;
-};
-
-std::ostream& operator<<(std::ostream& os, const AsUC16& c);
-std::ostream& operator<<(std::ostream& os, const AsUC32& c);
-
-// This class is used for the output of trace-regexp-parser. V8 has
-// an elaborate implementation to ensure that the output gets to the
-// right place, even on Android. We just need something that will
-// print output (ideally to stderr, to match the rest of our tracing
-// code). This is an empty wrapper that will convert itself to
-// std::cerr when used.
-class StdoutStream {
-public:
- operator std::ostream&() const;
- template <typename T> std::ostream& operator<<(T t);
-};
-
-// Reuse existing Maybe implementation
-using mozilla::Maybe;
-
-template <typename T>
-Maybe<T> Just(const T& value) {
- return mozilla::Some(value);
-}
-
-template <typename T>
-mozilla::Nothing Nothing() {
- return mozilla::Nothing();
-}
-
-
-template <typename T>
-using PseudoHandle = mozilla::UniquePtr<T, JS::FreePolicy>;
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L600-L642
-// Compare 8bit/16bit chars to 8bit/16bit chars.
-// Used indirectly by regexp-interpreter.cc
-template <typename lchar, typename rchar>
-inline int CompareCharsUnsigned(const lchar* lhs, const rchar* rhs,
- size_t chars) {
- const lchar* limit = lhs + chars;
- if (sizeof(*lhs) == sizeof(char) && sizeof(*rhs) == sizeof(char)) {
- // memcmp compares byte-by-byte, yielding wrong results for two-byte
- // strings on little-endian systems.
- return memcmp(lhs, rhs, chars);
- }
- while (lhs < limit) {
- int r = static_cast<int>(*lhs) - static_cast<int>(*rhs);
- if (r != 0) return r;
- ++lhs;
- ++rhs;
- }
- return 0;
-}
-template <typename lchar, typename rchar>
-inline int CompareChars(const lchar* lhs, const rchar* rhs, size_t chars) {
- DCHECK_LE(sizeof(lchar), 2);
- DCHECK_LE(sizeof(rchar), 2);
- if (sizeof(lchar) == 1) {
- if (sizeof(rchar) == 1) {
- return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
- reinterpret_cast<const uint8_t*>(rhs), chars);
- } else {
- return CompareCharsUnsigned(reinterpret_cast<const uint8_t*>(lhs),
- reinterpret_cast<const char16_t*>(rhs),
- chars);
- }
- } else {
- if (sizeof(rchar) == 1) {
- return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
- reinterpret_cast<const uint8_t*>(rhs), chars);
- } else {
- return CompareCharsUnsigned(reinterpret_cast<const char16_t*>(lhs),
- reinterpret_cast<const char16_t*>(rhs),
- chars);
- }
- }
-}
-
-// Origin:
-// https://github.com/v8/v8/blob/855591a54d160303349a5f0a32fab15825c708d1/src/utils/utils.h#L40-L48
-// Returns the value (0 .. 15) of a hexadecimal character c.
-// If c is not a legal hexadecimal character, returns a value < 0.
-// Used in regexp-parser.cc
-inline int HexValue(uc32 c) {
- c -= '0';
- if (static_cast<unsigned>(c) <= 9) return c;
- c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
- if (static_cast<unsigned>(c) <= 5) return c + 10;
- return -1;
-}
-
-// V8::Object ~= JS::Value
-class Object {
- public:
- // The default object constructor in V8 stores a nullptr,
- // which has its low bit clear and is interpreted as Smi(0).
- constexpr Object() : value_(JS::Int32Value(0)) {}
-
- // Conversions to/from SpiderMonkey types
- constexpr Object(JS::Value value) : value_(value) {}
- operator JS::Value() const { return value_; }
-
- // Used in regexp-macro-assembler.cc and regexp-interpreter.cc to
- // check the return value of isolate->stack_guard()->HandleInterrupts()
- // In V8, this will be either an exception object or undefined.
- // In SM, we store the exception in the context, so we can use our normal
- // idiom: return false iff we are throwing an exception.
- inline bool IsException(Isolate*) const { return !value_.toBoolean(); }
-
- protected:
- JS::Value value_;
-};
-
-class Smi : public Object {
- public:
- static Smi FromInt(int32_t value) {
- Smi smi;
- smi.value_ = JS::Int32Value(value);
- return smi;
- }
- static inline int32_t ToInt(const Object object) {
- return JS::Value(object).toInt32();
- }
-};
-
-// V8::HeapObject ~= JSObject
-class HeapObject : public Object {
- public:
- inline static HeapObject cast(Object object) {
- HeapObject h;
- h.value_ = JS::Value(object);
- return h;
- }
-};
-
-// A fixed-size array with Objects (aka Values) as element types
-// Only used for named captures. Allocated during parsing, so
-// can't be a GC thing.
-// TODO: implement.
-class FixedArray : public HeapObject {
- public:
- inline void set(uint32_t index, Object value) {}
- inline static FixedArray cast(Object object) { MOZ_CRASH("TODO"); }
-};
-
-class ByteArrayData {
-public:
- uint32_t length;
- uint8_t* data();
-};
-
-/*
- * Conceptually, ByteArrayData is a variable-size structure. To
- * implement this in a C++-approved way, we allocate a struct
- * containing the 32-bit length field, followed by additional memory
- * for the data. To access the data, we get a pointer to the next byte
- * after the length field and cast it to the correct type.
- */
-inline uint8_t* ByteArrayData::data() {
- static_assert(alignof(uint8_t) <= alignof(ByteArrayData),
- "The trailing data must be aligned to start immediately "
- "after the header with no padding.");
- ByteArrayData* immediatelyAfter = this + 1;
- return reinterpret_cast<uint8_t*>(immediatelyAfter);
-}
-
-// A fixed-size array of bytes.
-class ByteArray : public HeapObject {
- ByteArrayData* inner() const {
- return static_cast<ByteArrayData*>(value_.toPrivate());
- }
-public:
- PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
- byte get(uint32_t index) {
- MOZ_ASSERT(index < length());
- return inner()->data()[index];
- }
- void set(uint32_t index, byte val) {
- MOZ_ASSERT(index < length());
- inner()->data()[index] = val;
- }
- uint32_t length() const { return inner()->length; }
- byte* GetDataStartAddress() { return inner()->data(); }
-
- static ByteArray cast(Object object) {
- ByteArray b;
- b.value_ = JS::Value(object);
- return b;
- }
-};
-
-// Like Handles in SM, V8 handles are references to marked pointers.
-// Unlike SM, where Rooted pointers are created individually on the
-// stack, the target of a V8 handle lives in an arena on the isolate
-// (~= JSContext). Whenever a Handle is created, a new "root" is
-// created at the end of the arena.
-//
-// HandleScopes are used to manage the lifetimes of these handles. A
-// HandleScope lives on the stack and stores the size of the arena at
-// the time of its creation. When the function returns and the
-// HandleScope is destroyed, the arena is truncated to its previous
-// size, clearing all roots that were created since the creation of
-// the HandleScope.
-//
-// In some cases, objects that are GC-allocated in V8 are not in SM.
-// In particular, irregexp allocates ByteArrays during code generation
-// to store lookup tables. This does not play nicely with the SM
-// macroassembler's requirement that no GC allocations take place
-// while it is on the stack. To work around this, this shim layer also
-// provides the ability to create pseudo-handles, which are not
-// managed by the GC but provide the same API to irregexp. The "root"
-// of a pseudohandle is a unique pointer living in a second arena. If
-// the allocated object should outlive the HandleScope, it must be
-// manually moved out of the arena using takeOwnership.
-
-class MOZ_STACK_CLASS HandleScope {
-public:
- HandleScope(Isolate* isolate);
- ~HandleScope();
-
- private:
- size_t level_;
- size_t non_gc_level_;
- Isolate* isolate_;
-
- friend class Isolate;
-};
-
-// Origin:
-// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/handles.h#L88-L171
-template <typename T>
-class MOZ_NONHEAP_CLASS Handle {
- public:
- Handle() : location_(nullptr) {}
- Handle(T object, Isolate* isolate);
- Handle(JS::Value value, Isolate* isolate);
-
- // Constructor for handling automatic up casting.
- template <typename S, typename = typename std::enable_if<
- std::is_convertible<S*, T*>::value>::type>
- inline Handle(Handle<S> handle) : location_(handle.location_) {}
-
- template <typename S>
- inline static const Handle<T> cast(Handle<S> that) {
- return Handle<T>(that.location_);
- }
-
- inline bool is_null() const { return location_ == nullptr; }
-
- inline T operator*() const {
- return T::cast(Object(*location_));
- };
-
- // {ObjectRef} is returned by {Handle::operator->}. It should never be stored
- // anywhere or used in any other code; no one should ever have to spell out
- // {ObjectRef} in code. Its only purpose is to be dereferenced immediately by
- // "operator-> chaining". Returning the address of the field is valid because
- // this object's lifetime only ends at the end of the full statement.
- // Origin:
- // https://github.com/v8/v8/blob/03aaa4b3bf4cb01eee1f223b252e6869b04ab08c/src/handles/handles.h#L91-L105
- class ObjectRef {
- public:
- T* operator->() { return &object_; }
-
- private:
- friend class Handle;
- explicit ObjectRef(T object) : object_(object) {}
-
- T object_;
- };
- inline ObjectRef operator->() const { return ObjectRef{**this}; }
-
- static Handle<T> fromHandleValue(JS::HandleValue handle) {
- return Handle(handle.address());
- }
-
- private:
- Handle(const JS::Value* location) : location_(location) {}
-
- template <typename>
- friend class Handle;
- template <typename>
- friend class MaybeHandle;
-
- const JS::Value* location_;
-};
-
-// A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
-// into a Handle requires checking that it does not point to nullptr. This
-// ensures nullptr checks before use.
-//
-// Also note that Handles do not provide default equality comparison or hashing
-// operators on purpose. Such operators would be misleading, because intended
-// semantics is ambiguous between Handle location and object identity.
-// Origin:
-// https://github.com/v8/v8/blob/5792f3587116503fc047d2f68c951c72dced08a5/src/handles/maybe-handles.h#L15-L78
-template <typename T>
-class MOZ_NONHEAP_CLASS MaybeHandle final {
- public:
- MaybeHandle() : location_(nullptr) {}
-
- // Constructor for handling automatic up casting from Handle.
- // Ex. Handle<JSArray> can be passed when MaybeHandle<Object> is expected.
- template <typename S, typename = typename std::enable_if<
- std::is_convertible<S*, T*>::value>::type>
- MaybeHandle(Handle<S> handle) : location_(handle.location_) {}
-
- inline Handle<T> ToHandleChecked() const {
- MOZ_RELEASE_ASSERT(location_);
- return Handle<T>(location_);
- }
-
- // Convert to a Handle with a type that can be upcasted to.
- template <typename S>
- inline bool ToHandle(Handle<S>* out) const {
- if (location_) {
- *out = Handle<T>(location_);
- return true;
- } else {
- *out = Handle<T>();
- return false;
- }
- }
-
-private:
- JS::Value* location_;
-};
-
-// From v8/src/handles/handles-inl.h
-
-template <typename T>
-inline Handle<T> handle(T object, Isolate* isolate) {
- return Handle<T>(object, isolate);
-}
-
-// RAII Guard classes
-
-class DisallowHeapAllocation {
- public:
- DisallowHeapAllocation() {}
- operator const JS::AutoCheckCannotGC&() const { return no_gc_; }
-
- private:
- const JS::AutoCheckCannotGC no_gc_;
-};
-
-// This is used inside DisallowHeapAllocation regions to enable
-// allocation just before throwing an exception, to allocate the
-// exception object. Specifically, it only ever guards:
-// - isolate->stack_guard()->HandleInterrupts()
-// - isolate->StackOverflow()
-// Those cases don't allocate in SpiderMonkey, so this can be a no-op.
-class AllowHeapAllocation {
- public:
- // Empty constructor to avoid unused_variable warnings
- AllowHeapAllocation() {}
-};
-
-// Origin:
-// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L83-L474
-class String : public HeapObject {
- private:
- JSString* str() const { return value_.toString(); }
-
- public:
- String() : HeapObject() {}
- String(JSString* str) { value_ = JS::StringValue(str); }
-
- operator JSString*() const { return str(); }
-
- // Max char codes.
- static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
- static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
- static const int kMaxUtf16CodeUnit = 0xffff;
- static const uc32 kMaxCodePoint = 0x10ffff;
-
- MOZ_ALWAYS_INLINE int length() const { return str()->length(); }
- bool IsFlat() { return str()->isLinear(); };
-
- // Origin:
- // https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L95-L152
- class FlatContent {
- public:
- FlatContent(JSLinearString* string, const DisallowHeapAllocation& no_gc)
- : string_(string), no_gc_(no_gc) {}
- inline bool IsOneByte() const { return string_->hasLatin1Chars(); }
- inline bool IsTwoByte() const { return !string_->hasLatin1Chars(); }
-
- Vector<const uint8_t> ToOneByteVector() const {
- MOZ_ASSERT(IsOneByte());
- return Vector<const uint8_t>(string_->latin1Chars(no_gc_),
- string_->length());
- }
- Vector<const uc16> ToUC16Vector() const {
- MOZ_ASSERT(IsTwoByte());
- return Vector<const uc16>(string_->twoByteChars(no_gc_),
- string_->length());
- }
- private:
- const JSLinearString* string_;
- const JS::AutoCheckCannotGC& no_gc_;
- };
- FlatContent GetFlatContent(const DisallowHeapAllocation& no_gc) {
- MOZ_ASSERT(IsFlat());
- return FlatContent(&str()->asLinear(), no_gc);
- }
-
- static Handle<String> Flatten(Isolate* isolate, Handle<String> string);
-
- inline static String cast(Object object) {
- String s;
- s.value_ = JS::StringValue(JS::Value(object).toString());
- return s;
- }
-
- inline static bool IsOneByteRepresentationUnderneath(String string) {
- return string.str()->hasLatin1Chars();
- }
- inline bool IsOneByteRepresentation() const {
- return str()->hasLatin1Chars();
- }
-
- std::unique_ptr<char[]> ToCString();
-
- template <typename Char>
- Vector<const Char> GetCharVector(const DisallowHeapAllocation& no_gc);
-};
-
-template <>
-inline Vector<const uint8_t> String::GetCharVector(
- const DisallowHeapAllocation& no_gc) {
- String::FlatContent flat = GetFlatContent(no_gc);
- MOZ_ASSERT(flat.IsOneByte());
- return flat.ToOneByteVector();
-}
-
-template <>
-inline Vector<const uc16> String::GetCharVector(
- const DisallowHeapAllocation& no_gc) {
- String::FlatContent flat = GetFlatContent(no_gc);
- MOZ_ASSERT(flat.IsTwoByte());
- return flat.ToUC16Vector();
-}
-
-// A flat string reader provides random access to the contents of a
-// string independent of the character width of the string. The handle
-// must be valid as long as the reader is being used.
-// Origin:
-// https://github.com/v8/v8/blob/84f3877c15bc7f8956d21614da4311337525a3c8/src/objects/string.h#L807-L825
-class MOZ_STACK_CLASS FlatStringReader {
- public:
- FlatStringReader(JSLinearString* string)
- : length_(string->length()),
- is_latin1_(string->hasLatin1Chars()) {
-
- if (is_latin1_) {
- latin1_chars_ = string->latin1Chars(nogc_);
- } else {
- two_byte_chars_ = string->twoByteChars(nogc_);
- }
- }
- FlatStringReader(const char16_t* chars, size_t length)
- : two_byte_chars_(chars),
- length_(length),
- is_latin1_(false) {}
-
- int length() { return length_; }
-
- inline char16_t Get(size_t index) {
- MOZ_ASSERT(index < length_);
- if (is_latin1_) {
- return latin1_chars_[index];
- } else {
- return two_byte_chars_[index];
- }
- }
-
- private:
- union {
- const JS::Latin1Char *latin1_chars_;
- const char16_t* two_byte_chars_;
- };
- size_t length_;
- bool is_latin1_;
- JS::AutoCheckCannotGC nogc_;
-};
-
-class JSRegExp : public HeapObject {
- public:
- // ******************************************************
- // Methods that are called from inside the implementation
- // ******************************************************
- void TierUpTick() { /*inner()->tierUpTick();*/ }
- bool MarkedForTierUp() const {
- return false; /*inner()->markedForTierUp();*/
- }
-
- // TODO: hook these up
- Object Code(bool is_latin1) const { return Object(JS::UndefinedValue()); }
- Object Bytecode(bool is_latin1) const { return Object(JS::UndefinedValue()); }
-
- uint32_t BacktrackLimit() const {
- return 0; /*inner()->backtrackLimit();*/
- }
-
- static JSRegExp cast(Object object) {
- JSRegExp regexp;
- MOZ_ASSERT(JS::Value(object).toGCThing()->is<js::RegExpShared>());
- regexp.value_ = JS::PrivateGCThingValue(JS::Value(object).toGCThing());
- return regexp;
- }
-
- // ******************************
- // Static constants
- // ******************************
-
- // Meaning of Type:
- // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
- // ATOM: A simple string to match against using an indexOf operation.
- // IRREGEXP: Compiled with Irregexp.
- enum Type { NOT_COMPILED, ATOM, IRREGEXP };
-
- // Maximum number of captures allowed.
- static constexpr int kMaxCaptures = 1 << 16;
-
- // **************************************************
- // JSRegExp::Flags
- // **************************************************
-
- struct FlagShiftBit {
- static constexpr int kGlobal = 0;
- static constexpr int kIgnoreCase = 1;
- static constexpr int kMultiline = 2;
- static constexpr int kSticky = 3;
- static constexpr int kUnicode = 4;
- static constexpr int kDotAll = 5;
- static constexpr int kInvalid = 6;
- };
- enum Flag : uint8_t {
- kNone = 0,
- kGlobal = 1 << FlagShiftBit::kGlobal,
- kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
- kMultiline = 1 << FlagShiftBit::kMultiline,
- kSticky = 1 << FlagShiftBit::kSticky,
- kUnicode = 1 << FlagShiftBit::kUnicode,
- kDotAll = 1 << FlagShiftBit::kDotAll,
- kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount.
- };
- using Flags = base::Flags<Flag>;
- static constexpr int kFlagCount = 6;
-
- static constexpr int kNoBacktrackLimit = 0;
-
-private:
- js::RegExpShared* inner() {
- return reinterpret_cast<js::RegExpShared*>(value_.toGCThing());
- }
-};
-
-class Histogram {
- public:
- inline void AddSample(int sample) {}
-};
-
-class Counters {
- public:
- Histogram* regexp_backtracks() { return &regexp_backtracks_; }
-
- private:
- Histogram regexp_backtracks_;
-};
-
-#define PROFILE(isolate, call) \
- do { \
- } while (false);
-
-enum class AllocationType : uint8_t {
- kYoung, // Allocate in the nursery
- kOld, // Allocate in the tenured heap
-};
-
-using StackGuard = Isolate;
-using Factory = Isolate;
-
-class Isolate {
- public:
- //********** Isolate code **********//
- RegExpStack* regexp_stack() const { return regexpStack_; }
- byte* top_of_regexp_stack() const;
-
- // This is called from inside no-GC code. Instead of suppressing GC
- // to allocate the error, we return false from Execute and call
- // ReportOverRecursed in the caller.
- void StackOverflow() {}
-
-#ifndef V8_INTL_SUPPORT
- unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
- return &jsregexp_uncanonicalize_;
- }
- unibrow::Mapping<unibrow::Ecma262Canonicalize>*
- regexp_macro_assembler_canonicalize() {
- return &regexp_macro_assembler_canonicalize_;
- }
- unibrow::Mapping<unibrow::CanonicalizationRange>* jsregexp_canonrange() {
- return &jsregexp_canonrange_;
- }
-
-private:
- unibrow::Mapping<unibrow::Ecma262UnCanonicalize> jsregexp_uncanonicalize_;
- unibrow::Mapping<unibrow::Ecma262Canonicalize>
- regexp_macro_assembler_canonicalize_;
- unibrow::Mapping<unibrow::CanonicalizationRange> jsregexp_canonrange_;
-#endif // !V8_INTL_SUPPORT
-
-public:
- // An empty stub for telemetry we don't support
- void IncreaseTotalRegexpCodeGenerated(Handle<HeapObject> code) {}
-
- Counters* counters() { return &counters_; }
-
- //********** Factory code **********//
- inline Factory* factory() { return this; }
-
- Handle<ByteArray> NewByteArray(
- int length, AllocationType allocation = AllocationType::kYoung);
-
- // Allocates a fixed array initialized with undefined values.
- Handle<FixedArray> NewFixedArray(int length);
-
- template <typename Char>
- Handle<String> InternalizeString(const Vector<const Char>& str);
-
- //********** Stack guard code **********//
- inline StackGuard* stack_guard() { return this; }
- Object HandleInterrupts() {
- return Object(JS::BooleanValue(cx()->handleInterrupt(cx())));
- }
-
- JSContext* cx() const { return cx_; }
-
- void trace(JSTracer* trc);
-
- //********** Handle code **********//
-
- JS::Value* getHandleLocation(JS::Value value);
-
- private:
-
- mozilla::SegmentedVector<JS::Value> handleArena_;
- mozilla::SegmentedVector<PseudoHandle<void>> uniquePtrArena_;
-
- void* allocatePseudoHandle(size_t bytes);
-
-public:
- template <typename T>
- PseudoHandle<T> takeOwnership(void* ptr);
-
-private:
- void openHandleScope(HandleScope& scope) {
- scope.level_ = handleArena_.Length();
- scope.non_gc_level_ = uniquePtrArena_.Length();
- }
- void closeHandleScope(size_t prevLevel, size_t prevUniqueLevel) {
- size_t currLevel = handleArena_.Length();
- handleArena_.PopLastN(currLevel - prevLevel);
-
- size_t currUniqueLevel = uniquePtrArena_.Length();
- uniquePtrArena_.PopLastN(currUniqueLevel - prevUniqueLevel);
- }
- friend class HandleScope;
-
- JSContext* cx_;
- RegExpStack* regexpStack_;
- Counters counters_;
-};
-
-// Origin:
-// https://github.com/v8/v8/blob/50dcf2af54ce27801a71c47c1be1d2c5e36b0dd6/src/execution/isolate.h#L1909-L1931
-class StackLimitCheck {
- public:
- StackLimitCheck(Isolate* isolate) : cx_(isolate->cx()) {}
-
- // Use this to check for stack-overflows in C++ code.
- bool HasOverflowed() {
- JS_CHECK_RECURSION_DONT_REPORT(cx_, return true);
- return false;
- }
-
- // Use this to check for interrupt request in C++ code.
- bool InterruptRequested() {
- JSRuntime* rt = cx_->runtime();
- return rt->hasPendingInterrupt();
- }
-
- // Use this to check for stack-overflow when entering runtime from JS code.
- bool JsHasOverflowed() {
- JS_CHECK_RECURSION_CONSERVATIVE_DONT_REPORT(cx_, return true);
- return false;
- }
-
- private:
- JSContext* cx_;
-};
-
-class Code : public HeapObject {
- public:
- uint8_t* raw_instruction_start() { return inner()->raw(); }
-
- static Code cast(Object object) {
- Code c;
- MOZ_ASSERT(JS::Value(object).toGCThing()->is<js::jit::JitCode>());
- c.value_ = JS::PrivateGCThingValue(JS::Value(object).toGCThing());
- return c;
- }
- js::jit::JitCode* inner() {
- return value_.toGCThing()->as<js::jit::JitCode>();
- }
-};
-
-enum class MessageTemplate { kStackOverflow };
-
-class MessageFormatter {
- public:
- static const char* TemplateString(MessageTemplate index) {
- switch (index) {
- case MessageTemplate::kStackOverflow:
- return "too much recursion";
- }
- }
-};
-
-// Origin: https://github.com/v8/v8/blob/master/src/codegen/label.h
-class Label {
- public:
- Label() : inner_(js::jit::Label()) {}
-
- js::jit::Label* inner() { return &inner_; }
-
- void Unuse() { inner_.reset(); }
-
- bool is_linked() { return inner_.used(); }
- bool is_bound() { return inner_.bound(); }
- bool is_unused() { return !inner_.used() && !inner_.bound(); }
-
- int pos() { return inner_.offset(); }
- void link_to(int pos) { inner_.use(pos); }
- void bind_to(int pos) { inner_.bind(pos); }
-
- private:
- js::jit::Label inner_;
- js::jit::CodeOffset patchOffset_;
-
- friend class SMRegExpMacroAssembler;
-};
-
-// TODO: Map flags to jitoptions
-extern bool FLAG_correctness_fuzzer_suppressions;
-extern bool FLAG_enable_regexp_unaligned_accesses;
-extern bool FLAG_harmony_regexp_sequence;
-extern bool FLAG_regexp_interpret_all;
-extern bool FLAG_regexp_mode_modifiers;
-extern bool FLAG_regexp_optimization;
-extern bool FLAG_regexp_peephole_optimization;
-extern bool FLAG_regexp_possessive_quantifier;
-extern bool FLAG_regexp_tier_up;
-extern bool FLAG_trace_regexp_assembler;
-extern bool FLAG_trace_regexp_bytecodes;
-extern bool FLAG_trace_regexp_parser;
-extern bool FLAG_trace_regexp_peephole_optimization;
-
-#define COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
-
-} // namespace internal
-} // namespace v8
-
-#endif // RegexpShim_h
diff --git a/js/src/new-regexp/regexp-stack.cc b/js/src/new-regexp/regexp-stack.cc
deleted file mode 100644
index c8944541c..000000000
--- a/js/src/new-regexp/regexp-stack.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2009 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-stack.h"
-
-
-namespace v8 {
-namespace internal {
-
-RegExpStackScope::RegExpStackScope(Isolate* isolate)
- : regexp_stack_(isolate->regexp_stack()) {
- // Initialize, if not already initialized.
- regexp_stack_->EnsureCapacity(0);
-}
-
-
-RegExpStackScope::~RegExpStackScope() {
- // Reset the buffer if it has grown.
- regexp_stack_->Reset();
-}
-
-RegExpStack::RegExpStack() : thread_local_(this), isolate_(nullptr) {}
-
-RegExpStack::~RegExpStack() { thread_local_.FreeAndInvalidate(); }
-
-char* RegExpStack::ArchiveStack(char* to) {
- if (!thread_local_.owns_memory_) {
- // Force dynamic stacks prior to archiving. Any growth will do. A dynamic
- // stack is needed because stack archival & restoration rely on `memory_`
- // pointing at a fixed-location backing store, whereas the static stack is
- // tied to a RegExpStack instance.
- EnsureCapacity(thread_local_.memory_size_ + 1);
- DCHECK(thread_local_.owns_memory_);
- }
-
- size_t size = sizeof(thread_local_);
- MemCopy(reinterpret_cast<void*>(to), &thread_local_, size);
- thread_local_ = ThreadLocal(this);
- return to + size;
-}
-
-
-char* RegExpStack::RestoreStack(char* from) {
- size_t size = sizeof(thread_local_);
- MemCopy(&thread_local_, reinterpret_cast<void*>(from), size);
- return from + size;
-}
-
-void RegExpStack::Reset() { thread_local_.ResetToStaticStack(this); }
-
-void RegExpStack::ThreadLocal::ResetToStaticStack(RegExpStack* regexp_stack) {
- if (owns_memory_) DeleteArray(memory_);
-
- memory_ = regexp_stack->static_stack_;
- memory_top_ = regexp_stack->static_stack_ + kStaticStackSize;
- memory_size_ = kStaticStackSize;
- limit_ = reinterpret_cast<Address>(regexp_stack->static_stack_) +
- kStackLimitSlack * kSystemPointerSize;
- owns_memory_ = false;
-}
-
-void RegExpStack::ThreadLocal::FreeAndInvalidate() {
- if (owns_memory_) DeleteArray(memory_);
-
- // This stack may not be used after being freed. Just reset to invalid values
- // to ensure we don't accidentally use old memory areas.
- memory_ = nullptr;
- memory_top_ = nullptr;
- memory_size_ = 0;
- limit_ = kMemoryTop;
-}
-
-Address RegExpStack::EnsureCapacity(size_t size) {
- if (size > kMaximumStackSize) return kNullAddress;
- if (size < kMinimumDynamicStackSize) size = kMinimumDynamicStackSize;
- if (thread_local_.memory_size_ < size) {
- byte* new_memory = NewArray<byte>(size);
- if (thread_local_.memory_size_ > 0) {
- // Copy original memory into top of new memory.
- MemCopy(new_memory + size - thread_local_.memory_size_,
- thread_local_.memory_, thread_local_.memory_size_);
- if (thread_local_.owns_memory_) DeleteArray(thread_local_.memory_);
- }
- thread_local_.memory_ = new_memory;
- thread_local_.memory_top_ = new_memory + size;
- thread_local_.memory_size_ = size;
- thread_local_.limit_ = reinterpret_cast<Address>(new_memory) +
- kStackLimitSlack * kSystemPointerSize;
- thread_local_.owns_memory_ = true;
- }
- return reinterpret_cast<Address>(thread_local_.memory_top_);
-}
-
-
-} // namespace internal
-} // namespace v8
diff --git a/js/src/new-regexp/regexp-stack.h b/js/src/new-regexp/regexp-stack.h
deleted file mode 100644
index e32d0ed1f..000000000
--- a/js/src/new-regexp/regexp-stack.h
+++ /dev/null
@@ -1,141 +0,0 @@
-// Copyright 2009 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_STACK_H_
-#define V8_REGEXP_REGEXP_STACK_H_
-
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-class RegExpStack;
-
-// Maintains a per-v8thread stack area that can be used by irregexp
-// implementation for its backtracking stack.
-// Since there is only one stack area, the Irregexp implementation is not
-// re-entrant. I.e., no regular expressions may be executed in the same thread
-// during a preempted Irregexp execution.
-class RegExpStackScope {
- public:
- // Create and delete an instance to control the life-time of a growing stack.
-
- // Initializes the stack memory area if necessary.
- explicit RegExpStackScope(Isolate* isolate);
- ~RegExpStackScope(); // Releases the stack if it has grown.
-
- RegExpStack* stack() const { return regexp_stack_; }
-
- private:
- RegExpStack* regexp_stack_;
-
- DISALLOW_COPY_AND_ASSIGN(RegExpStackScope);
-};
-
-
-class RegExpStack {
- public:
- RegExpStack();
- ~RegExpStack();
-
- // Number of allocated locations on the stack below the limit.
- // No sequence of pushes must be longer that this without doing a stack-limit
- // check.
- static constexpr int kStackLimitSlack = 32;
-
- // Gives the top of the memory used as stack.
- Address stack_base() {
- DCHECK_NE(0, thread_local_.memory_size_);
- DCHECK_EQ(thread_local_.memory_top_,
- thread_local_.memory_ + thread_local_.memory_size_);
- return reinterpret_cast<Address>(thread_local_.memory_top_);
- }
-
- // The total size of the memory allocated for the stack.
- size_t stack_capacity() { return thread_local_.memory_size_; }
-
- // If the stack pointer gets below the limit, we should react and
- // either grow the stack or report an out-of-stack exception.
- // There is only a limited number of locations below the stack limit,
- // so users of the stack should check the stack limit during any
- // sequence of pushes longer that this.
- Address* limit_address_address() { return &(thread_local_.limit_); }
-
- // Ensures that there is a memory area with at least the specified size.
- // If passing zero, the default/minimum size buffer is allocated.
- Address EnsureCapacity(size_t size);
-
- // Thread local archiving.
- static constexpr int ArchiveSpacePerThread() {
- return static_cast<int>(sizeof(ThreadLocal));
- }
- char* ArchiveStack(char* to);
- char* RestoreStack(char* from);
- void FreeThreadResources() { thread_local_.ResetToStaticStack(this); }
-
- // Maximal size of allocated stack area.
- static constexpr size_t kMaximumStackSize = 64 * MB;
-
- private:
- // Artificial limit used when the thread-local state has been destroyed.
- static const Address kMemoryTop =
- static_cast<Address>(static_cast<uintptr_t>(-1));
-
- // Minimal size of dynamically-allocated stack area.
- static constexpr size_t kMinimumDynamicStackSize = 1 * KB;
-
- // In addition to dynamically-allocated, variable-sized stacks, we also have
- // a statically allocated and sized area that is used whenever no dynamic
- // stack is allocated. This guarantees that a stack is always available and
- // we can skip availability-checks later on.
- // It's double the slack size to ensure that we have a bit of breathing room
- // before NativeRegExpMacroAssembler::GrowStack must be called.
- static constexpr size_t kStaticStackSize =
- 2 * kStackLimitSlack * kSystemPointerSize;
- byte static_stack_[kStaticStackSize] = {0};
-
- STATIC_ASSERT(kStaticStackSize <= kMaximumStackSize);
-
- // Structure holding the allocated memory, size and limit.
- struct ThreadLocal {
- explicit ThreadLocal(RegExpStack* regexp_stack) {
- ResetToStaticStack(regexp_stack);
- }
-
- // If memory_size_ > 0 then memory_ and memory_top_ must be non-nullptr
- // and memory_top_ = memory_ + memory_size_
- byte* memory_ = nullptr;
- byte* memory_top_ = nullptr;
- size_t memory_size_ = 0;
- Address limit_ = kNullAddress;
- bool owns_memory_ = false; // Whether memory_ is owned and must be freed.
-
- void ResetToStaticStack(RegExpStack* regexp_stack);
- void FreeAndInvalidate();
- };
-
- // Address of top of memory used as stack.
- Address memory_top_address_address() {
- return reinterpret_cast<Address>(&thread_local_.memory_top_);
- }
-
- // Resets the buffer if it has grown beyond the default/minimum size.
- // After this, the buffer is either the default size, or it is empty, so
- // you have to call EnsureCapacity before using it again.
- void Reset();
-
- ThreadLocal thread_local_;
- Isolate* isolate_;
-
- friend class ExternalReference;
- friend class Isolate;
- friend class RegExpStackScope;
-
- DISALLOW_COPY_AND_ASSIGN(RegExpStack);
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_STACK_H_
diff --git a/js/src/new-regexp/regexp.h b/js/src/new-regexp/regexp.h
deleted file mode 100644
index f1e403bf0..000000000
--- a/js/src/new-regexp/regexp.h
+++ /dev/null
@@ -1,195 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_H_
-#define V8_REGEXP_REGEXP_H_
-
-#include "new-regexp/regexp-error.h"
-#include "new-regexp/regexp-shim.h"
-
-namespace v8 {
-namespace internal {
-
-class RegExpNode;
-class RegExpTree;
-
-enum class RegExpCompilationTarget : int { kBytecode, kNative };
-
-// TODO(jgruber): Do not expose in regexp.h.
-// TODO(jgruber): Consider splitting between ParseData and CompileData.
-struct RegExpCompileData {  // Aggregates the state of one regexp parse/compile: AST, node graph, code, pattern properties, error info and counters.
- // The parsed AST as produced by the RegExpParser.
- RegExpTree* tree = nullptr;
-
- // The compiled Node graph as produced by RegExpTree::ToNode methods.
- RegExpNode* node = nullptr;
-
- // Either the generated code as produced by the compiler or a trampoline
- // to the interpreter.
- Object code;
-
- // True, iff the pattern is a 'simple' atom with zero captures. In other
- // words, the pattern consists of a string with no metacharacters and special
- // regexp features, and can be implemented as a standard string search.
- bool simple = true;  // Note the default is true, unlike the other boolean below.
-
- // True, iff the pattern is anchored at the start of the string with '^'.
- bool contains_anchor = false;
-
- // Only used if the pattern contains named captures. If so, this contains a
- // mapping of capture names to capture indices.
- Handle<FixedArray> capture_name_map;
-
- // The error code (a RegExpError value). Only meaningful if an error occurred
- // during parsing or compilation.
- RegExpError error = RegExpError::kNone;  // Defaults to kNone.
-
- // The position at which the error was detected. Only used if an
- // error occurred.
- int error_pos = 0;
-
- // The number of capture groups, without the global capture \0.
- int capture_count = 0;
-
- // The number of registers used by the generated code.
- int register_count = 0;
-
- // The compilation target (bytecode or native code).
- RegExpCompilationTarget compilation_target;  // NOTE(review): the only enum/scalar member without a default member initializer; confirm every user assigns it before reading.
-};
-
-class RegExp final : public AllStatic {  // Static-only entry points for compiling and executing regular expressions.
- public:
- // Whether the irregexp engine generates interpreter bytecode.
- static bool CanGenerateBytecode() {
- return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;  // True if either flag is set.
- }
-
- // Parses the RegExp pattern and prepares the JSRegExp object with
- // generic data and choice of implementation - as well as what
- // the implementation wants to store in the data field.
- // NOTE(review): this used to say "Returns false if compilation fails", but the declared return type is MaybeHandle<Object>.
- V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile(
- Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
- JSRegExp::Flags flags, uint32_t backtrack_limit);
-
- enum CallOrigin : int {  // Where a call came from: the runtime or JS.
- kFromRuntime = 0,
- kFromJs = 1,
- };
-
- // See ECMA-262 section 15.10.6.2.
- // This function calls the garbage collector if necessary.
- V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
- Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
- int index, Handle<RegExpMatchInfo> last_match_info);
-
- // Integral return values used throughout regexp code layers.
- static constexpr int kInternalRegExpFailure = 0;
- static constexpr int kInternalRegExpSuccess = 1;
- static constexpr int kInternalRegExpException = -1;
- static constexpr int kInternalRegExpRetry = -2;
-
- enum IrregexpResult : int32_t {  // Mirrors the constants above, except that kInternalRegExpRetry has no enumerator here.
- RE_FAILURE = kInternalRegExpFailure,
- RE_SUCCESS = kInternalRegExpSuccess,
- RE_EXCEPTION = kInternalRegExpException,
- };
-
- // Prepare a RegExp for being executed one or more times (using
- // IrregexpExecOnce) on the subject.
- // This ensures that the regexp is compiled for the subject, and that
- // the subject is flat.
- // Returns the number of integer spaces required by IrregexpExecOnce
- // as its "registers" argument. If the regexp cannot be compiled,
- // an exception is set as pending, and this function returns a negative value.
- static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
- Handle<String> subject);
-
- // Set last match info. If match is nullptr, then setting captures is
- // omitted.
- static Handle<RegExpMatchInfo> SetLastMatchInfo(
- Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
- Handle<String> subject, int capture_count, int32_t* match);
-
- V8_EXPORT_PRIVATE static bool CompileForTesting(Isolate* isolate, Zone* zone,  // Test-only compile entry point; results presumably land in *input (confirm).
- RegExpCompileData* input,
- JSRegExp::Flags flags,
- Handle<String> pattern,
- Handle<String> sample_subject,
- bool is_one_byte);
-
- V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,  // Test-only helper taking a label and a node graph.
- RegExpNode* node);
-
- static const int kRegExpTooLargeToOptimize = 20 * KB;  // Size threshold; the unit follows the KB constant defined elsewhere.
-};
-
-// Uses a special global mode of irregexp-generated code to perform a global
-// search and return multiple results at once. As such, this is essentially an
-// iterator over multiple results (retrieved batch-wise in advance).
-class RegExpGlobalCache final {  // Iterator-style cache over batches of global-match results for one (regexp, subject) pair.
- public:
- RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
- Isolate* isolate);
-
- ~RegExpGlobalCache();
-
- // Fetch the next entry in the cache for global regexp match results.
- // This does not set the last match info. Upon failure, nullptr is
- // returned. The cause can be checked with HasException(). The previous
- // result is still available in memory when a failure happens.
- int32_t* FetchNext();
-
- int32_t* LastSuccessfulMatch();
-
- bool HasException() { return num_matches_ < 0; }  // A negative match count encodes "an exception occurred".
-
- private:
- int AdvanceZeroLength(int last_index);
-
- int num_matches_;  // Negative values signal an exception (see HasException()).
- int max_matches_;
- int current_match_index_;
- int registers_per_match_;
- // Pointer to the last set of captures.
- int32_t* register_array_;
- int register_array_size_;
- Handle<JSRegExp> regexp_;
- Handle<String> subject_;
- Isolate* isolate_;
-};
-
-// Caches results for specific regexp queries on the isolate. At the time of
-// writing, this is used during global calls to RegExp.prototype.exec and
-// @@split.
-class RegExpResultsCache final : public AllStatic {  // Static-only helpers around a FixedArray-backed results cache.
- public:
- enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };  // Selects which cache is addressed.
-
- // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
- // On success, the returned result is guaranteed to be a COW-array.
- static Object Lookup(Heap* heap, String key_string, Object key_pattern,
- FixedArray* last_match_out, ResultsCacheType type);
- // Attempt to add value_array to the cache specified by type. On success,
- // value_array is turned into a COW-array.
- static void Enter(Isolate* isolate, Handle<String> key_string,
- Handle<Object> key_pattern, Handle<FixedArray> value_array,
- Handle<FixedArray> last_match_cache, ResultsCacheType type);
- static void Clear(FixedArray cache);
-
- static constexpr int kRegExpResultsCacheSize = 0x100;  // 256.
-
- private:
- static constexpr int kStringOffset = 0;  // The four offsets below index the slots of one cache entry.
- static constexpr int kPatternOffset = 1;
- static constexpr int kArrayOffset = 2;
- static constexpr int kLastMatchOffset = 3;
- static constexpr int kArrayEntriesPerCacheEntry = 4;  // Number of slots per entry (offsets 0..3).
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_H_
diff --git a/js/src/new-regexp/special-case.cc b/js/src/new-regexp/special-case.cc
deleted file mode 100644
index d767b94c2..000000000
--- a/js/src/new-regexp/special-case.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that
-// can be found in the LICENSE file.
-
-// Automatically generated by regexp/gen-regexp-special-case.cc
-
-// The following functions are used to build UnicodeSets
-// for special cases where the case-folding algorithm used by
-// UnicodeSet::closeOver(USET_CASE_INSENSITIVE) does not match
-// the algorithm defined in ECMAScript 2020 21.2.2.8.2 (Runtime
-// Semantics: Canonicalize) step 3.
-
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/special-case.h"
-
-#include "unicode/uniset.h"
-namespace v8 {
-namespace internal {
-
-icu::UnicodeSet BuildIgnoreSet() {  // Builds and returns the frozen set of code points exposed via RegExpCaseFolding::IgnoreSet().
- icu::UnicodeSet set;
- set.add(0xdf);  // U+00DF SHARP S
- set.add(0x17f);  // U+017F LONG S
- set.add(0x390);
- set.add(0x3b0);
- set.add(0x3f4);
- set.add(0x1e9e);  // U+1E9E CAPITAL SHARP S
- set.add(0x1f80, 0x1faf);  // Two-argument add(): a range of code points.
- set.add(0x1fb3);
- set.add(0x1fbc);
- set.add(0x1fc3);
- set.add(0x1fcc);
- set.add(0x1fd3);
- set.add(0x1fe3);
- set.add(0x1ff3);
- set.add(0x1ffc);
- set.add(0x2126);  // U+2126 OHM SIGN
- set.add(0x212a, 0x212b);  // U+212A KELVIN SIGN, U+212B ANGSTROM SIGN
- set.add(0xfb05, 0xfb06);
- set.freeze();  // No further modification after this point.
- return set;
-}
-
-struct IgnoreSetData {  // Default-constructible holder so the set can be created through base::LazyInstance.
- IgnoreSetData() : set(BuildIgnoreSet()) {}  // Populates the set once, at construction.
- const icu::UnicodeSet set;
-};
-
-// static -- returns a reference to the set held by a function-local base::LazyInstance<IgnoreSetData>.
-const icu::UnicodeSet& RegExpCaseFolding::IgnoreSet() {
- static base::LazyInstance<IgnoreSetData>::type set =
- LAZY_INSTANCE_INITIALIZER;
- return set.Pointer()->set;  // NOTE(review): presumably constructs the instance on first use -- confirm against base::LazyInstance.
-}
-
-icu::UnicodeSet BuildSpecialAddSet() {  // Builds and returns the frozen set of code points exposed via RegExpCaseFolding::SpecialAddSet().
- icu::UnicodeSet set;
- set.add(0x4b);  // 'K'
- set.add(0x53);  // 'S'
- set.add(0x6b);  // 'k'
- set.add(0x73);  // 's'
- set.add(0xc5);
- set.add(0xe5);
- set.add(0x398);
- set.add(0x3a9);
- set.add(0x3b8);
- set.add(0x3c9);
- set.add(0x3d1);
- set.freeze();  // No further modification after this point.
- return set;
-}
-
-struct SpecialAddSetData {  // Default-constructible holder so the set can be created through base::LazyInstance.
- SpecialAddSetData() : set(BuildSpecialAddSet()) {}  // Populates the set once, at construction.
- const icu::UnicodeSet set;
-};
-
-// static -- returns a reference to the set held by a function-local base::LazyInstance<SpecialAddSetData>.
-const icu::UnicodeSet& RegExpCaseFolding::SpecialAddSet() {
- static base::LazyInstance<SpecialAddSetData>::type set =
- LAZY_INSTANCE_INITIALIZER;
- return set.Pointer()->set;  // NOTE(review): presumably constructs the instance on first use -- confirm against base::LazyInstance.
-}
-
-
-} // namespace internal
-} // namespace v8
-#endif // V8_INTL_SUPPORT
diff --git a/js/src/new-regexp/special-case.h b/js/src/new-regexp/special-case.h
deleted file mode 100644
index 31dfd7858..000000000
--- a/js/src/new-regexp/special-case.h
+++ /dev/null
@@ -1,117 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_SPECIAL_CASE_H_
-#define V8_REGEXP_SPECIAL_CASE_H_
-
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/regexp-shim.h"
-
-#include "unicode/uchar.h"
-#include "unicode/uniset.h"
-#include "unicode/unistr.h"
-
-namespace v8 {
-namespace internal {
-
-// Sets of Unicode characters that need special handling under "i" mode
-
-// For non-unicode ignoreCase matches (aka "i", not "iu"), ECMA 262
-// defines slightly different case-folding rules than Unicode. An
-// input character should match a pattern character if the result of
-// the Canonicalize algorithm is the same for both characters.
-//
-// Roughly speaking, for "i" regexps, Canonicalize(c) is the same as
-// c.toUpperCase(), unless a) c.toUpperCase() is a multi-character
-// string, or b) c is non-ASCII, and c.toUpperCase() is ASCII. See
-// https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch for
-// the precise definition.
-//
-// While compiling such regular expressions, we need to compute the
-// set of characters that should match a given input character. (See
-// GetCaseIndependentLetters and CharacterRange::AddCaseEquivalents.)
-// For almost all characters, this can be efficiently computed using
-// UnicodeSet::closeOver(USET_CASE_INSENSITIVE). These sets represent
-// the remaining special cases.
-//
-// For a character c, the rules are as follows:
-//
-// 1. If c is in neither IgnoreSet nor SpecialAddSet, then calling
-// UnicodeSet::closeOver(USET_CASE_INSENSITIVE) on a UnicodeSet
-// containing c will produce the set of characters that should
-// match /c/i (or /[c]/i), and only those characters.
-//
-// 2. If c is in IgnoreSet, then the only character it should match is
-// itself. However, closeOver will add additional incorrect
-// matches. For example, consider SHARP S: 'ß' (U+00DF) and 'ẞ'
-// (U+1E9E). Although closeOver('ß') = "ßẞ", uppercase('ß') is
-// "SS". Step 3.e therefore requires that 'ß' canonicalizes to
-// itself, and should not match 'ẞ'. In these cases, we can skip
-// the closeOver entirely, because it will never add an equivalent
-// character.
-//
-// 3. If c is in SpecialAddSet, then it should match at least one
-// character other than itself. However, closeOver will add at
-// least one additional incorrect match. For example, consider the
-// letter 'k'. Closing over 'k' gives "kKK" (lowercase k, uppercase
-// K, U+212A KELVIN SIGN). However, because of step 3.g, KELVIN
-// SIGN should not match either of the other two characters. As a
-// result, "k" and "K" are in SpecialAddSet (and KELVIN SIGN is in
-// IgnoreSet). To find the correct matches for characters in
-// SpecialAddSet, we closeOver the original character, but filter
-// out the results that do not have the same canonical value.
-//
-// The contents of these sets are calculated at build time by
-// src/regexp/gen-regexp-special-case.cc, which generates
-// gen/src/regexp/special-case.cc. This is done by iterating over the
-// result of closeOver for each BMP character, and finding sets for
-// which at least one character has a different canonical value than
-// another character. Characters that match no other characters in
-// their equivalence class are added to IgnoreSet. Characters that
-// match at least one other character are added to SpecialAddSet.
-
-class RegExpCaseFolding final : public AllStatic {  // Static-only helpers for non-unicode ignoreCase ("i" without "u") matching.
- public:
- static const icu::UnicodeSet& IgnoreSet();  // Characters that should match only themselves.
- static const icu::UnicodeSet& SpecialAddSet();  // Characters whose closeOver() result must be filtered by canonical value.
-
- // This implements ECMAScript 2020 21.2.2.8.2 (Runtime Semantics:
- // Canonicalize) step 3, which is used to determine whether
- // characters match when ignoreCase is true and unicode is false.
- static UChar32 Canonicalize(UChar32 ch) {
- // a. Assert: ch is a UTF-16 code unit.
- CHECK_LE(ch, 0xffff);
-
- // b. Let s be the String value consisting of the single code unit ch.
- icu::UnicodeString s(ch);
-
- // c. Let u be the same result produced as if by performing the algorithm
- // for String.prototype.toUpperCase using s as the this value.
- // d. Assert: Type(u) is String.
- icu::UnicodeString& u = s.toUpper();  // NOTE(review): u is bound by reference to toUpper()'s result; the no-argument overload may depend on ICU's default locale -- confirm.
-
- // e. If u does not consist of a single code unit, return ch.
- if (u.length() != 1) {
- return ch;
- }
-
- // f. Let cu be u's single code unit element.
- UChar32 cu = u.char32At(0);
-
- // g. If the value of ch >= 128 and the value of cu < 128, return ch.
- if (ch >= 128 && cu < 128) {  // A non-ASCII character never canonicalizes to an ASCII one.
- return ch;
- }
-
- // h. Return cu.
- return cu;
- }
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_INTL_SUPPORT
-
-#endif // V8_REGEXP_SPECIAL_CASE_H_
diff --git a/js/src/new-regexp/util/flags.h b/js/src/new-regexp/util/flags.h
deleted file mode 100644
index 1fa421fc0..000000000
--- a/js/src/new-regexp/util/flags.h
+++ /dev/null
@@ -1,93 +0,0 @@
-// Copyright 2014 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_UTIL_FLAGS_H_
-#define V8_UTIL_FLAGS_H_
-
-// Origin: https://github.com/v8/v8/blob/1bafcc6b999b23ea1d394f5d267a08183e3c4e19/src/base/flags.h#L15-L90
-
-namespace v8 {
-namespace base {
-
-// The Flags class provides a type-safe way of storing OR-combinations of enum
-// values. The Flags<T, S> class is a template class, where T is an enum type,
-// and S is the underlying storage type (usually int).
-//
-// The traditional C++ approach for storing OR-combinations of enum values is to
-// use an int or unsigned int variable. The inconvenience with this approach is
-// that there's no type checking at all; any enum value can be OR'd with any
-// other enum value and passed on to a function that takes an int or unsigned
-// int.
-template <typename T, typename S = int>  // T: enum type of a single flag; S: integral storage type for the combined mask.
-class Flags final {
- public:
- using flag_type = T;
- using mask_type = S;
-
- constexpr Flags() : mask_(0) {}  // Empty set: no flag bits.
- constexpr Flags(flag_type flag)  // Deliberately non-explicit here: a single enum value converts implicitly to Flags.
- : mask_(static_cast<S>(flag)) {}
- constexpr explicit Flags(mask_type mask) : mask_(static_cast<S>(mask)) {}  // Raw masks require an explicit conversion.
-
- constexpr bool operator==(flag_type flag) const {  // Exact comparison of the whole mask against one flag value (not a "contains" test).
- return mask_ == static_cast<S>(flag);
- }
- constexpr bool operator!=(flag_type flag) const {
- return mask_ != static_cast<S>(flag);
- }
-
- Flags& operator&=(const Flags& flags) {  // In-place bitwise operators with another Flags value.
- mask_ &= flags.mask_;
- return *this;
- }
- Flags& operator|=(const Flags& flags) {
- mask_ |= flags.mask_;
- return *this;
- }
- Flags& operator^=(const Flags& flags) {
- mask_ ^= flags.mask_;
- return *this;
- }
-
- constexpr Flags operator&(const Flags& flags) const {  // Value-returning bitwise operators with another Flags value.
- return Flags(mask_ & flags.mask_);
- }
- constexpr Flags operator|(const Flags& flags) const {
- return Flags(mask_ | flags.mask_);
- }
- constexpr Flags operator^(const Flags& flags) const {
- return Flags(mask_ ^ flags.mask_);
- }
-
- Flags& operator&=(flag_type flag) { return operator&=(Flags(flag)); }  // Single-flag overloads forward to the Flags overloads above.
- Flags& operator|=(flag_type flag) { return operator|=(Flags(flag)); }
- Flags& operator^=(flag_type flag) { return operator^=(Flags(flag)); }
-
- constexpr Flags operator&(flag_type flag) const {
- return operator&(Flags(flag));
- }
- constexpr Flags operator|(flag_type flag) const {
- return operator|(Flags(flag));
- }
- constexpr Flags operator^(flag_type flag) const {
- return operator^(Flags(flag));
- }
-
- constexpr Flags operator~() const { return Flags(~mask_); }  // Complement of every bit in the storage type.
-
- constexpr operator mask_type() const { return mask_; }  // Implicit conversion back to the raw mask.
- constexpr bool operator!() const { return !mask_; }  // True when no bit is set.
-
- Flags without(flag_type flag) { return *this & (~Flags(flag)); }  // Copy with `flag` cleared. NOTE(review): not const-qualified, unlike the other non-mutating members.
-
- friend size_t hash_value(const Flags& flags) { return flags.mask_; }  // The hash is the mask value itself.
-
- private:
- mask_type mask_;
-};
-
-} // namespace base
-} // namespace v8
-
-#endif // V8_UTIL_FLAGS_H_
diff --git a/js/src/new-regexp/util/unicode.cc b/js/src/new-regexp/util/unicode.cc
deleted file mode 100644
index ba9ea607c..000000000
--- a/js/src/new-regexp/util/unicode.cc
+++ /dev/null
@@ -1,1865 +0,0 @@
-// Copyright 2012 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// This file is a subset of:
-// https://github.com/v8/v8/blob/master/src/strings/unicode.cc
-
-#include "new-regexp/regexp-shim.h"
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/uchar.h"
-#endif
-
-namespace v8 {
-namespace unibrow {
-
-#ifndef V8_INTL_SUPPORT
-static const int kStartBit = (1 << 30);  // Bit tested by IsStart() and masked off by GetEntry().
-static const int kChunkBits = (1 << 13);  // 8192; (kChunkBits - 1) masks a character down to its offset within a chunk.
-#endif // !V8_INTL_SUPPORT
-
-static const uchar kSentinel = static_cast<uchar>(-1);  // Used as MultiCharacterSpecialCase::kEndOfEncoding.
-
-/**
- * \file
- * Implementations of functions for working with Unicode.
- */
-
-using int16_t = signed short; // NOLINT
-using uint16_t = unsigned short; // NOLINT
-using int32_t = int; // NOLINT
-
-#ifndef V8_INTL_SUPPORT
-// All access to the character table should go through this function. D is the stride: entry `index` lives at table[D * index].
-template <int D>
-static inline uchar TableGet(const int32_t* table, int index) {
- return table[D * index];
-}
-
-static inline uchar GetEntry(int32_t entry) { return entry & (kStartBit - 1); }  // Keeps only the bits below kStartBit, i.e. the value without the start marker.
-
-static inline bool IsStart(int32_t entry) { return (entry & kStartBit) != 0; }  // True if the entry carries the kStartBit marker.
-
-/**
- * Look up a character in the Unicode table using a mix of binary and
- * interpolation search. For a uniformly distributed array
- * interpolation search beats binary search by a wide margin. However,
- * in this case interpolation search degenerates because of some very
- * high values in the lower end of the table so this function uses a
- * combination. The average number of steps to look up the information
- * about a character is around 10, slightly higher if there is no
- * information available about the character. Returns true if chr is in the table (exactly, or within a range opened by a start-marked entry).
- */
-static bool LookupPredicate(const int32_t* table, uint16_t size, uchar chr) {
- static const int kEntryDist = 1;  // One int32_t per table entry.
- uint16_t value = chr & (kChunkBits - 1);  // Offset of chr within its chunk; tables are per chunk.
- unsigned int low = 0;
- unsigned int high = size - 1;
- while (high != low) {
- unsigned int mid = low + ((high - low) >> 1);  // Midpoint of the remaining window.
- uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid));
- // If we've found an entry less than or equal to this one, and the
- // next one is not also less than this one, we've arrived.
- if ((current_value <= value) &&
- (mid + 1 == size ||
- GetEntry(TableGet<kEntryDist>(table, mid + 1)) > value)) {
- low = mid;
- break;
- } else if (current_value < value) {
- low = mid + 1;
- } else if (current_value > value) {
- // If we've just checked the bottom-most value and it's not
- // the one we're looking for, we're done.
- if (mid == 0) break;
- high = mid - 1;
- }
- }
- int32_t field = TableGet<kEntryDist>(table, low);  // Candidate entry: the greatest one <= value, if any.
- uchar entry = GetEntry(field);
- bool is_start = IsStart(field);
- return (entry == value) || (entry < value && is_start);  // Exact hit, or value lies past an entry that opens a range.
-}
-#endif // !V8_INTL_SUPPORT
-
-template <int kW>  // kW: maximum number of characters in one mapping.
-struct MultiCharacterSpecialCase {
- static const uchar kEndOfEncoding = kSentinel;  // Terminates a mapping that uses fewer than kW slots.
- uchar chars[kW];  // The mapped characters, optionally terminated by kEndOfEncoding.
-};
-
-#ifndef V8_INTL_SUPPORT
-// Look up the mapping for the given character in the specified table,
-// which is of the specified length and uses the specified special case
-// mapping for multi-char mappings. The next parameter is the character
-// following the one to map. The result will be written into the result
-// buffer and the number of characters written will be returned (0 if there
-// is no mapping). Finally, if the allow_caching_ptr is non-null then false
-// will be stored in it if the result contains multiple characters or depends
-// on the context.
-// If ranges are linear, a match between a start and end point is
-// offset by the distance between the match and the start. Otherwise
-// the result is the same as for the start point on the entire range.
-template <bool ranges_are_linear, int kW>
-static int LookupMapping(const int32_t* table, uint16_t size,
- const MultiCharacterSpecialCase<kW>* multi_chars,
- uchar chr, uchar next, uchar* result,
- bool* allow_caching_ptr) {
- static const int kEntryDist = 2;  // Two int32_t per entry: key, then mapping value.
- uint16_t key = chr & (kChunkBits - 1);  // Offset of chr within its chunk.
- uint16_t chunk_start = chr - key;
- unsigned int low = 0;
- unsigned int high = size - 1;
- while (high != low) {
- unsigned int mid = low + ((high - low) >> 1);  // Midpoint of the remaining window.
- uchar current_value = GetEntry(TableGet<kEntryDist>(table, mid));
- // If we've found an entry less than or equal to this one, and the next one
- // is not also less than this one, we've arrived.
- if ((current_value <= key) &&
- (mid + 1 == size ||
- GetEntry(TableGet<kEntryDist>(table, mid + 1)) > key)) {
- low = mid;
- break;
- } else if (current_value < key) {
- low = mid + 1;
- } else if (current_value > key) {
- // If we've just checked the bottom-most value and it's not
- // the one we're looking for, we're done.
- if (mid == 0) break;
- high = mid - 1;
- }
- }
- int32_t field = TableGet<kEntryDist>(table, low);
- uchar entry = GetEntry(field);
- bool is_start = IsStart(field);
- bool found = (entry == key) || (entry < key && is_start);  // Exact hit, or key lies past an entry that opens a range.
- if (found) {
- int32_t value = table[2 * low + 1];  // Second slot of the entry. NOTE(review): the literal 2 duplicates kEntryDist.
- if (value == 0) {
- // 0 means not present
- return 0;
- } else if ((value & 3) == 0) {
- // Low bits 0 means a constant offset from the given character.
- if (ranges_are_linear) {
- result[0] = chr + (value >> 2);  // The offset is stored in the bits above the two tag bits.
- } else {
- result[0] = entry + chunk_start + (value >> 2);
- }
- return 1;
- } else if ((value & 3) == 1) {
- // Low bits 1 means a special case mapping
- if (allow_caching_ptr) *allow_caching_ptr = false;
- const MultiCharacterSpecialCase<kW>& mapping = multi_chars[value >> 2];  // value >> 2 is the index into multi_chars.
- int length = 0;
- for (length = 0; length < kW; length++) {
- uchar mapped = mapping.chars[length];
- if (mapped == MultiCharacterSpecialCase<kW>::kEndOfEncoding) break;
- if (ranges_are_linear) {
- result[length] = mapped + (key - entry);
- } else {
- result[length] = mapped;
- }
- }
- return length;
- } else {
- // Low bits 2 means a really really special case (NOTE(review): this branch also catches low bits == 3)
- if (allow_caching_ptr) *allow_caching_ptr = false;
- // The cases of this switch are defined in unicode.py in the
- // really_special_cases mapping.
- switch (value >> 2) {
- case 1:
- // Really special case 1: upper case sigma. This letter
- // converts to two different lower case sigmas depending on
- // whether or not it occurs at the end of a word.
- if (next != 0 && Letter::Is(next)) {
- result[0] = 0x03C3;  // Followed by a letter: not at the end of a word.
- } else {
- result[0] = 0x03C2;  // At the end of a word (or of the input).
- }
- return 1;
- default:
- return 0;
- }
- return -1;  // NOTE(review): unreachable -- every path through the switch above returns.
- }
- } else {
- return 0;
- }
-}
-#endif // !V8_INTL_SUPPORT
-
-// Letter: point.category in ['Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl']
-#ifdef V8_INTL_SUPPORT
-bool Letter::Is(uchar c) { return static_cast<bool>(u_isalpha(c)); }  // V8_INTL_SUPPORT build: delegates to ICU's u_isalpha().
-#else
-static const uint16_t kLetterTable0Size = 431;
-static const int32_t kLetterTable0[431] = {
- 1073741889, 90, 1073741921, 122,
- 170, 181, 186, 1073742016, // NOLINT
- 214, 1073742040, 246, 1073742072,
- 705, 1073742534, 721, 1073742560, // NOLINT
- 740, 748, 750, 1073742704,
- 884, 1073742710, 887, 1073742714, // NOLINT
- 893, 895, 902, 1073742728,
- 906, 908, 1073742734, 929, // NOLINT
- 1073742755, 1013, 1073742839, 1153,
- 1073742986, 1327, 1073743153, 1366, // NOLINT
- 1369, 1073743201, 1415, 1073743312,
- 1514, 1073743344, 1522, 1073743392, // NOLINT
- 1610, 1073743470, 1647, 1073743473,
- 1747, 1749, 1073743589, 1766, // NOLINT
- 1073743598, 1775, 1073743610, 1788,
- 1791, 1808, 1073743634, 1839, // NOLINT
- 1073743693, 1957, 1969, 1073743818,
- 2026, 1073743860, 2037, 2042, // NOLINT
- 1073743872, 2069, 2074, 2084,
- 2088, 1073743936, 2136, 1073744032, // NOLINT
- 2226, 1073744132, 2361, 2365,
- 2384, 1073744216, 2401, 1073744241, // NOLINT
- 2432, 1073744261, 2444, 1073744271,
- 2448, 1073744275, 2472, 1073744298, // NOLINT
- 2480, 2482, 1073744310, 2489,
- 2493, 2510, 1073744348, 2525, // NOLINT
- 1073744351, 2529, 1073744368, 2545,
- 1073744389, 2570, 1073744399, 2576, // NOLINT
- 1073744403, 2600, 1073744426, 2608,
- 1073744434, 2611, 1073744437, 2614, // NOLINT
- 1073744440, 2617, 1073744473, 2652,
- 2654, 1073744498, 2676, 1073744517, // NOLINT
- 2701, 1073744527, 2705, 1073744531,
- 2728, 1073744554, 2736, 1073744562, // NOLINT
- 2739, 1073744565, 2745, 2749,
- 2768, 1073744608, 2785, 1073744645, // NOLINT
- 2828, 1073744655, 2832, 1073744659,
- 2856, 1073744682, 2864, 1073744690, // NOLINT
- 2867, 1073744693, 2873, 2877,
- 1073744732, 2909, 1073744735, 2913, // NOLINT
- 2929, 2947, 1073744773, 2954,
- 1073744782, 2960, 1073744786, 2965, // NOLINT
- 1073744793, 2970, 2972, 1073744798,
- 2975, 1073744803, 2980, 1073744808, // NOLINT
- 2986, 1073744814, 3001, 3024,
- 1073744901, 3084, 1073744910, 3088, // NOLINT
- 1073744914, 3112, 1073744938, 3129,
- 3133, 1073744984, 3161, 1073744992, // NOLINT
- 3169, 1073745029, 3212, 1073745038,
- 3216, 1073745042, 3240, 1073745066, // NOLINT
- 3251, 1073745077, 3257, 3261,
- 3294, 1073745120, 3297, 1073745137, // NOLINT
- 3314, 1073745157, 3340, 1073745166,
- 3344, 1073745170, 3386, 3389, // NOLINT
- 3406, 1073745248, 3425, 1073745274,
- 3455, 1073745285, 3478, 1073745306, // NOLINT
- 3505, 1073745331, 3515, 3517,
- 1073745344, 3526, 1073745409, 3632, // NOLINT
- 1073745458, 3635, 1073745472, 3654,
- 1073745537, 3714, 3716, 1073745543, // NOLINT
- 3720, 3722, 3725, 1073745556,
- 3735, 1073745561, 3743, 1073745569, // NOLINT
- 3747, 3749, 3751, 1073745578,
- 3755, 1073745581, 3760, 1073745586, // NOLINT
- 3763, 3773, 1073745600, 3780,
- 3782, 1073745628, 3807, 3840, // NOLINT
- 1073745728, 3911, 1073745737, 3948,
- 1073745800, 3980, 1073745920, 4138, // NOLINT
- 4159, 1073746000, 4181, 1073746010,
- 4189, 4193, 1073746021, 4198, // NOLINT
- 1073746030, 4208, 1073746037, 4225,
- 4238, 1073746080, 4293, 4295, // NOLINT
- 4301, 1073746128, 4346, 1073746172,
- 4680, 1073746506, 4685, 1073746512, // NOLINT
- 4694, 4696, 1073746522, 4701,
- 1073746528, 4744, 1073746570, 4749, // NOLINT
- 1073746576, 4784, 1073746610, 4789,
- 1073746616, 4798, 4800, 1073746626, // NOLINT
- 4805, 1073746632, 4822, 1073746648,
- 4880, 1073746706, 4885, 1073746712, // NOLINT
- 4954, 1073746816, 5007, 1073746848,
- 5108, 1073746945, 5740, 1073747567, // NOLINT
- 5759, 1073747585, 5786, 1073747616,
- 5866, 1073747694, 5880, 1073747712, // NOLINT
- 5900, 1073747726, 5905, 1073747744,
- 5937, 1073747776, 5969, 1073747808, // NOLINT
- 5996, 1073747822, 6000, 1073747840,
- 6067, 6103, 6108, 1073748000, // NOLINT
- 6263, 1073748096, 6312, 6314,
- 1073748144, 6389, 1073748224, 6430, // NOLINT
- 1073748304, 6509, 1073748336, 6516,
- 1073748352, 6571, 1073748417, 6599, // NOLINT
- 1073748480, 6678, 1073748512, 6740,
- 6823, 1073748741, 6963, 1073748805, // NOLINT
- 6987, 1073748867, 7072, 1073748910,
- 7087, 1073748922, 7141, 1073748992, // NOLINT
- 7203, 1073749069, 7247, 1073749082,
- 7293, 1073749225, 7404, 1073749230, // NOLINT
- 7409, 1073749237, 7414, 1073749248,
- 7615, 1073749504, 7957, 1073749784, // NOLINT
- 7965, 1073749792, 8005, 1073749832,
- 8013, 1073749840, 8023, 8025, // NOLINT
- 8027, 8029, 1073749855, 8061,
- 1073749888, 8116, 1073749942, 8124, // NOLINT
- 8126, 1073749954, 8132, 1073749958,
- 8140, 1073749968, 8147, 1073749974, // NOLINT
- 8155, 1073749984, 8172, 1073750002,
- 8180, 1073750006, 8188}; // NOLINT
-static const uint16_t kLetterTable1Size = 87;
-static const int32_t kLetterTable1[87] = {
- 113, 127, 1073741968, 156,
- 258, 263, 1073742090, 275, // NOLINT
- 277, 1073742105, 285, 292,
- 294, 296, 1073742122, 301, // NOLINT
- 1073742127, 313, 1073742140, 319,
- 1073742149, 329, 334, 1073742176, // NOLINT
- 392, 1073744896, 3118, 1073744944,
- 3166, 1073744992, 3300, 1073745131, // NOLINT
- 3310, 1073745138, 3315, 1073745152,
- 3365, 3367, 3373, 1073745200, // NOLINT
- 3431, 3439, 1073745280, 3478,
- 1073745312, 3494, 1073745320, 3502, // NOLINT
- 1073745328, 3510, 1073745336, 3518,
- 1073745344, 3526, 1073745352, 3534, // NOLINT
- 1073745360, 3542, 1073745368, 3550,
- 3631, 1073745925, 4103, 1073745953, // NOLINT
- 4137, 1073745969, 4149, 1073745976,
- 4156, 1073745985, 4246, 1073746077, // NOLINT
- 4255, 1073746081, 4346, 1073746172,
- 4351, 1073746181, 4397, 1073746225, // NOLINT
- 4494, 1073746336, 4538, 1073746416,
- 4607, 1073746944, 8191}; // NOLINT
-static const uint16_t kLetterTable2Size = 4;
-static const int32_t kLetterTable2[4] = {1073741824, 3509, 1073745408,
- 8191}; // NOLINT
-static const uint16_t kLetterTable3Size = 2;
-static const int32_t kLetterTable3[2] = {1073741824, 8191}; // NOLINT
-static const uint16_t kLetterTable4Size = 2;
-static const int32_t kLetterTable4[2] = {1073741824, 8140}; // NOLINT
-static const uint16_t kLetterTable5Size = 100;
-static const int32_t kLetterTable5[100] = {
- 1073741824, 1164, 1073743056, 1277,
- 1073743104, 1548, 1073743376, 1567, // NOLINT
- 1073743402, 1579, 1073743424, 1646,
- 1073743487, 1693, 1073743520, 1775, // NOLINT
- 1073743639, 1823, 1073743650, 1928,
- 1073743755, 1934, 1073743760, 1965, // NOLINT
- 1073743792, 1969, 1073743863, 2049,
- 1073743875, 2053, 1073743879, 2058, // NOLINT
- 1073743884, 2082, 1073743936, 2163,
- 1073744002, 2227, 1073744114, 2295, // NOLINT
- 2299, 1073744138, 2341, 1073744176,
- 2374, 1073744224, 2428, 1073744260, // NOLINT
- 2482, 2511, 1073744352, 2532,
- 1073744358, 2543, 1073744378, 2558, // NOLINT
- 1073744384, 2600, 1073744448, 2626,
- 1073744452, 2635, 1073744480, 2678, // NOLINT
- 2682, 1073744510, 2735, 2737,
- 1073744565, 2742, 1073744569, 2749, // NOLINT
- 2752, 2754, 1073744603, 2781,
- 1073744608, 2794, 1073744626, 2804, // NOLINT
- 1073744641, 2822, 1073744649, 2830,
- 1073744657, 2838, 1073744672, 2854, // NOLINT
- 1073744680, 2862, 1073744688, 2906,
- 1073744732, 2911, 1073744740, 2917, // NOLINT
- 1073744832, 3042, 1073744896, 8191}; // NOLINT
-static const uint16_t kLetterTable6Size = 6;
-static const int32_t kLetterTable6[6] = {1073741824, 6051, 1073747888, 6086,
- 1073747915, 6139}; // NOLINT
-static const uint16_t kLetterTable7Size = 48;
-static const int32_t kLetterTable7[48] = {
- 1073748224, 6765, 1073748592, 6873,
- 1073748736, 6918, 1073748755, 6935, // NOLINT
- 6941, 1073748767, 6952, 1073748778,
- 6966, 1073748792, 6972, 6974, // NOLINT
- 1073748800, 6977, 1073748803, 6980,
- 1073748806, 7089, 1073748947, 7485, // NOLINT
- 1073749328, 7567, 1073749394, 7623,
- 1073749488, 7675, 1073749616, 7796, // NOLINT
- 1073749622, 7932, 1073749793, 7994,
- 1073749825, 8026, 1073749862, 8126, // NOLINT
- 1073749954, 8135, 1073749962, 8143,
- 1073749970, 8151, 1073749978, 8156}; // NOLINT
-bool Letter::Is(uchar c) {  // Table-driven build: selects the per-chunk table for c and searches it.
- int chunk_index = c >> 13;  // 8192 characters per chunk (kChunkBits is 1 << 13).
- switch (chunk_index) {
- case 0:
- return LookupPredicate(kLetterTable0, kLetterTable0Size, c);
- case 1:
- return LookupPredicate(kLetterTable1, kLetterTable1Size, c);
- case 2:
- return LookupPredicate(kLetterTable2, kLetterTable2Size, c);
- case 3:
- return LookupPredicate(kLetterTable3, kLetterTable3Size, c);
- case 4:
- return LookupPredicate(kLetterTable4, kLetterTable4Size, c);
- case 5:
- return LookupPredicate(kLetterTable5, kLetterTable5Size, c);
- case 6:
- return LookupPredicate(kLetterTable6, kLetterTable6Size, c);
- case 7:
- return LookupPredicate(kLetterTable7, kLetterTable7Size, c);
- default:
- return false;  // chunk_index >= 8, i.e. c >= 0x10000: no table, so never a letter here.
- }
-}
-#endif
-
-#ifndef V8_INTL_SUPPORT
-
-static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings0[1] =
- { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262CanonicalizeTable0Size = 498; // NOLINT
-static const int32_t kEcma262CanonicalizeTable0[996] = {
- 1073741921, -128, 122, -128, 181, 2972,
- 1073742048, -128, 246, -128, 1073742072, -128,
- 254, -128, 255, 484, // NOLINT
- 257, -4, 259, -4, 261, -4,
- 263, -4, 265, -4, 267, -4,
- 269, -4, 271, -4, // NOLINT
- 273, -4, 275, -4, 277, -4,
- 279, -4, 281, -4, 283, -4,
- 285, -4, 287, -4, // NOLINT
- 289, -4, 291, -4, 293, -4,
- 295, -4, 297, -4, 299, -4,
- 301, -4, 303, -4, // NOLINT
- 307, -4, 309, -4, 311, -4,
- 314, -4, 316, -4, 318, -4,
- 320, -4, 322, -4, // NOLINT
- 324, -4, 326, -4, 328, -4,
- 331, -4, 333, -4, 335, -4,
- 337, -4, 339, -4, // NOLINT
- 341, -4, 343, -4, 345, -4,
- 347, -4, 349, -4, 351, -4,
- 353, -4, 355, -4, // NOLINT
- 357, -4, 359, -4, 361, -4,
- 363, -4, 365, -4, 367, -4,
- 369, -4, 371, -4, // NOLINT
- 373, -4, 375, -4, 378, -4,
- 380, -4, 382, -4, 384, 780,
- 387, -4, 389, -4, // NOLINT
- 392, -4, 396, -4, 402, -4,
- 405, 388, 409, -4, 410, 652,
- 414, 520, 417, -4, // NOLINT
- 419, -4, 421, -4, 424, -4,
- 429, -4, 432, -4, 436, -4,
- 438, -4, 441, -4, // NOLINT
- 445, -4, 447, 224, 453, -4,
- 454, -8, 456, -4, 457, -8,
- 459, -4, 460, -8, // NOLINT
- 462, -4, 464, -4, 466, -4,
- 468, -4, 470, -4, 472, -4,
- 474, -4, 476, -4, // NOLINT
- 477, -316, 479, -4, 481, -4,
- 483, -4, 485, -4, 487, -4,
- 489, -4, 491, -4, // NOLINT
- 493, -4, 495, -4, 498, -4,
- 499, -8, 501, -4, 505, -4,
- 507, -4, 509, -4, // NOLINT
- 511, -4, 513, -4, 515, -4,
- 517, -4, 519, -4, 521, -4,
- 523, -4, 525, -4, // NOLINT
- 527, -4, 529, -4, 531, -4,
- 533, -4, 535, -4, 537, -4,
- 539, -4, 541, -4, // NOLINT
- 543, -4, 547, -4, 549, -4,
- 551, -4, 553, -4, 555, -4,
- 557, -4, 559, -4, // NOLINT
- 561, -4, 563, -4, 572, -4,
- 1073742399, 43260, 576, 43260, 578, -4,
- 583, -4, 585, -4, // NOLINT
- 587, -4, 589, -4, 591, -4,
- 592, 43132, 593, 43120, 594, 43128,
- 595, -840, 596, -824, // NOLINT
- 1073742422, -820, 599, -820, 601, -808,
- 603, -812, 604, 169276, 608, -820,
- 609, 169260, 611, -828, // NOLINT
- 613, 169120, 614, 169232, 616, -836,
- 617, -844, 619, 42972, 620, 169220,
- 623, -844, 625, 42996, // NOLINT
- 626, -852, 629, -856, 637, 42908,
- 640, -872, 643, -872, 647, 169128,
- 648, -872, 649, -276, // NOLINT
- 1073742474, -868, 651, -868, 652, -284,
- 658, -876, 670, 169032, 837, 336,
- 881, -4, 883, -4, // NOLINT
- 887, -4, 1073742715, 520, 893, 520,
- 940, -152, 1073742765, -148, 943, -148,
- 1073742769, -128, 961, -128, // NOLINT
- 962, -124, 1073742787, -128, 971, -128,
- 972, -256, 1073742797, -252, 974, -252,
- 976, -248, 977, -228, // NOLINT
- 981, -188, 982, -216, 983, -32,
- 985, -4, 987, -4, 989, -4,
- 991, -4, 993, -4, // NOLINT
- 995, -4, 997, -4, 999, -4,
- 1001, -4, 1003, -4, 1005, -4,
- 1007, -4, 1008, -344, // NOLINT
- 1009, -320, 1010, 28, 1011, -464,
- 1013, -384, 1016, -4, 1019, -4,
- 1073742896, -128, 1103, -128, // NOLINT
- 1073742928, -320, 1119, -320, 1121, -4,
- 1123, -4, 1125, -4, 1127, -4,
- 1129, -4, 1131, -4, // NOLINT
- 1133, -4, 1135, -4, 1137, -4,
- 1139, -4, 1141, -4, 1143, -4,
- 1145, -4, 1147, -4, // NOLINT
- 1149, -4, 1151, -4, 1153, -4,
- 1163, -4, 1165, -4, 1167, -4,
- 1169, -4, 1171, -4, // NOLINT
- 1173, -4, 1175, -4, 1177, -4,
- 1179, -4, 1181, -4, 1183, -4,
- 1185, -4, 1187, -4, // NOLINT
- 1189, -4, 1191, -4, 1193, -4,
- 1195, -4, 1197, -4, 1199, -4,
- 1201, -4, 1203, -4, // NOLINT
- 1205, -4, 1207, -4, 1209, -4,
- 1211, -4, 1213, -4, 1215, -4,
- 1218, -4, 1220, -4, // NOLINT
- 1222, -4, 1224, -4, 1226, -4,
- 1228, -4, 1230, -4, 1231, -60,
- 1233, -4, 1235, -4, // NOLINT
- 1237, -4, 1239, -4, 1241, -4,
- 1243, -4, 1245, -4, 1247, -4,
- 1249, -4, 1251, -4, // NOLINT
- 1253, -4, 1255, -4, 1257, -4,
- 1259, -4, 1261, -4, 1263, -4,
- 1265, -4, 1267, -4, // NOLINT
- 1269, -4, 1271, -4, 1273, -4,
- 1275, -4, 1277, -4, 1279, -4,
- 1281, -4, 1283, -4, // NOLINT
- 1285, -4, 1287, -4, 1289, -4,
- 1291, -4, 1293, -4, 1295, -4,
- 1297, -4, 1299, -4, // NOLINT
- 1301, -4, 1303, -4, 1305, -4,
- 1307, -4, 1309, -4, 1311, -4,
- 1313, -4, 1315, -4, // NOLINT
- 1317, -4, 1319, -4, 1321, -4,
- 1323, -4, 1325, -4, 1327, -4,
- 1073743201, -192, 1414, -192, // NOLINT
- 7545, 141328, 7549, 15256, 7681, -4,
- 7683, -4, 7685, -4, 7687, -4,
- 7689, -4, 7691, -4, // NOLINT
- 7693, -4, 7695, -4, 7697, -4,
- 7699, -4, 7701, -4, 7703, -4,
- 7705, -4, 7707, -4, // NOLINT
- 7709, -4, 7711, -4, 7713, -4,
- 7715, -4, 7717, -4, 7719, -4,
- 7721, -4, 7723, -4, // NOLINT
- 7725, -4, 7727, -4, 7729, -4,
- 7731, -4, 7733, -4, 7735, -4,
- 7737, -4, 7739, -4, // NOLINT
- 7741, -4, 7743, -4, 7745, -4,
- 7747, -4, 7749, -4, 7751, -4,
- 7753, -4, 7755, -4, // NOLINT
- 7757, -4, 7759, -4, 7761, -4,
- 7763, -4, 7765, -4, 7767, -4,
- 7769, -4, 7771, -4, // NOLINT
- 7773, -4, 7775, -4, 7777, -4,
- 7779, -4, 7781, -4, 7783, -4,
- 7785, -4, 7787, -4, // NOLINT
- 7789, -4, 7791, -4, 7793, -4,
- 7795, -4, 7797, -4, 7799, -4,
- 7801, -4, 7803, -4, // NOLINT
- 7805, -4, 7807, -4, 7809, -4,
- 7811, -4, 7813, -4, 7815, -4,
- 7817, -4, 7819, -4, // NOLINT
- 7821, -4, 7823, -4, 7825, -4,
- 7827, -4, 7829, -4, 7835, -236,
- 7841, -4, 7843, -4, // NOLINT
- 7845, -4, 7847, -4, 7849, -4,
- 7851, -4, 7853, -4, 7855, -4,
- 7857, -4, 7859, -4, // NOLINT
- 7861, -4, 7863, -4, 7865, -4,
- 7867, -4, 7869, -4, 7871, -4,
- 7873, -4, 7875, -4, // NOLINT
- 7877, -4, 7879, -4, 7881, -4,
- 7883, -4, 7885, -4, 7887, -4,
- 7889, -4, 7891, -4, // NOLINT
- 7893, -4, 7895, -4, 7897, -4,
- 7899, -4, 7901, -4, 7903, -4,
- 7905, -4, 7907, -4, // NOLINT
- 7909, -4, 7911, -4, 7913, -4,
- 7915, -4, 7917, -4, 7919, -4,
- 7921, -4, 7923, -4, // NOLINT
- 7925, -4, 7927, -4, 7929, -4,
- 7931, -4, 7933, -4, 7935, -4,
- 1073749760, 32, 7943, 32, // NOLINT
- 1073749776, 32, 7957, 32, 1073749792, 32,
- 7975, 32, 1073749808, 32, 7991, 32,
- 1073749824, 32, 8005, 32, // NOLINT
- 8017, 32, 8019, 32, 8021, 32,
- 8023, 32, 1073749856, 32, 8039, 32,
- 1073749872, 296, 8049, 296, // NOLINT
- 1073749874, 344, 8053, 344, 1073749878, 400,
- 8055, 400, 1073749880, 512, 8057, 512,
- 1073749882, 448, 8059, 448, // NOLINT
- 1073749884, 504, 8061, 504, 1073749936, 32,
- 8113, 32, 8126, -28820, 1073749968, 32,
- 8145, 32, 1073749984, 32, // NOLINT
- 8161, 32, 8165, 28}; // NOLINT
-static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings1[1] =
- { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262CanonicalizeTable1Size = 73; // NOLINT
-static const int32_t kEcma262CanonicalizeTable1[146] = {
- 334, -112, 1073742192, -64, 383, -64,
- 388, -4, 1073743056, -104, 1257, -104,
- 1073744944, -192, 3166, -192, // NOLINT
- 3169, -4, 3173, -43180, 3174, -43168,
- 3176, -4, 3178, -4, 3180, -4,
- 3187, -4, 3190, -4, // NOLINT
- 3201, -4, 3203, -4, 3205, -4,
- 3207, -4, 3209, -4, 3211, -4,
- 3213, -4, 3215, -4, // NOLINT
- 3217, -4, 3219, -4, 3221, -4,
- 3223, -4, 3225, -4, 3227, -4,
- 3229, -4, 3231, -4, // NOLINT
- 3233, -4, 3235, -4, 3237, -4,
- 3239, -4, 3241, -4, 3243, -4,
- 3245, -4, 3247, -4, // NOLINT
- 3249, -4, 3251, -4, 3253, -4,
- 3255, -4, 3257, -4, 3259, -4,
- 3261, -4, 3263, -4, // NOLINT
- 3265, -4, 3267, -4, 3269, -4,
- 3271, -4, 3273, -4, 3275, -4,
- 3277, -4, 3279, -4, // NOLINT
- 3281, -4, 3283, -4, 3285, -4,
- 3287, -4, 3289, -4, 3291, -4,
- 3293, -4, 3295, -4, // NOLINT
- 3297, -4, 3299, -4, 3308, -4,
- 3310, -4, 3315, -4, 1073745152, -29056,
- 3365, -29056, 3367, -29056, // NOLINT
- 3373, -29056}; // NOLINT
-static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings5[1] =
- { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262CanonicalizeTable5Size = 95; // NOLINT
-static const int32_t kEcma262CanonicalizeTable5[190] = {
- 1601, -4, 1603, -4, 1605, -4, 1607, -4,
- 1609, -4, 1611, -4, 1613, -4, 1615, -4, // NOLINT
- 1617, -4, 1619, -4, 1621, -4, 1623, -4,
- 1625, -4, 1627, -4, 1629, -4, 1631, -4, // NOLINT
- 1633, -4, 1635, -4, 1637, -4, 1639, -4,
- 1641, -4, 1643, -4, 1645, -4, 1665, -4, // NOLINT
- 1667, -4, 1669, -4, 1671, -4, 1673, -4,
- 1675, -4, 1677, -4, 1679, -4, 1681, -4, // NOLINT
- 1683, -4, 1685, -4, 1687, -4, 1689, -4,
- 1691, -4, 1827, -4, 1829, -4, 1831, -4, // NOLINT
- 1833, -4, 1835, -4, 1837, -4, 1839, -4,
- 1843, -4, 1845, -4, 1847, -4, 1849, -4, // NOLINT
- 1851, -4, 1853, -4, 1855, -4, 1857, -4,
- 1859, -4, 1861, -4, 1863, -4, 1865, -4, // NOLINT
- 1867, -4, 1869, -4, 1871, -4, 1873, -4,
- 1875, -4, 1877, -4, 1879, -4, 1881, -4, // NOLINT
- 1883, -4, 1885, -4, 1887, -4, 1889, -4,
- 1891, -4, 1893, -4, 1895, -4, 1897, -4, // NOLINT
- 1899, -4, 1901, -4, 1903, -4, 1914, -4,
- 1916, -4, 1919, -4, 1921, -4, 1923, -4, // NOLINT
- 1925, -4, 1927, -4, 1932, -4, 1937, -4,
- 1939, -4, 1943, -4, 1945, -4, 1947, -4, // NOLINT
- 1949, -4, 1951, -4, 1953, -4, 1955, -4,
- 1957, -4, 1959, -4, 1961, -4}; // NOLINT
-static const MultiCharacterSpecialCase<1> kEcma262CanonicalizeMultiStrings7[1] =
- { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262CanonicalizeTable7Size = 2; // NOLINT
-static const int32_t kEcma262CanonicalizeTable7[4] = {1073749825, -128, 8026,
- -128}; // NOLINT
-int Ecma262Canonicalize::Convert(uchar c, uchar n, uchar* result,
- bool* allow_caching_ptr) {
- int chunk_index = c >> 13;
- switch (chunk_index) {
- case 0:
- return LookupMapping<true>(
- kEcma262CanonicalizeTable0, kEcma262CanonicalizeTable0Size,
- kEcma262CanonicalizeMultiStrings0, c, n, result, allow_caching_ptr);
- case 1:
- return LookupMapping<true>(
- kEcma262CanonicalizeTable1, kEcma262CanonicalizeTable1Size,
- kEcma262CanonicalizeMultiStrings1, c, n, result, allow_caching_ptr);
- case 5:
- return LookupMapping<true>(
- kEcma262CanonicalizeTable5, kEcma262CanonicalizeTable5Size,
- kEcma262CanonicalizeMultiStrings5, c, n, result, allow_caching_ptr);
- case 7:
- return LookupMapping<true>(
- kEcma262CanonicalizeTable7, kEcma262CanonicalizeTable7Size,
- kEcma262CanonicalizeMultiStrings7, c, n, result, allow_caching_ptr);
- default:
- return 0;
- }
-}
-
-static const MultiCharacterSpecialCase<4>
- kEcma262UnCanonicalizeMultiStrings0[507] = { // NOLINT
- {{65, 97, kSentinel}},
- {{90, 122, kSentinel}},
- {{181, 924, 956, kSentinel}},
- {{192, 224, kSentinel}}, // NOLINT
- {{214, 246, kSentinel}},
- {{216, 248, kSentinel}},
- {{222, 254, kSentinel}},
- {{255, 376, kSentinel}}, // NOLINT
- {{256, 257, kSentinel}},
- {{258, 259, kSentinel}},
- {{260, 261, kSentinel}},
- {{262, 263, kSentinel}}, // NOLINT
- {{264, 265, kSentinel}},
- {{266, 267, kSentinel}},
- {{268, 269, kSentinel}},
- {{270, 271, kSentinel}}, // NOLINT
- {{272, 273, kSentinel}},
- {{274, 275, kSentinel}},
- {{276, 277, kSentinel}},
- {{278, 279, kSentinel}}, // NOLINT
- {{280, 281, kSentinel}},
- {{282, 283, kSentinel}},
- {{284, 285, kSentinel}},
- {{286, 287, kSentinel}}, // NOLINT
- {{288, 289, kSentinel}},
- {{290, 291, kSentinel}},
- {{292, 293, kSentinel}},
- {{294, 295, kSentinel}}, // NOLINT
- {{296, 297, kSentinel}},
- {{298, 299, kSentinel}},
- {{300, 301, kSentinel}},
- {{302, 303, kSentinel}}, // NOLINT
- {{306, 307, kSentinel}},
- {{308, 309, kSentinel}},
- {{310, 311, kSentinel}},
- {{313, 314, kSentinel}}, // NOLINT
- {{315, 316, kSentinel}},
- {{317, 318, kSentinel}},
- {{319, 320, kSentinel}},
- {{321, 322, kSentinel}}, // NOLINT
- {{323, 324, kSentinel}},
- {{325, 326, kSentinel}},
- {{327, 328, kSentinel}},
- {{330, 331, kSentinel}}, // NOLINT
- {{332, 333, kSentinel}},
- {{334, 335, kSentinel}},
- {{336, 337, kSentinel}},
- {{338, 339, kSentinel}}, // NOLINT
- {{340, 341, kSentinel}},
- {{342, 343, kSentinel}},
- {{344, 345, kSentinel}},
- {{346, 347, kSentinel}}, // NOLINT
- {{348, 349, kSentinel}},
- {{350, 351, kSentinel}},
- {{352, 353, kSentinel}},
- {{354, 355, kSentinel}}, // NOLINT
- {{356, 357, kSentinel}},
- {{358, 359, kSentinel}},
- {{360, 361, kSentinel}},
- {{362, 363, kSentinel}}, // NOLINT
- {{364, 365, kSentinel}},
- {{366, 367, kSentinel}},
- {{368, 369, kSentinel}},
- {{370, 371, kSentinel}}, // NOLINT
- {{372, 373, kSentinel}},
- {{374, 375, kSentinel}},
- {{377, 378, kSentinel}},
- {{379, 380, kSentinel}}, // NOLINT
- {{381, 382, kSentinel}},
- {{384, 579, kSentinel}},
- {{385, 595, kSentinel}},
- {{386, 387, kSentinel}}, // NOLINT
- {{388, 389, kSentinel}},
- {{390, 596, kSentinel}},
- {{391, 392, kSentinel}},
- {{393, 598, kSentinel}}, // NOLINT
- {{394, 599, kSentinel}},
- {{395, 396, kSentinel}},
- {{398, 477, kSentinel}},
- {{399, 601, kSentinel}}, // NOLINT
- {{400, 603, kSentinel}},
- {{401, 402, kSentinel}},
- {{403, 608, kSentinel}},
- {{404, 611, kSentinel}}, // NOLINT
- {{405, 502, kSentinel}},
- {{406, 617, kSentinel}},
- {{407, 616, kSentinel}},
- {{408, 409, kSentinel}}, // NOLINT
- {{410, 573, kSentinel}},
- {{412, 623, kSentinel}},
- {{413, 626, kSentinel}},
- {{414, 544, kSentinel}}, // NOLINT
- {{415, 629, kSentinel}},
- {{416, 417, kSentinel}},
- {{418, 419, kSentinel}},
- {{420, 421, kSentinel}}, // NOLINT
- {{422, 640, kSentinel}},
- {{423, 424, kSentinel}},
- {{425, 643, kSentinel}},
- {{428, 429, kSentinel}}, // NOLINT
- {{430, 648, kSentinel}},
- {{431, 432, kSentinel}},
- {{433, 650, kSentinel}},
- {{434, 651, kSentinel}}, // NOLINT
- {{435, 436, kSentinel}},
- {{437, 438, kSentinel}},
- {{439, 658, kSentinel}},
- {{440, 441, kSentinel}}, // NOLINT
- {{444, 445, kSentinel}},
- {{447, 503, kSentinel}},
- {{452, 453, 454, kSentinel}},
- {{455, 456, 457, kSentinel}}, // NOLINT
- {{458, 459, 460, kSentinel}},
- {{461, 462, kSentinel}},
- {{463, 464, kSentinel}},
- {{465, 466, kSentinel}}, // NOLINT
- {{467, 468, kSentinel}},
- {{469, 470, kSentinel}},
- {{471, 472, kSentinel}},
- {{473, 474, kSentinel}}, // NOLINT
- {{475, 476, kSentinel}},
- {{478, 479, kSentinel}},
- {{480, 481, kSentinel}},
- {{482, 483, kSentinel}}, // NOLINT
- {{484, 485, kSentinel}},
- {{486, 487, kSentinel}},
- {{488, 489, kSentinel}},
- {{490, 491, kSentinel}}, // NOLINT
- {{492, 493, kSentinel}},
- {{494, 495, kSentinel}},
- {{497, 498, 499, kSentinel}},
- {{500, 501, kSentinel}}, // NOLINT
- {{504, 505, kSentinel}},
- {{506, 507, kSentinel}},
- {{508, 509, kSentinel}},
- {{510, 511, kSentinel}}, // NOLINT
- {{512, 513, kSentinel}},
- {{514, 515, kSentinel}},
- {{516, 517, kSentinel}},
- {{518, 519, kSentinel}}, // NOLINT
- {{520, 521, kSentinel}},
- {{522, 523, kSentinel}},
- {{524, 525, kSentinel}},
- {{526, 527, kSentinel}}, // NOLINT
- {{528, 529, kSentinel}},
- {{530, 531, kSentinel}},
- {{532, 533, kSentinel}},
- {{534, 535, kSentinel}}, // NOLINT
- {{536, 537, kSentinel}},
- {{538, 539, kSentinel}},
- {{540, 541, kSentinel}},
- {{542, 543, kSentinel}}, // NOLINT
- {{546, 547, kSentinel}},
- {{548, 549, kSentinel}},
- {{550, 551, kSentinel}},
- {{552, 553, kSentinel}}, // NOLINT
- {{554, 555, kSentinel}},
- {{556, 557, kSentinel}},
- {{558, 559, kSentinel}},
- {{560, 561, kSentinel}}, // NOLINT
- {{562, 563, kSentinel}},
- {{570, 11365, kSentinel}},
- {{571, 572, kSentinel}},
- {{574, 11366, kSentinel}}, // NOLINT
- {{575, 11390, kSentinel}},
- {{576, 11391, kSentinel}},
- {{577, 578, kSentinel}},
- {{580, 649, kSentinel}}, // NOLINT
- {{581, 652, kSentinel}},
- {{582, 583, kSentinel}},
- {{584, 585, kSentinel}},
- {{586, 587, kSentinel}}, // NOLINT
- {{588, 589, kSentinel}},
- {{590, 591, kSentinel}},
- {{592, 11375, kSentinel}},
- {{593, 11373, kSentinel}}, // NOLINT
- {{594, 11376, kSentinel}},
- {{604, 42923, kSentinel}},
- {{609, 42924, kSentinel}},
- {{613, 42893, kSentinel}}, // NOLINT
- {{614, 42922, kSentinel}},
- {{619, 11362, kSentinel}},
- {{620, 42925, kSentinel}},
- {{625, 11374, kSentinel}}, // NOLINT
- {{637, 11364, kSentinel}},
- {{647, 42929, kSentinel}},
- {{670, 42928, kSentinel}},
- {{837, 921, 953, 8126}}, // NOLINT
- {{880, 881, kSentinel}},
- {{882, 883, kSentinel}},
- {{886, 887, kSentinel}},
- {{891, 1021, kSentinel}}, // NOLINT
- {{893, 1023, kSentinel}},
- {{895, 1011, kSentinel}},
- {{902, 940, kSentinel}},
- {{904, 941, kSentinel}}, // NOLINT
- {{906, 943, kSentinel}},
- {{908, 972, kSentinel}},
- {{910, 973, kSentinel}},
- {{911, 974, kSentinel}}, // NOLINT
- {{913, 945, kSentinel}},
- {{914, 946, 976, kSentinel}},
- {{915, 947, kSentinel}},
- {{916, 948, kSentinel}}, // NOLINT
- {{917, 949, 1013, kSentinel}},
- {{918, 950, kSentinel}},
- {{919, 951, kSentinel}},
- {{920, 952, 977, kSentinel}}, // NOLINT
- {{922, 954, 1008, kSentinel}},
- {{923, 955, kSentinel}},
- {{925, 957, kSentinel}},
- {{927, 959, kSentinel}}, // NOLINT
- {{928, 960, 982, kSentinel}},
- {{929, 961, 1009, kSentinel}},
- {{931, 962, 963, kSentinel}},
- {{932, 964, kSentinel}}, // NOLINT
- {{933, 965, kSentinel}},
- {{934, 966, 981, kSentinel}},
- {{935, 967, kSentinel}},
- {{939, 971, kSentinel}}, // NOLINT
- {{975, 983, kSentinel}},
- {{984, 985, kSentinel}},
- {{986, 987, kSentinel}},
- {{988, 989, kSentinel}}, // NOLINT
- {{990, 991, kSentinel}},
- {{992, 993, kSentinel}},
- {{994, 995, kSentinel}},
- {{996, 997, kSentinel}}, // NOLINT
- {{998, 999, kSentinel}},
- {{1000, 1001, kSentinel}},
- {{1002, 1003, kSentinel}},
- {{1004, 1005, kSentinel}}, // NOLINT
- {{1006, 1007, kSentinel}},
- {{1010, 1017, kSentinel}},
- {{1015, 1016, kSentinel}},
- {{1018, 1019, kSentinel}}, // NOLINT
- {{1024, 1104, kSentinel}},
- {{1039, 1119, kSentinel}},
- {{1040, 1072, kSentinel}},
- {{1071, 1103, kSentinel}}, // NOLINT
- {{1120, 1121, kSentinel}},
- {{1122, 1123, kSentinel}},
- {{1124, 1125, kSentinel}},
- {{1126, 1127, kSentinel}}, // NOLINT
- {{1128, 1129, kSentinel}},
- {{1130, 1131, kSentinel}},
- {{1132, 1133, kSentinel}},
- {{1134, 1135, kSentinel}}, // NOLINT
- {{1136, 1137, kSentinel}},
- {{1138, 1139, kSentinel}},
- {{1140, 1141, kSentinel}},
- {{1142, 1143, kSentinel}}, // NOLINT
- {{1144, 1145, kSentinel}},
- {{1146, 1147, kSentinel}},
- {{1148, 1149, kSentinel}},
- {{1150, 1151, kSentinel}}, // NOLINT
- {{1152, 1153, kSentinel}},
- {{1162, 1163, kSentinel}},
- {{1164, 1165, kSentinel}},
- {{1166, 1167, kSentinel}}, // NOLINT
- {{1168, 1169, kSentinel}},
- {{1170, 1171, kSentinel}},
- {{1172, 1173, kSentinel}},
- {{1174, 1175, kSentinel}}, // NOLINT
- {{1176, 1177, kSentinel}},
- {{1178, 1179, kSentinel}},
- {{1180, 1181, kSentinel}},
- {{1182, 1183, kSentinel}}, // NOLINT
- {{1184, 1185, kSentinel}},
- {{1186, 1187, kSentinel}},
- {{1188, 1189, kSentinel}},
- {{1190, 1191, kSentinel}}, // NOLINT
- {{1192, 1193, kSentinel}},
- {{1194, 1195, kSentinel}},
- {{1196, 1197, kSentinel}},
- {{1198, 1199, kSentinel}}, // NOLINT
- {{1200, 1201, kSentinel}},
- {{1202, 1203, kSentinel}},
- {{1204, 1205, kSentinel}},
- {{1206, 1207, kSentinel}}, // NOLINT
- {{1208, 1209, kSentinel}},
- {{1210, 1211, kSentinel}},
- {{1212, 1213, kSentinel}},
- {{1214, 1215, kSentinel}}, // NOLINT
- {{1216, 1231, kSentinel}},
- {{1217, 1218, kSentinel}},
- {{1219, 1220, kSentinel}},
- {{1221, 1222, kSentinel}}, // NOLINT
- {{1223, 1224, kSentinel}},
- {{1225, 1226, kSentinel}},
- {{1227, 1228, kSentinel}},
- {{1229, 1230, kSentinel}}, // NOLINT
- {{1232, 1233, kSentinel}},
- {{1234, 1235, kSentinel}},
- {{1236, 1237, kSentinel}},
- {{1238, 1239, kSentinel}}, // NOLINT
- {{1240, 1241, kSentinel}},
- {{1242, 1243, kSentinel}},
- {{1244, 1245, kSentinel}},
- {{1246, 1247, kSentinel}}, // NOLINT
- {{1248, 1249, kSentinel}},
- {{1250, 1251, kSentinel}},
- {{1252, 1253, kSentinel}},
- {{1254, 1255, kSentinel}}, // NOLINT
- {{1256, 1257, kSentinel}},
- {{1258, 1259, kSentinel}},
- {{1260, 1261, kSentinel}},
- {{1262, 1263, kSentinel}}, // NOLINT
- {{1264, 1265, kSentinel}},
- {{1266, 1267, kSentinel}},
- {{1268, 1269, kSentinel}},
- {{1270, 1271, kSentinel}}, // NOLINT
- {{1272, 1273, kSentinel}},
- {{1274, 1275, kSentinel}},
- {{1276, 1277, kSentinel}},
- {{1278, 1279, kSentinel}}, // NOLINT
- {{1280, 1281, kSentinel}},
- {{1282, 1283, kSentinel}},
- {{1284, 1285, kSentinel}},
- {{1286, 1287, kSentinel}}, // NOLINT
- {{1288, 1289, kSentinel}},
- {{1290, 1291, kSentinel}},
- {{1292, 1293, kSentinel}},
- {{1294, 1295, kSentinel}}, // NOLINT
- {{1296, 1297, kSentinel}},
- {{1298, 1299, kSentinel}},
- {{1300, 1301, kSentinel}},
- {{1302, 1303, kSentinel}}, // NOLINT
- {{1304, 1305, kSentinel}},
- {{1306, 1307, kSentinel}},
- {{1308, 1309, kSentinel}},
- {{1310, 1311, kSentinel}}, // NOLINT
- {{1312, 1313, kSentinel}},
- {{1314, 1315, kSentinel}},
- {{1316, 1317, kSentinel}},
- {{1318, 1319, kSentinel}}, // NOLINT
- {{1320, 1321, kSentinel}},
- {{1322, 1323, kSentinel}},
- {{1324, 1325, kSentinel}},
- {{1326, 1327, kSentinel}}, // NOLINT
- {{1329, 1377, kSentinel}},
- {{1366, 1414, kSentinel}},
- {{4256, 11520, kSentinel}},
- {{4293, 11557, kSentinel}}, // NOLINT
- {{4295, 11559, kSentinel}},
- {{4301, 11565, kSentinel}},
- {{7545, 42877, kSentinel}},
- {{7549, 11363, kSentinel}}, // NOLINT
- {{7680, 7681, kSentinel}},
- {{7682, 7683, kSentinel}},
- {{7684, 7685, kSentinel}},
- {{7686, 7687, kSentinel}}, // NOLINT
- {{7688, 7689, kSentinel}},
- {{7690, 7691, kSentinel}},
- {{7692, 7693, kSentinel}},
- {{7694, 7695, kSentinel}}, // NOLINT
- {{7696, 7697, kSentinel}},
- {{7698, 7699, kSentinel}},
- {{7700, 7701, kSentinel}},
- {{7702, 7703, kSentinel}}, // NOLINT
- {{7704, 7705, kSentinel}},
- {{7706, 7707, kSentinel}},
- {{7708, 7709, kSentinel}},
- {{7710, 7711, kSentinel}}, // NOLINT
- {{7712, 7713, kSentinel}},
- {{7714, 7715, kSentinel}},
- {{7716, 7717, kSentinel}},
- {{7718, 7719, kSentinel}}, // NOLINT
- {{7720, 7721, kSentinel}},
- {{7722, 7723, kSentinel}},
- {{7724, 7725, kSentinel}},
- {{7726, 7727, kSentinel}}, // NOLINT
- {{7728, 7729, kSentinel}},
- {{7730, 7731, kSentinel}},
- {{7732, 7733, kSentinel}},
- {{7734, 7735, kSentinel}}, // NOLINT
- {{7736, 7737, kSentinel}},
- {{7738, 7739, kSentinel}},
- {{7740, 7741, kSentinel}},
- {{7742, 7743, kSentinel}}, // NOLINT
- {{7744, 7745, kSentinel}},
- {{7746, 7747, kSentinel}},
- {{7748, 7749, kSentinel}},
- {{7750, 7751, kSentinel}}, // NOLINT
- {{7752, 7753, kSentinel}},
- {{7754, 7755, kSentinel}},
- {{7756, 7757, kSentinel}},
- {{7758, 7759, kSentinel}}, // NOLINT
- {{7760, 7761, kSentinel}},
- {{7762, 7763, kSentinel}},
- {{7764, 7765, kSentinel}},
- {{7766, 7767, kSentinel}}, // NOLINT
- {{7768, 7769, kSentinel}},
- {{7770, 7771, kSentinel}},
- {{7772, 7773, kSentinel}},
- {{7774, 7775, kSentinel}}, // NOLINT
- {{7776, 7777, 7835, kSentinel}},
- {{7778, 7779, kSentinel}},
- {{7780, 7781, kSentinel}},
- {{7782, 7783, kSentinel}}, // NOLINT
- {{7784, 7785, kSentinel}},
- {{7786, 7787, kSentinel}},
- {{7788, 7789, kSentinel}},
- {{7790, 7791, kSentinel}}, // NOLINT
- {{7792, 7793, kSentinel}},
- {{7794, 7795, kSentinel}},
- {{7796, 7797, kSentinel}},
- {{7798, 7799, kSentinel}}, // NOLINT
- {{7800, 7801, kSentinel}},
- {{7802, 7803, kSentinel}},
- {{7804, 7805, kSentinel}},
- {{7806, 7807, kSentinel}}, // NOLINT
- {{7808, 7809, kSentinel}},
- {{7810, 7811, kSentinel}},
- {{7812, 7813, kSentinel}},
- {{7814, 7815, kSentinel}}, // NOLINT
- {{7816, 7817, kSentinel}},
- {{7818, 7819, kSentinel}},
- {{7820, 7821, kSentinel}},
- {{7822, 7823, kSentinel}}, // NOLINT
- {{7824, 7825, kSentinel}},
- {{7826, 7827, kSentinel}},
- {{7828, 7829, kSentinel}},
- {{7840, 7841, kSentinel}}, // NOLINT
- {{7842, 7843, kSentinel}},
- {{7844, 7845, kSentinel}},
- {{7846, 7847, kSentinel}},
- {{7848, 7849, kSentinel}}, // NOLINT
- {{7850, 7851, kSentinel}},
- {{7852, 7853, kSentinel}},
- {{7854, 7855, kSentinel}},
- {{7856, 7857, kSentinel}}, // NOLINT
- {{7858, 7859, kSentinel}},
- {{7860, 7861, kSentinel}},
- {{7862, 7863, kSentinel}},
- {{7864, 7865, kSentinel}}, // NOLINT
- {{7866, 7867, kSentinel}},
- {{7868, 7869, kSentinel}},
- {{7870, 7871, kSentinel}},
- {{7872, 7873, kSentinel}}, // NOLINT
- {{7874, 7875, kSentinel}},
- {{7876, 7877, kSentinel}},
- {{7878, 7879, kSentinel}},
- {{7880, 7881, kSentinel}}, // NOLINT
- {{7882, 7883, kSentinel}},
- {{7884, 7885, kSentinel}},
- {{7886, 7887, kSentinel}},
- {{7888, 7889, kSentinel}}, // NOLINT
- {{7890, 7891, kSentinel}},
- {{7892, 7893, kSentinel}},
- {{7894, 7895, kSentinel}},
- {{7896, 7897, kSentinel}}, // NOLINT
- {{7898, 7899, kSentinel}},
- {{7900, 7901, kSentinel}},
- {{7902, 7903, kSentinel}},
- {{7904, 7905, kSentinel}}, // NOLINT
- {{7906, 7907, kSentinel}},
- {{7908, 7909, kSentinel}},
- {{7910, 7911, kSentinel}},
- {{7912, 7913, kSentinel}}, // NOLINT
- {{7914, 7915, kSentinel}},
- {{7916, 7917, kSentinel}},
- {{7918, 7919, kSentinel}},
- {{7920, 7921, kSentinel}}, // NOLINT
- {{7922, 7923, kSentinel}},
- {{7924, 7925, kSentinel}},
- {{7926, 7927, kSentinel}},
- {{7928, 7929, kSentinel}}, // NOLINT
- {{7930, 7931, kSentinel}},
- {{7932, 7933, kSentinel}},
- {{7934, 7935, kSentinel}},
- {{7936, 7944, kSentinel}}, // NOLINT
- {{7943, 7951, kSentinel}},
- {{7952, 7960, kSentinel}},
- {{7957, 7965, kSentinel}},
- {{7968, 7976, kSentinel}}, // NOLINT
- {{7975, 7983, kSentinel}},
- {{7984, 7992, kSentinel}},
- {{7991, 7999, kSentinel}},
- {{8000, 8008, kSentinel}}, // NOLINT
- {{8005, 8013, kSentinel}},
- {{8017, 8025, kSentinel}},
- {{8019, 8027, kSentinel}},
- {{8021, 8029, kSentinel}}, // NOLINT
- {{8023, 8031, kSentinel}},
- {{8032, 8040, kSentinel}},
- {{8039, 8047, kSentinel}},
- {{8048, 8122, kSentinel}}, // NOLINT
- {{8049, 8123, kSentinel}},
- {{8050, 8136, kSentinel}},
- {{8053, 8139, kSentinel}},
- {{8054, 8154, kSentinel}}, // NOLINT
- {{8055, 8155, kSentinel}},
- {{8056, 8184, kSentinel}},
- {{8057, 8185, kSentinel}},
- {{8058, 8170, kSentinel}}, // NOLINT
- {{8059, 8171, kSentinel}},
- {{8060, 8186, kSentinel}},
- {{8061, 8187, kSentinel}},
- {{8112, 8120, kSentinel}}, // NOLINT
- {{8113, 8121, kSentinel}},
- {{8144, 8152, kSentinel}},
- {{8145, 8153, kSentinel}},
- {{8160, 8168, kSentinel}}, // NOLINT
- {{8161, 8169, kSentinel}},
- {{8165, 8172, kSentinel}},
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262UnCanonicalizeTable0Size = 1005; // NOLINT
-static const int32_t kEcma262UnCanonicalizeTable0[2010] = {
- 1073741889, 1, 90, 5, 1073741921, 1,
- 122, 5, 181, 9, 1073742016, 13,
- 214, 17, 1073742040, 21, // NOLINT
- 222, 25, 1073742048, 13, 246, 17,
- 1073742072, 21, 254, 25, 255, 29,
- 256, 33, 257, 33, // NOLINT
- 258, 37, 259, 37, 260, 41,
- 261, 41, 262, 45, 263, 45,
- 264, 49, 265, 49, // NOLINT
- 266, 53, 267, 53, 268, 57,
- 269, 57, 270, 61, 271, 61,
- 272, 65, 273, 65, // NOLINT
- 274, 69, 275, 69, 276, 73,
- 277, 73, 278, 77, 279, 77,
- 280, 81, 281, 81, // NOLINT
- 282, 85, 283, 85, 284, 89,
- 285, 89, 286, 93, 287, 93,
- 288, 97, 289, 97, // NOLINT
- 290, 101, 291, 101, 292, 105,
- 293, 105, 294, 109, 295, 109,
- 296, 113, 297, 113, // NOLINT
- 298, 117, 299, 117, 300, 121,
- 301, 121, 302, 125, 303, 125,
- 306, 129, 307, 129, // NOLINT
- 308, 133, 309, 133, 310, 137,
- 311, 137, 313, 141, 314, 141,
- 315, 145, 316, 145, // NOLINT
- 317, 149, 318, 149, 319, 153,
- 320, 153, 321, 157, 322, 157,
- 323, 161, 324, 161, // NOLINT
- 325, 165, 326, 165, 327, 169,
- 328, 169, 330, 173, 331, 173,
- 332, 177, 333, 177, // NOLINT
- 334, 181, 335, 181, 336, 185,
- 337, 185, 338, 189, 339, 189,
- 340, 193, 341, 193, // NOLINT
- 342, 197, 343, 197, 344, 201,
- 345, 201, 346, 205, 347, 205,
- 348, 209, 349, 209, // NOLINT
- 350, 213, 351, 213, 352, 217,
- 353, 217, 354, 221, 355, 221,
- 356, 225, 357, 225, // NOLINT
- 358, 229, 359, 229, 360, 233,
- 361, 233, 362, 237, 363, 237,
- 364, 241, 365, 241, // NOLINT
- 366, 245, 367, 245, 368, 249,
- 369, 249, 370, 253, 371, 253,
- 372, 257, 373, 257, // NOLINT
- 374, 261, 375, 261, 376, 29,
- 377, 265, 378, 265, 379, 269,
- 380, 269, 381, 273, // NOLINT
- 382, 273, 384, 277, 385, 281,
- 386, 285, 387, 285, 388, 289,
- 389, 289, 390, 293, // NOLINT
- 391, 297, 392, 297, 1073742217, 301,
- 394, 305, 395, 309, 396, 309,
- 398, 313, 399, 317, // NOLINT
- 400, 321, 401, 325, 402, 325,
- 403, 329, 404, 333, 405, 337,
- 406, 341, 407, 345, // NOLINT
- 408, 349, 409, 349, 410, 353,
- 412, 357, 413, 361, 414, 365,
- 415, 369, 416, 373, // NOLINT
- 417, 373, 418, 377, 419, 377,
- 420, 381, 421, 381, 422, 385,
- 423, 389, 424, 389, // NOLINT
- 425, 393, 428, 397, 429, 397,
- 430, 401, 431, 405, 432, 405,
- 1073742257, 409, 434, 413, // NOLINT
- 435, 417, 436, 417, 437, 421,
- 438, 421, 439, 425, 440, 429,
- 441, 429, 444, 433, // NOLINT
- 445, 433, 447, 437, 452, 441,
- 453, 441, 454, 441, 455, 445,
- 456, 445, 457, 445, // NOLINT
- 458, 449, 459, 449, 460, 449,
- 461, 453, 462, 453, 463, 457,
- 464, 457, 465, 461, // NOLINT
- 466, 461, 467, 465, 468, 465,
- 469, 469, 470, 469, 471, 473,
- 472, 473, 473, 477, // NOLINT
- 474, 477, 475, 481, 476, 481,
- 477, 313, 478, 485, 479, 485,
- 480, 489, 481, 489, // NOLINT
- 482, 493, 483, 493, 484, 497,
- 485, 497, 486, 501, 487, 501,
- 488, 505, 489, 505, // NOLINT
- 490, 509, 491, 509, 492, 513,
- 493, 513, 494, 517, 495, 517,
- 497, 521, 498, 521, // NOLINT
- 499, 521, 500, 525, 501, 525,
- 502, 337, 503, 437, 504, 529,
- 505, 529, 506, 533, // NOLINT
- 507, 533, 508, 537, 509, 537,
- 510, 541, 511, 541, 512, 545,
- 513, 545, 514, 549, // NOLINT
- 515, 549, 516, 553, 517, 553,
- 518, 557, 519, 557, 520, 561,
- 521, 561, 522, 565, // NOLINT
- 523, 565, 524, 569, 525, 569,
- 526, 573, 527, 573, 528, 577,
- 529, 577, 530, 581, // NOLINT
- 531, 581, 532, 585, 533, 585,
- 534, 589, 535, 589, 536, 593,
- 537, 593, 538, 597, // NOLINT
- 539, 597, 540, 601, 541, 601,
- 542, 605, 543, 605, 544, 365,
- 546, 609, 547, 609, // NOLINT
- 548, 613, 549, 613, 550, 617,
- 551, 617, 552, 621, 553, 621,
- 554, 625, 555, 625, // NOLINT
- 556, 629, 557, 629, 558, 633,
- 559, 633, 560, 637, 561, 637,
- 562, 641, 563, 641, // NOLINT
- 570, 645, 571, 649, 572, 649,
- 573, 353, 574, 653, 1073742399, 657,
- 576, 661, 577, 665, // NOLINT
- 578, 665, 579, 277, 580, 669,
- 581, 673, 582, 677, 583, 677,
- 584, 681, 585, 681, // NOLINT
- 586, 685, 587, 685, 588, 689,
- 589, 689, 590, 693, 591, 693,
- 592, 697, 593, 701, // NOLINT
- 594, 705, 595, 281, 596, 293,
- 1073742422, 301, 599, 305, 601, 317,
- 603, 321, 604, 709, // NOLINT
- 608, 329, 609, 713, 611, 333,
- 613, 717, 614, 721, 616, 345,
- 617, 341, 619, 725, // NOLINT
- 620, 729, 623, 357, 625, 733,
- 626, 361, 629, 369, 637, 737,
- 640, 385, 643, 393, // NOLINT
- 647, 741, 648, 401, 649, 669,
- 1073742474, 409, 651, 413, 652, 673,
- 658, 425, 670, 745, // NOLINT
- 837, 749, 880, 753, 881, 753,
- 882, 757, 883, 757, 886, 761,
- 887, 761, 1073742715, 765, // NOLINT
- 893, 769, 895, 773, 902, 777,
- 1073742728, 781, 906, 785, 908, 789,
- 1073742734, 793, 911, 797, // NOLINT
- 913, 801, 914, 805, 1073742739, 809,
- 916, 813, 917, 817, 1073742742, 821,
- 919, 825, 920, 829, // NOLINT
- 921, 749, 922, 833, 923, 837,
- 924, 9, 1073742749, 841, 927, 845,
- 928, 849, 929, 853, // NOLINT
- 931, 857, 1073742756, 861, 933, 865,
- 934, 869, 1073742759, 873, 939, 877,
- 940, 777, 1073742765, 781, // NOLINT
- 943, 785, 945, 801, 946, 805,
- 1073742771, 809, 948, 813, 949, 817,
- 1073742774, 821, 951, 825, // NOLINT
- 952, 829, 953, 749, 954, 833,
- 955, 837, 956, 9, 1073742781, 841,
- 959, 845, 960, 849, // NOLINT
- 961, 853, 962, 857, 963, 857,
- 1073742788, 861, 965, 865, 966, 869,
- 1073742791, 873, 971, 877, // NOLINT
- 972, 789, 1073742797, 793, 974, 797,
- 975, 881, 976, 805, 977, 829,
- 981, 869, 982, 849, // NOLINT
- 983, 881, 984, 885, 985, 885,
- 986, 889, 987, 889, 988, 893,
- 989, 893, 990, 897, // NOLINT
- 991, 897, 992, 901, 993, 901,
- 994, 905, 995, 905, 996, 909,
- 997, 909, 998, 913, // NOLINT
- 999, 913, 1000, 917, 1001, 917,
- 1002, 921, 1003, 921, 1004, 925,
- 1005, 925, 1006, 929, // NOLINT
- 1007, 929, 1008, 833, 1009, 853,
- 1010, 933, 1011, 773, 1013, 817,
- 1015, 937, 1016, 937, // NOLINT
- 1017, 933, 1018, 941, 1019, 941,
- 1073742845, 765, 1023, 769, 1073742848, 945,
- 1039, 949, 1073742864, 953, // NOLINT
- 1071, 957, 1073742896, 953, 1103, 957,
- 1073742928, 945, 1119, 949, 1120, 961,
- 1121, 961, 1122, 965, // NOLINT
- 1123, 965, 1124, 969, 1125, 969,
- 1126, 973, 1127, 973, 1128, 977,
- 1129, 977, 1130, 981, // NOLINT
- 1131, 981, 1132, 985, 1133, 985,
- 1134, 989, 1135, 989, 1136, 993,
- 1137, 993, 1138, 997, // NOLINT
- 1139, 997, 1140, 1001, 1141, 1001,
- 1142, 1005, 1143, 1005, 1144, 1009,
- 1145, 1009, 1146, 1013, // NOLINT
- 1147, 1013, 1148, 1017, 1149, 1017,
- 1150, 1021, 1151, 1021, 1152, 1025,
- 1153, 1025, 1162, 1029, // NOLINT
- 1163, 1029, 1164, 1033, 1165, 1033,
- 1166, 1037, 1167, 1037, 1168, 1041,
- 1169, 1041, 1170, 1045, // NOLINT
- 1171, 1045, 1172, 1049, 1173, 1049,
- 1174, 1053, 1175, 1053, 1176, 1057,
- 1177, 1057, 1178, 1061, // NOLINT
- 1179, 1061, 1180, 1065, 1181, 1065,
- 1182, 1069, 1183, 1069, 1184, 1073,
- 1185, 1073, 1186, 1077, // NOLINT
- 1187, 1077, 1188, 1081, 1189, 1081,
- 1190, 1085, 1191, 1085, 1192, 1089,
- 1193, 1089, 1194, 1093, // NOLINT
- 1195, 1093, 1196, 1097, 1197, 1097,
- 1198, 1101, 1199, 1101, 1200, 1105,
- 1201, 1105, 1202, 1109, // NOLINT
- 1203, 1109, 1204, 1113, 1205, 1113,
- 1206, 1117, 1207, 1117, 1208, 1121,
- 1209, 1121, 1210, 1125, // NOLINT
- 1211, 1125, 1212, 1129, 1213, 1129,
- 1214, 1133, 1215, 1133, 1216, 1137,
- 1217, 1141, 1218, 1141, // NOLINT
- 1219, 1145, 1220, 1145, 1221, 1149,
- 1222, 1149, 1223, 1153, 1224, 1153,
- 1225, 1157, 1226, 1157, // NOLINT
- 1227, 1161, 1228, 1161, 1229, 1165,
- 1230, 1165, 1231, 1137, 1232, 1169,
- 1233, 1169, 1234, 1173, // NOLINT
- 1235, 1173, 1236, 1177, 1237, 1177,
- 1238, 1181, 1239, 1181, 1240, 1185,
- 1241, 1185, 1242, 1189, // NOLINT
- 1243, 1189, 1244, 1193, 1245, 1193,
- 1246, 1197, 1247, 1197, 1248, 1201,
- 1249, 1201, 1250, 1205, // NOLINT
- 1251, 1205, 1252, 1209, 1253, 1209,
- 1254, 1213, 1255, 1213, 1256, 1217,
- 1257, 1217, 1258, 1221, // NOLINT
- 1259, 1221, 1260, 1225, 1261, 1225,
- 1262, 1229, 1263, 1229, 1264, 1233,
- 1265, 1233, 1266, 1237, // NOLINT
- 1267, 1237, 1268, 1241, 1269, 1241,
- 1270, 1245, 1271, 1245, 1272, 1249,
- 1273, 1249, 1274, 1253, // NOLINT
- 1275, 1253, 1276, 1257, 1277, 1257,
- 1278, 1261, 1279, 1261, 1280, 1265,
- 1281, 1265, 1282, 1269, // NOLINT
- 1283, 1269, 1284, 1273, 1285, 1273,
- 1286, 1277, 1287, 1277, 1288, 1281,
- 1289, 1281, 1290, 1285, // NOLINT
- 1291, 1285, 1292, 1289, 1293, 1289,
- 1294, 1293, 1295, 1293, 1296, 1297,
- 1297, 1297, 1298, 1301, // NOLINT
- 1299, 1301, 1300, 1305, 1301, 1305,
- 1302, 1309, 1303, 1309, 1304, 1313,
- 1305, 1313, 1306, 1317, // NOLINT
- 1307, 1317, 1308, 1321, 1309, 1321,
- 1310, 1325, 1311, 1325, 1312, 1329,
- 1313, 1329, 1314, 1333, // NOLINT
- 1315, 1333, 1316, 1337, 1317, 1337,
- 1318, 1341, 1319, 1341, 1320, 1345,
- 1321, 1345, 1322, 1349, // NOLINT
- 1323, 1349, 1324, 1353, 1325, 1353,
- 1326, 1357, 1327, 1357, 1073743153, 1361,
- 1366, 1365, 1073743201, 1361, // NOLINT
- 1414, 1365, 1073746080, 1369, 4293, 1373,
- 4295, 1377, 4301, 1381, 7545, 1385,
- 7549, 1389, 7680, 1393, // NOLINT
- 7681, 1393, 7682, 1397, 7683, 1397,
- 7684, 1401, 7685, 1401, 7686, 1405,
- 7687, 1405, 7688, 1409, // NOLINT
- 7689, 1409, 7690, 1413, 7691, 1413,
- 7692, 1417, 7693, 1417, 7694, 1421,
- 7695, 1421, 7696, 1425, // NOLINT
- 7697, 1425, 7698, 1429, 7699, 1429,
- 7700, 1433, 7701, 1433, 7702, 1437,
- 7703, 1437, 7704, 1441, // NOLINT
- 7705, 1441, 7706, 1445, 7707, 1445,
- 7708, 1449, 7709, 1449, 7710, 1453,
- 7711, 1453, 7712, 1457, // NOLINT
- 7713, 1457, 7714, 1461, 7715, 1461,
- 7716, 1465, 7717, 1465, 7718, 1469,
- 7719, 1469, 7720, 1473, // NOLINT
- 7721, 1473, 7722, 1477, 7723, 1477,
- 7724, 1481, 7725, 1481, 7726, 1485,
- 7727, 1485, 7728, 1489, // NOLINT
- 7729, 1489, 7730, 1493, 7731, 1493,
- 7732, 1497, 7733, 1497, 7734, 1501,
- 7735, 1501, 7736, 1505, // NOLINT
- 7737, 1505, 7738, 1509, 7739, 1509,
- 7740, 1513, 7741, 1513, 7742, 1517,
- 7743, 1517, 7744, 1521, // NOLINT
- 7745, 1521, 7746, 1525, 7747, 1525,
- 7748, 1529, 7749, 1529, 7750, 1533,
- 7751, 1533, 7752, 1537, // NOLINT
- 7753, 1537, 7754, 1541, 7755, 1541,
- 7756, 1545, 7757, 1545, 7758, 1549,
- 7759, 1549, 7760, 1553, // NOLINT
- 7761, 1553, 7762, 1557, 7763, 1557,
- 7764, 1561, 7765, 1561, 7766, 1565,
- 7767, 1565, 7768, 1569, // NOLINT
- 7769, 1569, 7770, 1573, 7771, 1573,
- 7772, 1577, 7773, 1577, 7774, 1581,
- 7775, 1581, 7776, 1585, // NOLINT
- 7777, 1585, 7778, 1589, 7779, 1589,
- 7780, 1593, 7781, 1593, 7782, 1597,
- 7783, 1597, 7784, 1601, // NOLINT
- 7785, 1601, 7786, 1605, 7787, 1605,
- 7788, 1609, 7789, 1609, 7790, 1613,
- 7791, 1613, 7792, 1617, // NOLINT
- 7793, 1617, 7794, 1621, 7795, 1621,
- 7796, 1625, 7797, 1625, 7798, 1629,
- 7799, 1629, 7800, 1633, // NOLINT
- 7801, 1633, 7802, 1637, 7803, 1637,
- 7804, 1641, 7805, 1641, 7806, 1645,
- 7807, 1645, 7808, 1649, // NOLINT
- 7809, 1649, 7810, 1653, 7811, 1653,
- 7812, 1657, 7813, 1657, 7814, 1661,
- 7815, 1661, 7816, 1665, // NOLINT
- 7817, 1665, 7818, 1669, 7819, 1669,
- 7820, 1673, 7821, 1673, 7822, 1677,
- 7823, 1677, 7824, 1681, // NOLINT
- 7825, 1681, 7826, 1685, 7827, 1685,
- 7828, 1689, 7829, 1689, 7835, 1585,
- 7840, 1693, 7841, 1693, // NOLINT
- 7842, 1697, 7843, 1697, 7844, 1701,
- 7845, 1701, 7846, 1705, 7847, 1705,
- 7848, 1709, 7849, 1709, // NOLINT
- 7850, 1713, 7851, 1713, 7852, 1717,
- 7853, 1717, 7854, 1721, 7855, 1721,
- 7856, 1725, 7857, 1725, // NOLINT
- 7858, 1729, 7859, 1729, 7860, 1733,
- 7861, 1733, 7862, 1737, 7863, 1737,
- 7864, 1741, 7865, 1741, // NOLINT
- 7866, 1745, 7867, 1745, 7868, 1749,
- 7869, 1749, 7870, 1753, 7871, 1753,
- 7872, 1757, 7873, 1757, // NOLINT
- 7874, 1761, 7875, 1761, 7876, 1765,
- 7877, 1765, 7878, 1769, 7879, 1769,
- 7880, 1773, 7881, 1773, // NOLINT
- 7882, 1777, 7883, 1777, 7884, 1781,
- 7885, 1781, 7886, 1785, 7887, 1785,
- 7888, 1789, 7889, 1789, // NOLINT
- 7890, 1793, 7891, 1793, 7892, 1797,
- 7893, 1797, 7894, 1801, 7895, 1801,
- 7896, 1805, 7897, 1805, // NOLINT
- 7898, 1809, 7899, 1809, 7900, 1813,
- 7901, 1813, 7902, 1817, 7903, 1817,
- 7904, 1821, 7905, 1821, // NOLINT
- 7906, 1825, 7907, 1825, 7908, 1829,
- 7909, 1829, 7910, 1833, 7911, 1833,
- 7912, 1837, 7913, 1837, // NOLINT
- 7914, 1841, 7915, 1841, 7916, 1845,
- 7917, 1845, 7918, 1849, 7919, 1849,
- 7920, 1853, 7921, 1853, // NOLINT
- 7922, 1857, 7923, 1857, 7924, 1861,
- 7925, 1861, 7926, 1865, 7927, 1865,
- 7928, 1869, 7929, 1869, // NOLINT
- 7930, 1873, 7931, 1873, 7932, 1877,
- 7933, 1877, 7934, 1881, 7935, 1881,
- 1073749760, 1885, 7943, 1889, // NOLINT
- 1073749768, 1885, 7951, 1889, 1073749776, 1893,
- 7957, 1897, 1073749784, 1893, 7965, 1897,
- 1073749792, 1901, 7975, 1905, // NOLINT
- 1073749800, 1901, 7983, 1905, 1073749808, 1909,
- 7991, 1913, 1073749816, 1909, 7999, 1913,
- 1073749824, 1917, 8005, 1921, // NOLINT
- 1073749832, 1917, 8013, 1921, 8017, 1925,
- 8019, 1929, 8021, 1933, 8023, 1937,
- 8025, 1925, 8027, 1929, // NOLINT
- 8029, 1933, 8031, 1937, 1073749856, 1941,
- 8039, 1945, 1073749864, 1941, 8047, 1945,
- 1073749872, 1949, 8049, 1953, // NOLINT
- 1073749874, 1957, 8053, 1961, 1073749878, 1965,
- 8055, 1969, 1073749880, 1973, 8057, 1977,
- 1073749882, 1981, 8059, 1985, // NOLINT
- 1073749884, 1989, 8061, 1993, 1073749936, 1997,
- 8113, 2001, 1073749944, 1997, 8121, 2001,
- 1073749946, 1949, 8123, 1953, // NOLINT
- 8126, 749, 1073749960, 1957, 8139, 1961,
- 1073749968, 2005, 8145, 2009, 1073749976, 2005,
- 8153, 2009, 1073749978, 1965, // NOLINT
- 8155, 1969, 1073749984, 2013, 8161, 2017,
- 8165, 2021, 1073749992, 2013, 8169, 2017,
- 1073749994, 1981, 8171, 1985, // NOLINT
- 8172, 2021, 1073750008, 1973, 8185, 1977,
- 1073750010, 1989, 8187, 1993}; // NOLINT
-static const MultiCharacterSpecialCase<2>
- kEcma262UnCanonicalizeMultiStrings1[83] = { // NOLINT
- {{8498, 8526}}, {{8544, 8560}}, {{8559, 8575}},
- {{8579, 8580}}, // NOLINT
- {{9398, 9424}}, {{9423, 9449}}, {{11264, 11312}},
- {{11310, 11358}}, // NOLINT
- {{11360, 11361}}, {{619, 11362}}, {{7549, 11363}},
- {{637, 11364}}, // NOLINT
- {{570, 11365}}, {{574, 11366}}, {{11367, 11368}},
- {{11369, 11370}}, // NOLINT
- {{11371, 11372}}, {{593, 11373}}, {{625, 11374}},
- {{592, 11375}}, // NOLINT
- {{594, 11376}}, {{11378, 11379}}, {{11381, 11382}},
- {{575, 11390}}, // NOLINT
- {{576, 11391}}, {{11392, 11393}}, {{11394, 11395}},
- {{11396, 11397}}, // NOLINT
- {{11398, 11399}}, {{11400, 11401}}, {{11402, 11403}},
- {{11404, 11405}}, // NOLINT
- {{11406, 11407}}, {{11408, 11409}}, {{11410, 11411}},
- {{11412, 11413}}, // NOLINT
- {{11414, 11415}}, {{11416, 11417}}, {{11418, 11419}},
- {{11420, 11421}}, // NOLINT
- {{11422, 11423}}, {{11424, 11425}}, {{11426, 11427}},
- {{11428, 11429}}, // NOLINT
- {{11430, 11431}}, {{11432, 11433}}, {{11434, 11435}},
- {{11436, 11437}}, // NOLINT
- {{11438, 11439}}, {{11440, 11441}}, {{11442, 11443}},
- {{11444, 11445}}, // NOLINT
- {{11446, 11447}}, {{11448, 11449}}, {{11450, 11451}},
- {{11452, 11453}}, // NOLINT
- {{11454, 11455}}, {{11456, 11457}}, {{11458, 11459}},
- {{11460, 11461}}, // NOLINT
- {{11462, 11463}}, {{11464, 11465}}, {{11466, 11467}},
- {{11468, 11469}}, // NOLINT
- {{11470, 11471}}, {{11472, 11473}}, {{11474, 11475}},
- {{11476, 11477}}, // NOLINT
- {{11478, 11479}}, {{11480, 11481}}, {{11482, 11483}},
- {{11484, 11485}}, // NOLINT
- {{11486, 11487}}, {{11488, 11489}}, {{11490, 11491}},
- {{11499, 11500}}, // NOLINT
- {{11501, 11502}}, {{11506, 11507}}, {{4256, 11520}},
- {{4293, 11557}}, // NOLINT
- {{4295, 11559}}, {{4301, 11565}}, {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262UnCanonicalizeTable1Size = 149; // NOLINT
-static const int32_t kEcma262UnCanonicalizeTable1[298] = {
- 306, 1, 334, 1, 1073742176, 5, 367, 9,
- 1073742192, 5, 383, 9, 387, 13, 388, 13, // NOLINT
- 1073743030, 17, 1231, 21, 1073743056, 17, 1257, 21,
- 1073744896, 25, 3118, 29, 1073744944, 25, 3166, 29, // NOLINT
- 3168, 33, 3169, 33, 3170, 37, 3171, 41,
- 3172, 45, 3173, 49, 3174, 53, 3175, 57, // NOLINT
- 3176, 57, 3177, 61, 3178, 61, 3179, 65,
- 3180, 65, 3181, 69, 3182, 73, 3183, 77, // NOLINT
- 3184, 81, 3186, 85, 3187, 85, 3189, 89,
- 3190, 89, 1073745022, 93, 3199, 97, 3200, 101, // NOLINT
- 3201, 101, 3202, 105, 3203, 105, 3204, 109,
- 3205, 109, 3206, 113, 3207, 113, 3208, 117, // NOLINT
- 3209, 117, 3210, 121, 3211, 121, 3212, 125,
- 3213, 125, 3214, 129, 3215, 129, 3216, 133, // NOLINT
- 3217, 133, 3218, 137, 3219, 137, 3220, 141,
- 3221, 141, 3222, 145, 3223, 145, 3224, 149, // NOLINT
- 3225, 149, 3226, 153, 3227, 153, 3228, 157,
- 3229, 157, 3230, 161, 3231, 161, 3232, 165, // NOLINT
- 3233, 165, 3234, 169, 3235, 169, 3236, 173,
- 3237, 173, 3238, 177, 3239, 177, 3240, 181, // NOLINT
- 3241, 181, 3242, 185, 3243, 185, 3244, 189,
- 3245, 189, 3246, 193, 3247, 193, 3248, 197, // NOLINT
- 3249, 197, 3250, 201, 3251, 201, 3252, 205,
- 3253, 205, 3254, 209, 3255, 209, 3256, 213, // NOLINT
- 3257, 213, 3258, 217, 3259, 217, 3260, 221,
- 3261, 221, 3262, 225, 3263, 225, 3264, 229, // NOLINT
- 3265, 229, 3266, 233, 3267, 233, 3268, 237,
- 3269, 237, 3270, 241, 3271, 241, 3272, 245, // NOLINT
- 3273, 245, 3274, 249, 3275, 249, 3276, 253,
- 3277, 253, 3278, 257, 3279, 257, 3280, 261, // NOLINT
- 3281, 261, 3282, 265, 3283, 265, 3284, 269,
- 3285, 269, 3286, 273, 3287, 273, 3288, 277, // NOLINT
- 3289, 277, 3290, 281, 3291, 281, 3292, 285,
- 3293, 285, 3294, 289, 3295, 289, 3296, 293, // NOLINT
- 3297, 293, 3298, 297, 3299, 297, 3307, 301,
- 3308, 301, 3309, 305, 3310, 305, 3314, 309, // NOLINT
- 3315, 309, 1073745152, 313, 3365, 317, 3367, 321,
- 3373, 325}; // NOLINT
-static const MultiCharacterSpecialCase<2>
- kEcma262UnCanonicalizeMultiStrings5[104] = { // NOLINT
- {{42560, 42561}}, {{42562, 42563}},
- {{42564, 42565}}, {{42566, 42567}}, // NOLINT
- {{42568, 42569}}, {{42570, 42571}},
- {{42572, 42573}}, {{42574, 42575}}, // NOLINT
- {{42576, 42577}}, {{42578, 42579}},
- {{42580, 42581}}, {{42582, 42583}}, // NOLINT
- {{42584, 42585}}, {{42586, 42587}},
- {{42588, 42589}}, {{42590, 42591}}, // NOLINT
- {{42592, 42593}}, {{42594, 42595}},
- {{42596, 42597}}, {{42598, 42599}}, // NOLINT
- {{42600, 42601}}, {{42602, 42603}},
- {{42604, 42605}}, {{42624, 42625}}, // NOLINT
- {{42626, 42627}}, {{42628, 42629}},
- {{42630, 42631}}, {{42632, 42633}}, // NOLINT
- {{42634, 42635}}, {{42636, 42637}},
- {{42638, 42639}}, {{42640, 42641}}, // NOLINT
- {{42642, 42643}}, {{42644, 42645}},
- {{42646, 42647}}, {{42648, 42649}}, // NOLINT
- {{42650, 42651}}, {{42786, 42787}},
- {{42788, 42789}}, {{42790, 42791}}, // NOLINT
- {{42792, 42793}}, {{42794, 42795}},
- {{42796, 42797}}, {{42798, 42799}}, // NOLINT
- {{42802, 42803}}, {{42804, 42805}},
- {{42806, 42807}}, {{42808, 42809}}, // NOLINT
- {{42810, 42811}}, {{42812, 42813}},
- {{42814, 42815}}, {{42816, 42817}}, // NOLINT
- {{42818, 42819}}, {{42820, 42821}},
- {{42822, 42823}}, {{42824, 42825}}, // NOLINT
- {{42826, 42827}}, {{42828, 42829}},
- {{42830, 42831}}, {{42832, 42833}}, // NOLINT
- {{42834, 42835}}, {{42836, 42837}},
- {{42838, 42839}}, {{42840, 42841}}, // NOLINT
- {{42842, 42843}}, {{42844, 42845}},
- {{42846, 42847}}, {{42848, 42849}}, // NOLINT
- {{42850, 42851}}, {{42852, 42853}},
- {{42854, 42855}}, {{42856, 42857}}, // NOLINT
- {{42858, 42859}}, {{42860, 42861}},
- {{42862, 42863}}, {{42873, 42874}}, // NOLINT
- {{42875, 42876}}, {{7545, 42877}},
- {{42878, 42879}}, {{42880, 42881}}, // NOLINT
- {{42882, 42883}}, {{42884, 42885}},
- {{42886, 42887}}, {{42891, 42892}}, // NOLINT
- {{613, 42893}}, {{42896, 42897}},
- {{42898, 42899}}, {{42902, 42903}}, // NOLINT
- {{42904, 42905}}, {{42906, 42907}},
- {{42908, 42909}}, {{42910, 42911}}, // NOLINT
- {{42912, 42913}}, {{42914, 42915}},
- {{42916, 42917}}, {{42918, 42919}}, // NOLINT
- {{42920, 42921}}, {{614, 42922}},
- {{604, 42923}}, {{609, 42924}}, // NOLINT
- {{620, 42925}}, {{670, 42928}},
- {{647, 42929}}, {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262UnCanonicalizeTable5Size = 198; // NOLINT
-static const int32_t
- kEcma262UnCanonicalizeTable5[396] =
- {1600, 1, 1601, 1, 1602, 5, 1603, 5,
- 1604, 9, 1605, 9, 1606, 13, 1607, 13, // NOLINT
- 1608, 17, 1609, 17, 1610, 21, 1611, 21,
- 1612, 25, 1613, 25, 1614, 29, 1615, 29, // NOLINT
- 1616, 33, 1617, 33, 1618, 37, 1619, 37,
- 1620, 41, 1621, 41, 1622, 45, 1623, 45, // NOLINT
- 1624, 49, 1625, 49, 1626, 53, 1627, 53,
- 1628, 57, 1629, 57, 1630, 61, 1631, 61, // NOLINT
- 1632, 65, 1633, 65, 1634, 69, 1635, 69,
- 1636, 73, 1637, 73, 1638, 77, 1639, 77, // NOLINT
- 1640, 81, 1641, 81, 1642, 85, 1643, 85,
- 1644, 89, 1645, 89, 1664, 93, 1665, 93, // NOLINT
- 1666, 97, 1667, 97, 1668, 101, 1669, 101,
- 1670, 105, 1671, 105, 1672, 109, 1673, 109, // NOLINT
- 1674, 113, 1675, 113, 1676, 117, 1677, 117,
- 1678, 121, 1679, 121, 1680, 125, 1681, 125, // NOLINT
- 1682, 129, 1683, 129, 1684, 133, 1685, 133,
- 1686, 137, 1687, 137, 1688, 141, 1689, 141, // NOLINT
- 1690, 145, 1691, 145, 1826, 149, 1827, 149,
- 1828, 153, 1829, 153, 1830, 157, 1831, 157, // NOLINT
- 1832, 161, 1833, 161, 1834, 165, 1835, 165,
- 1836, 169, 1837, 169, 1838, 173, 1839, 173, // NOLINT
- 1842, 177, 1843, 177, 1844, 181, 1845, 181,
- 1846, 185, 1847, 185, 1848, 189, 1849, 189, // NOLINT
- 1850, 193, 1851, 193, 1852, 197, 1853, 197,
- 1854, 201, 1855, 201, 1856, 205, 1857, 205, // NOLINT
- 1858, 209, 1859, 209, 1860, 213, 1861, 213,
- 1862, 217, 1863, 217, 1864, 221, 1865, 221, // NOLINT
- 1866, 225, 1867, 225, 1868, 229, 1869, 229,
- 1870, 233, 1871, 233, 1872, 237, 1873, 237, // NOLINT
- 1874, 241, 1875, 241, 1876, 245, 1877, 245,
- 1878, 249, 1879, 249, 1880, 253, 1881, 253, // NOLINT
- 1882, 257, 1883, 257, 1884, 261, 1885, 261,
- 1886, 265, 1887, 265, 1888, 269, 1889, 269, // NOLINT
- 1890, 273, 1891, 273, 1892, 277, 1893, 277,
- 1894, 281, 1895, 281, 1896, 285, 1897, 285, // NOLINT
- 1898, 289, 1899, 289, 1900, 293, 1901, 293,
- 1902, 297, 1903, 297, 1913, 301, 1914, 301, // NOLINT
- 1915, 305, 1916, 305, 1917, 309, 1918, 313,
- 1919, 313, 1920, 317, 1921, 317, 1922, 321, // NOLINT
- 1923, 321, 1924, 325, 1925, 325, 1926, 329,
- 1927, 329, 1931, 333, 1932, 333, 1933, 337, // NOLINT
- 1936, 341, 1937, 341, 1938, 345, 1939, 345,
- 1942, 349, 1943, 349, 1944, 353, 1945, 353, // NOLINT
- 1946, 357, 1947, 357, 1948, 361, 1949, 361,
- 1950, 365, 1951, 365, 1952, 369, 1953, 369, // NOLINT
- 1954, 373, 1955, 373, 1956, 377, 1957, 377,
- 1958, 381, 1959, 381, 1960, 385, 1961, 385, // NOLINT
- 1962, 389, 1963, 393, 1964, 397, 1965, 401,
- 1968, 405, 1969, 409}; // NOLINT
-static const MultiCharacterSpecialCase<2>
- kEcma262UnCanonicalizeMultiStrings7[3] = { // NOLINT
- {{65313, 65345}},
- {{65338, 65370}},
- {{kSentinel}}}; // NOLINT
-static const uint16_t kEcma262UnCanonicalizeTable7Size = 4; // NOLINT
-static const int32_t kEcma262UnCanonicalizeTable7[8] = {
- 1073749793, 1, 7994, 5, 1073749825, 1, 8026, 5}; // NOLINT
-int Ecma262UnCanonicalize::Convert(uchar c, uchar n, uchar* result,
- bool* allow_caching_ptr) {
- int chunk_index = c >> 13;
- switch (chunk_index) {
- case 0:
- return LookupMapping<true>(
- kEcma262UnCanonicalizeTable0, kEcma262UnCanonicalizeTable0Size,
- kEcma262UnCanonicalizeMultiStrings0, c, n, result, allow_caching_ptr);
- case 1:
- return LookupMapping<true>(
- kEcma262UnCanonicalizeTable1, kEcma262UnCanonicalizeTable1Size,
- kEcma262UnCanonicalizeMultiStrings1, c, n, result, allow_caching_ptr);
- case 5:
- return LookupMapping<true>(
- kEcma262UnCanonicalizeTable5, kEcma262UnCanonicalizeTable5Size,
- kEcma262UnCanonicalizeMultiStrings5, c, n, result, allow_caching_ptr);
- case 7:
- return LookupMapping<true>(
- kEcma262UnCanonicalizeTable7, kEcma262UnCanonicalizeTable7Size,
- kEcma262UnCanonicalizeMultiStrings7, c, n, result, allow_caching_ptr);
- default:
- return 0;
- }
-}
-
-static const MultiCharacterSpecialCase<1>
- kCanonicalizationRangeMultiStrings0[1] = { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kCanonicalizationRangeTable0Size = 70; // NOLINT
-static const int32_t kCanonicalizationRangeTable0[140] = {
- 1073741889, 100, 90, 0, 1073741921, 100, 122, 0,
- 1073742016, 88, 214, 0, 1073742040, 24, 222, 0, // NOLINT
- 1073742048, 88, 246, 0, 1073742072, 24, 254, 0,
- 1073742715, 8, 893, 0, 1073742728, 8, 906, 0, // NOLINT
- 1073742749, 8, 927, 0, 1073742759, 16, 939, 0,
- 1073742765, 8, 943, 0, 1073742781, 8, 959, 0, // NOLINT
- 1073742791, 16, 971, 0, 1073742845, 8, 1023, 0,
- 1073742848, 60, 1039, 0, 1073742864, 124, 1071, 0, // NOLINT
- 1073742896, 124, 1103, 0, 1073742928, 60, 1119, 0,
- 1073743153, 148, 1366, 0, 1073743201, 148, 1414, 0, // NOLINT
- 1073746080, 148, 4293, 0, 1073749760, 28, 7943, 0,
- 1073749768, 28, 7951, 0, 1073749776, 20, 7957, 0, // NOLINT
- 1073749784, 20, 7965, 0, 1073749792, 28, 7975, 0,
- 1073749800, 28, 7983, 0, 1073749808, 28, 7991, 0, // NOLINT
- 1073749816, 28, 7999, 0, 1073749824, 20, 8005, 0,
- 1073749832, 20, 8013, 0, 1073749856, 28, 8039, 0, // NOLINT
- 1073749864, 28, 8047, 0, 1073749874, 12, 8053, 0,
- 1073749960, 12, 8139, 0}; // NOLINT
-static const MultiCharacterSpecialCase<1>
- kCanonicalizationRangeMultiStrings1[1] = { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kCanonicalizationRangeTable1Size = 14; // NOLINT
-static const int32_t kCanonicalizationRangeTable1[28] = {
- 1073742176, 60, 367, 0, 1073742192, 60, 383, 0,
- 1073743030, 100, 1231, 0, 1073743056, 100, 1257, 0, // NOLINT
- 1073744896, 184, 3118, 0, 1073744944, 184, 3166, 0,
- 1073745152, 148, 3365, 0}; // NOLINT
-static const MultiCharacterSpecialCase<1>
- kCanonicalizationRangeMultiStrings7[1] = { // NOLINT
- {{kSentinel}}}; // NOLINT
-static const uint16_t kCanonicalizationRangeTable7Size = 4; // NOLINT
-static const int32_t kCanonicalizationRangeTable7[8] = {
- 1073749793, 100, 7994, 0, 1073749825, 100, 8026, 0}; // NOLINT
-int CanonicalizationRange::Convert(uchar c, uchar n, uchar* result,
- bool* allow_caching_ptr) {
- int chunk_index = c >> 13;
- switch (chunk_index) {
- case 0:
- return LookupMapping<false>(
- kCanonicalizationRangeTable0, kCanonicalizationRangeTable0Size,
- kCanonicalizationRangeMultiStrings0, c, n, result, allow_caching_ptr);
- case 1:
- return LookupMapping<false>(
- kCanonicalizationRangeTable1, kCanonicalizationRangeTable1Size,
- kCanonicalizationRangeMultiStrings1, c, n, result, allow_caching_ptr);
- case 7:
- return LookupMapping<false>(
- kCanonicalizationRangeTable7, kCanonicalizationRangeTable7Size,
- kCanonicalizationRangeMultiStrings7, c, n, result, allow_caching_ptr);
- default:
- return 0;
- }
-}
-
-#endif // !V8_INTL_SUPPORT
-
-} // namespace unibrow
-} // namespace v8
diff --git a/js/src/new-regexp/util/vector.h b/js/src/new-regexp/util/vector.h
deleted file mode 100644
index 435318ce7..000000000
--- a/js/src/new-regexp/util/vector.h
+++ /dev/null
@@ -1,204 +0,0 @@
-// Copyright 2014 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_UTIL_VECTOR_H_
-#define V8_UTIL_VECTOR_H_
-
-#include <algorithm>
-#include <cstring>
-#include <iterator>
-#include <memory>
-
-#include "js/Utility.h"
-
-namespace v8 {
-namespace internal {
-
-//////////////////////////////////////////////////
-
-// Adapted from: https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/allocation.h#L36-L58
-
-template <typename T>
-T* NewArray(size_t size) {
- static_assert(std::is_pod<T>::value, "");
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- T* result = static_cast<T*>(js_malloc(size * sizeof(T)));
- if (!result) {
- oomUnsafe.crash("Irregexp NewArray");
- }
- return result;
-}
-
-template <typename T>
-void DeleteArray(T* array) {
- js_free(array);
-}
-
-//////////////////////////////////////////////////
-
-// A non-resizable vector containing a pointer and a length.
-// The Vector may or may not own the pointer, depending on context.
-// Origin:
-// https://github.com/v8/v8/blob/5f69bbc233c2d1baf149faf869a7901603929914/src/utils/vector.h#L20-L134
-
-template <typename T>
-class Vector {
- public:
- Vector() : start_(nullptr), length_(0) {}
-
- Vector(T* data, size_t length) : start_(data), length_(length) {
- MOZ_ASSERT_IF(length != 0, data != nullptr);
- }
-
- static Vector<T> New(size_t length) {
- return Vector<T>(NewArray<T>(length), length);
- }
-
- // Returns a vector using the same backing storage as this one,
- // spanning from and including 'from', to but not including 'to'.
- Vector<T> SubVector(size_t from, size_t to) const {
- MOZ_ASSERT(from < to);
- MOZ_ASSERT(to < length_);
- return Vector<T>(begin() + from, to - from);
- }
-
- // Returns the length of the vector. Only use this if you really need an
- // integer return value. Use {size()} otherwise.
- int length() const {
- MOZ_ASSERT(length_ <= std::numeric_limits<int>::max());
- return static_cast<int>(length_);
- }
-
- // Returns the length of the vector as a size_t.
- constexpr size_t size() const { return length_; }
-
- // Returns whether or not the vector is empty.
- constexpr bool empty() const { return length_ == 0; }
-
- // Access individual vector elements - checks bounds in debug mode.
- T& operator[](size_t index) const {
- MOZ_ASSERT(index < length_);
- return start_[index];
- }
-
- const T& at(size_t index) const { return operator[](index); }
-
- T& first() { return start_[0]; }
-
- T& last() {
- MOZ_ASSERT(length_ > 0);
- return start_[length_ - 1];
- }
-
- // Returns a pointer to the start of the data in the vector.
- constexpr T* begin() const { return start_; }
-
- // Returns a pointer past the end of the data in the vector.
- constexpr T* end() const { return start_ + length_; }
-
- // Returns a clone of this vector with a new backing store.
- Vector<T> Clone() const {
- T* result = NewArray<T>(length_);
- for (size_t i = 0; i < length_; i++) result[i] = start_[i];
- return Vector<T>(result, length_);
- }
-
- void Truncate(size_t length) {
- MOZ_ASSERT(length <= length_);
- length_ = length;
- }
-
- // Releases the array underlying this vector. Once disposed the
- // vector is empty.
- void Dispose() {
- DeleteArray(start_);
- start_ = nullptr;
- length_ = 0;
- }
-
- Vector<T> operator+(size_t offset) {
- MOZ_ASSERT(offset <= length_);
- return Vector<T>(start_ + offset, length_ - offset);
- }
-
- Vector<T> operator+=(size_t offset) {
- MOZ_ASSERT(offset <= length_);
- start_ += offset;
- length_ -= offset;
- return *this;
- }
-
- // Implicit conversion from Vector<T> to Vector<const T>.
- inline operator Vector<const T>() const {
- return Vector<const T>::cast(*this);
- }
-
- template <typename S>
- static constexpr Vector<T> cast(Vector<S> input) {
- return Vector<T>(reinterpret_cast<T*>(input.begin()),
- input.length() * sizeof(S) / sizeof(T));
- }
-
- bool operator==(const Vector<const T> other) const {
- if (length_ != other.length_) return false;
- if (start_ == other.start_) return true;
- for (size_t i = 0; i < length_; ++i) {
- if (start_[i] != other.start_[i]) {
- return false;
- }
- }
- return true;
- }
-
- private:
- T* start_;
- size_t length_;
-};
-
-// The resulting vector does not contain a null-termination byte. If you want
-// the null byte, use ArrayVector("foo").
-inline Vector<const char> CStrVector(const char* data) {
- return Vector<const char>(data, strlen(data));
-}
-
-} // namespace internal
-
-namespace base {
-
-// SmallVector uses inline storage first, and reallocates when full.
-// It is basically equivalent to js::Vector, and is implemented
-// as a thin wrapper.
-// V8's implementation: https://github.com/v8/v8/blob/master/src/base/small-vector.h
-template <typename T, size_t kSize>
-class SmallVector {
-public:
- inline bool empty() const { return inner_.empty(); }
- inline const T& back() const { return inner_.back(); }
- inline void pop_back() { inner_.popBack(); };
- template <typename... Args>
- inline void emplace_back(Args&&... args) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- if (!inner_.emplaceBack(args...)) {
- oomUnsafe.crash("Irregexp SmallVector emplace_back");
- }
- };
- inline size_t size() const { return inner_.length(); }
- inline const T& at(size_t index) const { return inner_[index]; }
-
- void resize_no_init(size_t new_size) {
- js::AutoEnterOOMUnsafeRegion oomUnsafe;
- if (!inner_.resizeUninitialized(new_size)) {
- oomUnsafe.crash("Irregexp SmallVector resize");
- }
- }
-private:
- js::Vector<T, kSize, js::SystemAllocPolicy> inner_;
-};
-
-
-} // namespace base
-
-} // namespace v8
-
-#endif // V8_UTIL_VECTOR_H_
diff --git a/js/src/new-regexp/util/zone.h b/js/src/new-regexp/util/zone.h
deleted file mode 100644
index 7183f77b7..000000000
--- a/js/src/new-regexp/util/zone.h
+++ /dev/null
@@ -1,375 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_UTIL_ZONE_H_
-#define V8_UTIL_ZONE_H_
-
-#include <list>
-#include <map>
-#include <set>
-#include <unordered_map>
-#include <vector>
-
-#include "ds/LifoAlloc.h"
-#include "ds/Sort.h"
-#include "new-regexp/util/vector.h"
-
-namespace v8 {
-namespace internal {
-
-// V8::Zone ~= LifoAlloc
-// Stand-in for V8's Zone that hands out memory from a js::LifoAlloc.
-class Zone {
- public:
-  // `defaultChunkSize` is passed straight to the js::LifoAlloc constructor.
-  Zone(size_t defaultChunkSize) : lifoAlloc_(defaultChunkSize) {
-    lifoAlloc_.setAsInfallibleByDefault();
-  }
-
-  // Allocates `size` bytes from the underlying LifoAlloc. A null result is
-  // never handed back to the caller: allocation failure goes through
-  // oomUnsafe.crash().
-  void* New(size_t size) {
-    js::LifoAlloc::AutoFallibleScope fallible(&lifoAlloc_);
-    js::AutoEnterOOMUnsafeRegion oomUnsafe;
-    void* const memory = lifoAlloc_.alloc(size);
-    if (memory == nullptr) {
-      oomUnsafe.crash("Irregexp Zone::new");
-    }
-    return memory;
-  }
-
-  // Releases everything allocated from this zone in one go.
-  void DeleteAll() { lifoAlloc_.freeAll(); }
-
-  // Threshold (256 MiB) used by excess_allocation().
-  static const size_t kExcessLimit = 256 * 1024 * 1024;
-
-  // Returns true if the total memory allocated exceeds kExcessLimit.
-  bool excess_allocation() const {
-    return kExcessLimit < lifoAlloc_.computedSizeOfExcludingThis();
-  }
-
- private:
-  js::LifoAlloc lifoAlloc_;
-};
-
-// Base class for objects whose storage comes from a Zone.
-// Origin: https://github.com/v8/v8/blob/7b3332844212d78ee87a9426f3a6f7f781a8fbfa/src/zone/zone.h#L138-L155
-class ZoneObject {
- public:
-  // Placement-style allocation (`new (zone) Foo(...)`): the `size` bytes for
-  // the object are taken from `zone`.
-  void* operator new(size_t size, Zone* zone) { return zone->New(size); }
-
-  // ZoneObjects should never be deleted individually; use
-  // Zone::DeleteAll() to delete all zone objects in one go. Both delete
-  // operators therefore crash if they are ever reached.
-  //
-  // They would ideally be private, but the compiler sometimes synthesizes
-  // (unused) destructors for classes derived from ZoneObject, which require
-  // the operator to be visible, and MSVC requires the delete operator to be
-  // public.
-  void operator delete(void*, size_t) { MOZ_CRASH("unreachable"); }
-  void operator delete(void*, Zone*) { MOZ_CRASH("unreachable"); }
-};
-
-// ZoneLists are growable lists with constant-time access to the
-// elements. The list itself and all its elements are allocated in the
-// Zone. ZoneLists cannot be deleted individually; you can delete all
-// objects in the Zone by calling Zone::DeleteAll().
-// Used throughout irregexp.
-// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone.h#L173-L318
-// Inlines: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-list-inl.h#L17-L155
-template <typename T>
-class ZoneList final {
- public:
-  // Construct a new ZoneList with the given capacity; the length is
-  // always zero. The capacity must be non-negative.
-  ZoneList(int capacity, Zone* zone) { Initialize(capacity, zone); }
-  // Construct a new ZoneList from a std::initializer_list
-  ZoneList(std::initializer_list<T> list, Zone* zone) {
-    Initialize(static_cast<int>(list.size()), zone);
-    for (auto& i : list) Add(i, zone);
-  }
-  // Construct a new ZoneList by copying the elements of the given ZoneList.
-  ZoneList(const ZoneList<T>& other, Zone* zone) {
-    Initialize(other.length(), zone);
-    AddAll(other, zone);
-  }
-
-  // Allocates the list object itself from `zone`.
-  void* operator new(size_t size, Zone* zone) { return zone->New(size); }
-
-  // Returns a reference to the element at index i. This reference is not safe
-  // to use after operations that can change the list's backing store
-  // (e.g. Add). Valid indices are 0 <= i < length(); both bounds are asserted.
-  inline T& operator[](int i) const {
-    // Index 0 must be accepted: first(), at(0) and last() on a one-element
-    // list all come through here.
-    MOZ_ASSERT(0 <= i);
-    MOZ_ASSERT(static_cast<unsigned>(i) < static_cast<unsigned>(length_));
-    return data_[i];
-  }
-  inline T& at(int i) const { return operator[](i); }
-  inline T& last() const { return at(length_ - 1); }
-  inline T& first() const { return at(0); }
-
-  // Iteration is over raw element pointers into the backing store.
-  using iterator = T*;
-  inline iterator begin() const { return &data_[0]; }
-  inline iterator end() const { return &data_[length_]; }
-
-  inline bool is_empty() const { return length_ == 0; }
-  inline int length() const { return length_; }
-  inline int capacity() const { return capacity_; }
-
-  // Views over the backing store; no elements are copied.
-  Vector<T> ToVector() const { return Vector<T>(data_, length_); }
-  // View starting at `start` covering at most `length` elements, clamped to
-  // the end of the list.
-  Vector<T> ToVector(int start, int length) const {
-    return Vector<T>(data_ + start, std::min(length_ - start, length));
-  }
-
-  Vector<const T> ToConstVector() const {
-    return Vector<const T>(data_, length_);
-  }
-
-  // (Re)initializes the list to be empty with room for `capacity` elements.
-  // No storage is allocated when capacity is zero.
-  inline void Initialize(int capacity, Zone* zone) {
-    MOZ_ASSERT(capacity >= 0);
-    data_ = (capacity > 0) ? NewData(capacity, zone) : nullptr;
-    capacity_ = capacity;
-    length_ = 0;
-  }
-
-  // Adds a copy of the given 'element' to the end of the list,
-  // expanding the list if necessary.
-  void Add(const T& element, Zone* zone) {
-    if (length_ < capacity_) {
-      data_[length_++] = element;
-    } else {
-      ZoneList<T>::ResizeAdd(element, zone);
-    }
-  }
-  // Add all the elements from the argument list to this list.
-  void AddAll(const ZoneList<T>& other, Zone* zone) {
-    AddAll(other.ToVector(), zone);
-  }
-  // Add all the elements from the vector to this list.
-  void AddAll(const Vector<T>& other, Zone* zone) {
-    int result_length = length_ + other.length();
-    if (capacity_ < result_length) {
-      Resize(result_length, zone);
-    }
-    // Fundamental types are block-copied; everything else is assigned
-    // element by element.
-    if (std::is_fundamental<T>()) {
-      memcpy(data_ + length_, other.begin(), sizeof(*data_) * other.length());
-    } else {
-      for (int i = 0; i < other.length(); i++) {
-        data_[length_ + i] = other.at(i);
-      }
-    }
-    length_ = result_length;
-  }
-
-  // Overwrites the element at the specific index.
-  // NOTE(review): the assertion admits index == length_, one past the last
-  // element; confirm with callers whether that is intended before tightening.
-  void Set(int index, const T& element) {
-    MOZ_ASSERT(index >= 0 && index <= length_);
-    data_[index] = element;
-  }
-
-  // Removes the i'th element without deleting it even if T is a
-  // pointer type; moves all elements above i "down". Returns the
-  // removed element. This function's complexity is linear in the
-  // size of the list.
-  T Remove(int i) {
-    T element = at(i);
-    length_--;
-    while (i < length_) {
-      data_[i] = data_[i + 1];
-      i++;
-    }
-    return element;
-  }
-
-  // Removes the last element without deleting it even if T is a
-  // pointer type. Returns the removed element.
-  inline T RemoveLast() { return Remove(length_ - 1); }
-
-  // Clears the list by dropping its pointer to the backing store; the storage
-  // itself is not released here. If you want to keep the memory for reuse,
-  // use Rewind(0) instead. Be aware, that even if T is a pointer type,
-  // clearing the list doesn't delete the entries.
-  inline void Clear() {
-    data_ = nullptr;
-    capacity_ = 0;
-    length_ = 0;
-  }
-
-  // Drops all but the first 'pos' elements from the list.
-  inline void Rewind(int pos) {
-    MOZ_ASSERT(0 <= pos && pos <= length_);
-    length_ = pos;
-  }
-
-  // Linear search using operator==.
-  inline bool Contains(const T& elm) const {
-    for (int i = 0; i < length_; i++) {
-      if (data_[i] == elm) return true;
-    }
-    return false;
-  }
-
-  // Sorts the `length` elements starting at index `start` with js::MergeSort.
-  // `cmp` is called with pointers to two elements; a result <= 0 means the
-  // first is ordered before or equal to the second. A temporary scratch
-  // buffer of `length` elements is malloc'd for the merge and freed
-  // afterwards; failing to get it crashes.
-  template <typename CompareFunction>
-  void StableSort(CompareFunction cmp, size_t start, size_t length) {
-    js::AutoEnterOOMUnsafeRegion oomUnsafe;
-    T* scratch = static_cast<T*>(js_malloc(length * sizeof(T)));
-    if (!scratch) {
-      oomUnsafe.crash("Irregexp stable sort scratch space");
-    }
-    auto comparator = [cmp](const T& a, const T& b, bool* lessOrEqual) {
-      *lessOrEqual = cmp(&a, &b) <= 0;
-      return true;
-    };
-    MOZ_ALWAYS_TRUE(js::MergeSort(begin() + start, length, scratch,
-                                  comparator));
-    js_free(scratch);
-  }
-
-  // ZoneLists are never deleted individually; reaching either operator is a
-  // bug.
-  void operator delete(void* pointer) { MOZ_CRASH("unreachable"); }
-  void operator delete(void* pointer, Zone* zone) { MOZ_CRASH("unreachable"); }
-
- private:
-  T* data_;
-  int capacity_;
-  int length_;
-
-  // Raw, unconstructed storage for `n` elements taken from `zone`.
-  inline T* NewData(int n, Zone* zone) {
-    return static_cast<T*>(zone->New(n * sizeof(T)));
-  }
-
-  // Increase the capacity of a full list, and add an element.
-  // List must be full already.
-  void ResizeAdd(const T& element, Zone* zone) {
-    MOZ_ASSERT(length_ >= capacity_);
-    // Grow the list capacity by 100%, but make sure to let it grow
-    // even when the capacity is zero (possible initial case).
-    int new_capacity = 1 + 2 * capacity_;
-    // Since the element reference could be an element of the list, copy
-    // it out of the old backing storage before resizing.
-    T temp = element;
-    Resize(new_capacity, zone);
-    data_[length_++] = temp;
-  }
-
-  // Resize the list: takes fresh storage from the zone and memcpy's the
-  // existing elements across. The old storage is simply abandoned.
-  void Resize(int new_capacity, Zone* zone) {
-    MOZ_ASSERT(length_ <= new_capacity);
-    T* new_data = NewData(new_capacity, zone);
-    if (length_ > 0) {
-      memcpy(new_data, data_, length_ * sizeof(T));
-    }
-    data_ = new_data;
-    capacity_ = new_capacity;
-  }
-
-  // Lists must always be constructed against a Zone; plain copying and
-  // default construction are disabled.
-  ZoneList& operator=(const ZoneList&) = delete;
-  ZoneList() = delete;
-  ZoneList(const ZoneList&) = delete;
-};
-
-// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-allocator.h#L14-L77
-// Allocator adapter that lets standard containers take their memory from a
-// Zone.
-template <typename T>
-class ZoneAllocator {
- public:
-  using value_type = T;
-  using pointer = T*;
-  using const_pointer = const T*;
-  using reference = T&;
-  using const_reference = const T&;
-  using size_type = size_t;
-  using difference_type = ptrdiff_t;
-
-  template <class O>
-  struct rebind {
-    using other = ZoneAllocator<O>;
-  };
-
-  explicit ZoneAllocator(Zone* zone) : zone_(zone) {}
-
-  // Converting constructor: an allocator for another element type is turned
-  // into one for T that uses the same Zone.
-  template <typename U>
-  ZoneAllocator(const ZoneAllocator<U>& other) : zone_(other.zone_) {}
-
-  // Storage for `n` objects of type T, taken from the zone.
-  T* allocate(size_t n) {
-    const size_t bytes = n * sizeof(T);
-    return static_cast<T*>(zone_->New(bytes));
-  }
-  void deallocate(T*, size_t) {}  // noop for zones
-
-  // Two allocators are equal exactly when they use the same Zone.
-  bool operator==(ZoneAllocator const& other) const {
-    return zone_ == other.zone_;
-  }
-  bool operator!=(ZoneAllocator const& other) const {
-    return !(*this == other);
-  }
-
- private:
-  // The converting constructor reads zone_ of other instantiations.
-  template <typename U>
-  friend class ZoneAllocator;
-
-  Zone* zone_;
-};
-
-// Zone wrappers for std containers:
-// Origin: https://github.com/v8/v8/blob/5e514a969376dc63517d575b062758efd36cd757/src/zone/zone-containers.h#L25-L169
-
-// std::vector specialised to allocate through ZoneAllocator, with
-// constructors that take the Zone directly.
-// Used throughout irregexp
-template <typename T>
-class ZoneVector : public std::vector<T, ZoneAllocator<T>> {
-  using Base = std::vector<T, ZoneAllocator<T>>;
-
- public:
-  // Constructs an empty vector allocating from `zone`.
-  ZoneVector(Zone* zone) : Base(ZoneAllocator<T>(zone)) {}
-
-  // Constructs a new vector and fills it with the contents of the range
-  // [first, last).
-  template <class Iter>
-  ZoneVector(Iter first, Iter last, Zone* zone)
-      : Base(first, last, ZoneAllocator<T>(zone)) {}
-};
-
-// std::list specialised to allocate through ZoneAllocator, with a
-// constructor that takes the Zone directly.
-// Used in regexp-bytecode-peephole.cc
-template <typename T>
-class ZoneLinkedList : public std::list<T, ZoneAllocator<T>> {
-  using Base = std::list<T, ZoneAllocator<T>>;
-
- public:
-  // Constructs an empty list.
-  explicit ZoneLinkedList(Zone* zone) : Base(ZoneAllocator<T>(zone)) {}
-};
-
-// std::set specialised to allocate through ZoneAllocator, with a
-// constructor that takes the Zone directly.
-// Used in regexp-parser.cc
-template <typename K, typename Compare = std::less<K>>
-class ZoneSet : public std::set<K, Compare, ZoneAllocator<K>> {
-  using Base = std::set<K, Compare, ZoneAllocator<K>>;
-
- public:
-  // Constructs an empty set.
-  explicit ZoneSet(Zone* zone) : Base(Compare(), ZoneAllocator<K>(zone)) {}
-};
-
-// std::map specialised to allocate through ZoneAllocator, with a
-// constructor that takes the Zone directly.
-// Used in regexp-bytecode-peephole.cc
-template <typename K, typename V, typename Compare = std::less<K>>
-class ZoneMap
-    : public std::map<K, V, Compare, ZoneAllocator<std::pair<const K, V>>> {
-  using Allocator = ZoneAllocator<std::pair<const K, V>>;
-  using Base = std::map<K, V, Compare, Allocator>;
-
- public:
-  // Constructs an empty map.
-  explicit ZoneMap(Zone* zone) : Base(Compare(), Allocator(zone)) {}
-};
-
-// std::unordered_map specialised to allocate through ZoneAllocator, with a
-// constructor that takes the Zone directly.
-// Used in regexp-bytecode-peephole.cc
-template <typename K, typename V, typename Hash = std::hash<K>,
-          typename KeyEqual = std::equal_to<K>>
-class ZoneUnorderedMap
-    : public std::unordered_map<K, V, Hash, KeyEqual,
-                                ZoneAllocator<std::pair<const K, V>>> {
-  using Allocator = ZoneAllocator<std::pair<const K, V>>;
-  using Base = std::unordered_map<K, V, Hash, KeyEqual, Allocator>;
-
- public:
-  // Constructs an empty map with `bucket_count` initial buckets (100 unless
-  // the caller says otherwise).
-  explicit ZoneUnorderedMap(Zone* zone, size_t bucket_count = 100)
-      : Base(bucket_count, Hash(), KeyEqual(), Allocator(zone)) {}
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_UTIL_ZONE_H_