1 files changed, 0 insertions, 1589 deletions
diff --git a/js/src/new-regexp/regexp-compiler-tonode.cc b/js/src/new-regexp/regexp-compiler-tonode.cc
deleted file mode 100644
index 7de167eef..000000000
--- a/js/src/new-regexp/regexp-compiler-tonode.cc
+++ /dev/null
@@ -1,1589 +0,0 @@
-// Copyright 2019 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "new-regexp/regexp-compiler.h"
-
-#include "new-regexp/regexp.h"
-#ifdef V8_INTL_SUPPORT
-#include "new-regexp/special-case.h"
-#endif  // V8_INTL_SUPPORT
-
-#ifdef V8_INTL_SUPPORT
-#include "unicode/locid.h"
-#include "unicode/uniset.h"
-#include "unicode/utypes.h"
-#endif  // V8_INTL_SUPPORT
-
-namespace v8 {
-namespace internal {
-
-using namespace regexp_compiler_constants;  // NOLINT(build/namespaces)
-
-// -------------------------------------------------------------------
-// Tree to graph conversion
-
-RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
-                               RegExpNode* on_success) {
-  ZoneList<TextElement>* elms =
-      new (compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
-  elms->Add(TextElement::Atom(this), compiler->zone());
-  return new (compiler->zone())
-      TextNode(elms, compiler->read_backward(), on_success);
-}
-
-RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
-                               RegExpNode* on_success) {
-  return new (compiler->zone())
-      TextNode(elements(), compiler->read_backward(), on_success);
-}
-
-static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
-                                 const int* special_class, int length) {
-  length--;  // Remove final marker.
-  DCHECK_EQ(kRangeEndMarker, special_class[length]);
-  DCHECK_NE(0, ranges->length());
-  DCHECK_NE(0, length);
-  DCHECK_NE(0, special_class[0]);
-  if (ranges->length() != (length >> 1) + 1) {
-    return false;
-  }
-  CharacterRange range = ranges->at(0);
-  if (range.from() != 0) {
-    return false;
-  }
-  for (int i = 0; i < length; i += 2) {
-    if (special_class[i] != (range.to() + 1)) {
-      return false;
-    }
-    range = ranges->at((i >> 1) + 1);
-    if (special_class[i + 1] != range.from()) {
-      return false;
-    }
-  }
-  if (range.to() != String::kMaxCodePoint) {
-    return false;
-  }
-  return true;
-}
-
-static bool CompareRanges(ZoneList<CharacterRange>* ranges,
-                          const int* special_class, int length) {
-  length--;  // Remove final marker.
-  DCHECK_EQ(kRangeEndMarker, special_class[length]);
-  if (ranges->length() * 2 != length) {
-    return false;
-  }
-  for (int i = 0; i < length; i += 2) {
-    CharacterRange range = ranges->at(i >> 1);
-    if (range.from() != special_class[i] ||
-        range.to() != special_class[i + 1] - 1) {
-      return false;
-    }
-  }
-  return true;
-}
-
-bool RegExpCharacterClass::is_standard(Zone* zone) {
-  // TODO(lrn): Remove need for this function, by not throwing away information
-  // along the way.
-  if (is_negated()) {
-    return false;
-  }
-  if (set_.is_standard()) {
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('s');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
-    set_.set_standard_set_type('S');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone), kLineTerminatorRanges,
-                           kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('.');
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone), kLineTerminatorRanges,
-                    kLineTerminatorRangeCount)) {
-    set_.set_standard_set_type('n');
-    return true;
-  }
-  if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('w');
-    return true;
-  }
-  if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
-    set_.set_standard_set_type('W');
-    return true;
-  }
-  return false;
-}
-
-UnicodeRangeSplitter::UnicodeRangeSplitter(ZoneList<CharacterRange>* base) {
-  // The unicode range splitter categorizes given character ranges into:
-  // - Code points from the BMP representable by one code unit.
-  // - Code points outside the BMP that need to be split into surrogate pairs.
-  // - Lone lead surrogates.
-  // - Lone trail surrogates.
-  // Lone surrogates are valid code points, even though no actual characters.
-  // They require special matching to make sure we do not split surrogate pairs.
-
-  for (int i = 0; i < base->length(); i++) AddRange(base->at(i));
-}
-
-void UnicodeRangeSplitter::AddRange(CharacterRange range) {
-  static constexpr uc32 kBmp1Start = 0;
-  static constexpr uc32 kBmp1End = kLeadSurrogateStart - 1;
-  static constexpr uc32 kBmp2Start = kTrailSurrogateEnd + 1;
-  static constexpr uc32 kBmp2End = kNonBmpStart - 1;
-
-  // Ends are all inclusive.
-  STATIC_ASSERT(kBmp1Start == 0);
-  STATIC_ASSERT(kBmp1Start < kBmp1End);
-  STATIC_ASSERT(kBmp1End + 1 == kLeadSurrogateStart);
-  STATIC_ASSERT(kLeadSurrogateStart < kLeadSurrogateEnd);
-  STATIC_ASSERT(kLeadSurrogateEnd + 1 == kTrailSurrogateStart);
-  STATIC_ASSERT(kTrailSurrogateStart < kTrailSurrogateEnd);
-  STATIC_ASSERT(kTrailSurrogateEnd + 1 == kBmp2Start);
-  STATIC_ASSERT(kBmp2Start < kBmp2End);
-  STATIC_ASSERT(kBmp2End + 1 == kNonBmpStart);
-  STATIC_ASSERT(kNonBmpStart < kNonBmpEnd);
-
-  static constexpr uc32 kStarts[] = {
-      kBmp1Start, kLeadSurrogateStart, kTrailSurrogateStart,
-      kBmp2Start, kNonBmpStart,
-  };
-
-  static constexpr uc32 kEnds[] = {
-      kBmp1End, kLeadSurrogateEnd, kTrailSurrogateEnd, kBmp2End, kNonBmpEnd,
-  };
-
-  CharacterRangeVector* const kTargets[] = {
-      &bmp_, &lead_surrogates_, &trail_surrogates_, &bmp_, &non_bmp_,
-  };
-
-  static constexpr int kCount = arraysize(kStarts);
-  STATIC_ASSERT(kCount == arraysize(kEnds));
-  STATIC_ASSERT(kCount == arraysize(kTargets));
-
-  for (int i = 0; i < kCount; i++) {
-    if (kStarts[i] > range.to()) break;
-    const uc32 from = std::max(kStarts[i], range.from());
-    const uc32 to = std::min(kEnds[i], range.to());
-    if (from > to) continue;
-    kTargets[i]->emplace_back(CharacterRange::Range(from, to));
-  }
-}
-
-namespace {
-
-// Translates between new and old V8-isms (SmallVector, ZoneList).
-ZoneList<CharacterRange>* ToCanonicalZoneList(
-    const UnicodeRangeSplitter::CharacterRangeVector* v, Zone* zone) {
-  if (v->empty()) return nullptr;
-
-  ZoneList<CharacterRange>* result =
-      new (zone) ZoneList<CharacterRange>(static_cast<int>(v->size()), zone);
-  for (size_t i = 0; i < v->size(); i++) {
-    result->Add(v->at(i), zone);
-  }
-
-  CharacterRange::Canonicalize(result);
-  return result;
-}
-
-void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
-                      RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
-  ZoneList<CharacterRange>* bmp =
-      ToCanonicalZoneList(splitter->bmp(), compiler->zone());
-  if (bmp == nullptr) return;
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
-      compiler->zone(), bmp, compiler->read_backward(), on_success,
-      default_flags)));
-}
-
-void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
-                             RegExpNode* on_success,
-                             UnicodeRangeSplitter* splitter) {
-  ZoneList<CharacterRange>* non_bmp =
-      ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
-  if (non_bmp == nullptr) return;
-  DCHECK(!compiler->one_byte());
-  Zone* zone = compiler->zone();
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  CharacterRange::Canonicalize(non_bmp);
-  for (int i = 0; i < non_bmp->length(); i++) {
-    // Match surrogate pair.
-    // E.g. [\u10005-\u11005] becomes
-    //      \ud800[\udc05-\udfff]|
-    //      [\ud801-\ud803][\udc00-\udfff]|
-    //      \ud804[\udc00-\udc05]
-    uc32 from = non_bmp->at(i).from();
-    uc32 to = non_bmp->at(i).to();
-    uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
-    uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
-    uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
-    uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
-    if (from_l == to_l) {
-      // The lead surrogate is the same.
-      result->AddAlternative(
-          GuardedAlternative(TextNode::CreateForSurrogatePair(
-              zone, CharacterRange::Singleton(from_l),
-              CharacterRange::Range(from_t, to_t), compiler->read_backward(),
-              on_success, default_flags)));
-    } else {
-      if (from_t != kTrailSurrogateStart) {
-        // Add [from_l][from_t-\udfff]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Singleton(from_l),
-                CharacterRange::Range(from_t, kTrailSurrogateEnd),
-                compiler->read_backward(), on_success, default_flags)));
-        from_l++;
-      }
-      if (to_t != kTrailSurrogateEnd) {
-        // Add [to_l][\udc00-to_t]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Singleton(to_l),
-                CharacterRange::Range(kTrailSurrogateStart, to_t),
-                compiler->read_backward(), on_success, default_flags)));
-        to_l--;
-      }
-      if (from_l <= to_l) {
-        // Add [from_l-to_l][\udc00-\udfff]
-        result->AddAlternative(
-            GuardedAlternative(TextNode::CreateForSurrogatePair(
-                zone, CharacterRange::Range(from_l, to_l),
-                CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
-                compiler->read_backward(), on_success, default_flags)));
-      }
-    }
-  }
-}
-
-RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
-    RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
-    ZoneList<CharacterRange>* match, RegExpNode* on_success, bool read_backward,
-    JSRegExp::Flags flags) {
-  Zone* zone = compiler->zone();
-  RegExpNode* match_node = TextNode::CreateForCharacterRanges(
-      zone, match, read_backward, on_success, flags);
-  int stack_register = compiler->UnicodeLookaroundStackRegister();
-  int position_register = compiler->UnicodeLookaroundPositionRegister();
-  RegExpLookaround::Builder lookaround(false, match_node, stack_register,
-                                       position_register);
-  RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
-      zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
-  return lookaround.ForMatch(negative_match);
-}
-
-RegExpNode* MatchAndNegativeLookaroundInReadDirection(
-    RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
-    ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
-    bool read_backward, JSRegExp::Flags flags) {
-  Zone* zone = compiler->zone();
-  int stack_register = compiler->UnicodeLookaroundStackRegister();
-  int position_register = compiler->UnicodeLookaroundPositionRegister();
-  RegExpLookaround::Builder lookaround(false, on_success, stack_register,
-                                       position_register);
-  RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
-      zone, lookahead, read_backward, lookaround.on_match_success(), flags);
-  return TextNode::CreateForCharacterRanges(
-      zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
-}
-
-void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
-                           RegExpNode* on_success,
-                           UnicodeRangeSplitter* splitter) {
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  ZoneList<CharacterRange>* lead_surrogates =
-      ToCanonicalZoneList(splitter->lead_surrogates(), compiler->zone());
-  if (lead_surrogates == nullptr) return;
-  Zone* zone = compiler->zone();
-  // E.g. \ud801 becomes \ud801(?![\udc00-\udfff]).
-  ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
-      zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
-
-  RegExpNode* match;
-  if (compiler->read_backward()) {
-    // Reading backward. Assert that reading forward, there is no trail
-    // surrogate, and then backward match the lead surrogate.
-    match = NegativeLookaroundAgainstReadDirectionAndMatch(
-        compiler, trail_surrogates, lead_surrogates, on_success, true,
-        default_flags);
-  } else {
-    // Reading forward. Forward match the lead surrogate and assert that
-    // no trail surrogate follows.
-    match = MatchAndNegativeLookaroundInReadDirection(
-        compiler, lead_surrogates, trail_surrogates, on_success, false,
-        default_flags);
-  }
-  result->AddAlternative(GuardedAlternative(match));
-}
-
-void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
-                            RegExpNode* on_success,
-                            UnicodeRangeSplitter* splitter) {
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  ZoneList<CharacterRange>* trail_surrogates =
-      ToCanonicalZoneList(splitter->trail_surrogates(), compiler->zone());
-  if (trail_surrogates == nullptr) return;
-  Zone* zone = compiler->zone();
-  // E.g. \udc01 becomes (?<![\ud800-\udbff])\udc01
-  ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
-      zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
-
-  RegExpNode* match;
-  if (compiler->read_backward()) {
-    // Reading backward. Backward match the trail surrogate and assert that no
-    // lead surrogate precedes it.
-    match = MatchAndNegativeLookaroundInReadDirection(
-        compiler, trail_surrogates, lead_surrogates, on_success, true,
-        default_flags);
-  } else {
-    // Reading forward. Assert that reading backward, there is no lead
-    // surrogate, and then forward match the trail surrogate.
-    match = NegativeLookaroundAgainstReadDirectionAndMatch(
-        compiler, lead_surrogates, trail_surrogates, on_success, false,
-        default_flags);
-  }
-  result->AddAlternative(GuardedAlternative(match));
-}
-
-RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
-                              RegExpNode* on_success) {
-  // This implements ES2015 21.2.5.2.3, AdvanceStringIndex.
-  DCHECK(!compiler->read_backward());
-  Zone* zone = compiler->zone();
-  // Advance any character. If the character happens to be a lead surrogate and
-  // we advanced into the middle of a surrogate pair, it will work out, as
-  // nothing will match from there. We will have to advance again, consuming
-  // the associated trail surrogate.
-  ZoneList<CharacterRange>* range = CharacterRange::List(
-      zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
-  JSRegExp::Flags default_flags = JSRegExp::Flags();
-  return TextNode::CreateForCharacterRanges(zone, range, false, on_success,
-                                            default_flags);
-}
-
-void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
-#ifdef V8_INTL_SUPPORT
-  DCHECK(CharacterRange::IsCanonical(ranges));
-
-  // Micro-optimization to avoid passing large ranges to UnicodeSet::closeOver.
-  // See also https://crbug.com/v8/6727.
-  // TODO(jgruber): This only covers the special case of the {0,0x10FFFF} range,
-  // which we use frequently internally. But large ranges can also easily be
-  // created by the user. We might want to have a more general caching mechanism
-  // for such ranges.
-  if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd)) return;
-
-  // Use ICU to compute the case fold closure over the ranges.
-  icu::UnicodeSet set;
-  for (int i = 0; i < ranges->length(); i++) {
-    set.add(ranges->at(i).from(), ranges->at(i).to());
-  }
-  ranges->Clear();
-  set.closeOver(USET_CASE_INSENSITIVE);
-  // Full case mapping map single characters to multiple characters.
-  // Those are represented as strings in the set. Remove them so that
-  // we end up with only simple and common case mappings.
-  set.removeAllStrings();
-  for (int i = 0; i < set.getRangeCount(); i++) {
-    ranges->Add(CharacterRange::Range(set.getRangeStart(i), set.getRangeEnd(i)),
-                zone);
-  }
-  // No errors and everything we collected have been ranges.
-  CharacterRange::Canonicalize(ranges);
-#endif  // V8_INTL_SUPPORT
-}
-
-}  // namespace
-
-RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
-                                         RegExpNode* on_success) {
-  set_.Canonicalize();
-  Zone* zone = compiler->zone();
-  ZoneList<CharacterRange>* ranges = this->ranges(zone);
-  if (NeedsUnicodeCaseEquivalents(flags_)) {
-    AddUnicodeCaseEquivalents(ranges, zone);
-  }
-  if (IsUnicode(flags_) && !compiler->one_byte() &&
-      !contains_split_surrogate()) {
-    if (is_negated()) {
-      ZoneList<CharacterRange>* negated =
-          new (zone) ZoneList<CharacterRange>(2, zone);
-      CharacterRange::Negate(ranges, negated, zone);
-      ranges = negated;
-    }
-    if (ranges->length() == 0) {
-      JSRegExp::Flags default_flags;
-      RegExpCharacterClass* fail =
-          new (zone) RegExpCharacterClass(zone, ranges, default_flags);
-      return new (zone) TextNode(fail, compiler->read_backward(), on_success);
-    }
-    if (standard_type() == '*') {
-      return UnanchoredAdvance(compiler, on_success);
-    } else {
-      ChoiceNode* result = new (zone) ChoiceNode(2, zone);
-      UnicodeRangeSplitter splitter(ranges);
-      AddBmpCharacters(compiler, result, on_success, &splitter);
-      AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
-      AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
-      AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
-      return result;
-    }
-  } else {
-    return new (zone) TextNode(this, compiler->read_backward(), on_success);
-  }
-}
-
-int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  uc16 character1 = atom1->data().at(0);
-  uc16 character2 = atom2->data().at(0);
-  if (character1 < character2) return -1;
-  if (character1 > character2) return 1;
-  return 0;
-}
-
-#ifdef V8_INTL_SUPPORT
-
-// Case Insensitve comparesion
-int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  icu::UnicodeString character1(atom1->data().at(0));
-  return character1.caseCompare(atom2->data().at(0), U_FOLD_CASE_DEFAULT);
-}
-
-#else
-
-static unibrow::uchar Canonical(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    unibrow::uchar c) {
-  unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
-  int length = canonicalize->get(c, '\0', chars);
-  DCHECK_LE(length, 1);
-  unibrow::uchar canonical = c;
-  if (length == 1) canonical = chars[0];
-  return canonical;
-}
-
-int CompareFirstCharCaseIndependent(
-    unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
-    RegExpTree* const* a, RegExpTree* const* b) {
-  RegExpAtom* atom1 = (*a)->AsAtom();
-  RegExpAtom* atom2 = (*b)->AsAtom();
-  unibrow::uchar character1 = atom1->data().at(0);
-  unibrow::uchar character2 = atom2->data().at(0);
-  if (character1 == character2) return 0;
-  if (character1 >= 'a' || character2 >= 'a') {
-    character1 = Canonical(canonicalize, character1);
-    character2 = Canonical(canonicalize, character2);
-  }
-  return static_cast<int>(character1) - static_cast<int>(character2);
-}
-#endif  // V8_INTL_SUPPORT
-
-// We can stable sort runs of atoms, since the order does not matter if they
-// start with different characters.
-// Returns true if any consecutive atoms were found.
-bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  int length = alternatives->length();
-  bool found_consecutive_atoms = false;
-  for (int i = 0; i < length; i++) {
-    while (i < length) {
-      RegExpTree* alternative = alternatives->at(i);
-      if (alternative->IsAtom()) break;
-      i++;
-    }
-    // i is length or it is the index of an atom.
-    if (i == length) break;
-    int first_atom = i;
-    JSRegExp::Flags flags = alternatives->at(i)->AsAtom()->flags();
-    i++;
-    while (i < length) {
-      RegExpTree* alternative = alternatives->at(i);
-      if (!alternative->IsAtom()) break;
-      if (alternative->AsAtom()->flags() != flags) break;
-      i++;
-    }
-    // Sort atoms to get ones with common prefixes together.
-    // This step is more tricky if we are in a case-independent regexp,
-    // because it would change /is|I/ to /I|is/, and order matters when
-    // the regexp parts don't match only disjoint starting points. To fix
-    // this we have a version of CompareFirstChar that uses case-
-    // independent character classes for comparison.
-    DCHECK_LT(first_atom, alternatives->length());
-    DCHECK_LE(i, alternatives->length());
-    DCHECK_LE(first_atom, i);
-    if (IgnoreCase(flags)) {
-#ifdef V8_INTL_SUPPORT
-      alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
-                               i - first_atom);
-#else
-      unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-          compiler->isolate()->regexp_macro_assembler_canonicalize();
-      auto compare_closure = [canonicalize](RegExpTree* const* a,
-                                            RegExpTree* const* b) {
-        return CompareFirstCharCaseIndependent(canonicalize, a, b);
-      };
-      alternatives->StableSort(compare_closure, first_atom, i - first_atom);
-#endif  // V8_INTL_SUPPORT
-    } else {
-      alternatives->StableSort(CompareFirstChar, first_atom, i - first_atom);
-    }
-    if (i - first_atom > 1) found_consecutive_atoms = true;
-  }
-  return found_consecutive_atoms;
-}
-
-// Optimizes ab|ac|az to a(?:b|c|d).
-void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
-  Zone* zone = compiler->zone();
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  int length = alternatives->length();
-
-  int write_posn = 0;
-  int i = 0;
-  while (i < length) {
-    RegExpTree* alternative = alternatives->at(i);
-    if (!alternative->IsAtom()) {
-      alternatives->at(write_posn++) = alternatives->at(i);
-      i++;
-      continue;
-    }
-    RegExpAtom* const atom = alternative->AsAtom();
-    JSRegExp::Flags flags = atom->flags();
-#ifdef V8_INTL_SUPPORT
-    icu::UnicodeString common_prefix(atom->data().at(0));
-#else
-    unibrow::uchar common_prefix = atom->data().at(0);
-#endif  // V8_INTL_SUPPORT
-    int first_with_prefix = i;
-    int prefix_length = atom->length();
-    i++;
-    while (i < length) {
-      alternative = alternatives->at(i);
-      if (!alternative->IsAtom()) break;
-      RegExpAtom* const atom = alternative->AsAtom();
-      if (atom->flags() != flags) break;
-#ifdef V8_INTL_SUPPORT
-      icu::UnicodeString new_prefix(atom->data().at(0));
-      if (new_prefix != common_prefix) {
-        if (!IgnoreCase(flags)) break;
-        if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
-          break;
-      }
-#else
-      unibrow::uchar new_prefix = atom->data().at(0);
-      if (new_prefix != common_prefix) {
-        if (!IgnoreCase(flags)) break;
-        unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
-            compiler->isolate()->regexp_macro_assembler_canonicalize();
-        new_prefix = Canonical(canonicalize, new_prefix);
-        common_prefix = Canonical(canonicalize, common_prefix);
-        if (new_prefix != common_prefix) break;
-      }
-#endif  // V8_INTL_SUPPORT
-      prefix_length = Min(prefix_length, atom->length());
-      i++;
-    }
-    if (i > first_with_prefix + 2) {
-      // Found worthwhile run of alternatives with common prefix of at least one
-      // character.  The sorting function above did not sort on more than one
-      // character for reasons of correctness, but there may still be a longer
-      // common prefix if the terms were similar or presorted in the input.
-      // Find out how long the common prefix is.
-      int run_length = i - first_with_prefix;
-      RegExpAtom* const atom = alternatives->at(first_with_prefix)->AsAtom();
-      for (int j = 1; j < run_length && prefix_length > 1; j++) {
-        RegExpAtom* old_atom =
-            alternatives->at(j + first_with_prefix)->AsAtom();
-        for (int k = 1; k < prefix_length; k++) {
-          if (atom->data().at(k) != old_atom->data().at(k)) {
-            prefix_length = k;
-            break;
-          }
-        }
-      }
-      RegExpAtom* prefix = new (zone)
-          RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
-      ZoneList<RegExpTree*>* pair = new (zone) ZoneList<RegExpTree*>(2, zone);
-      pair->Add(prefix, zone);
-      ZoneList<RegExpTree*>* suffixes =
-          new (zone) ZoneList<RegExpTree*>(run_length, zone);
-      for (int j = 0; j < run_length; j++) {
-        RegExpAtom* old_atom =
-            alternatives->at(j + first_with_prefix)->AsAtom();
-        int len = old_atom->length();
-        if (len == prefix_length) {
-          suffixes->Add(new (zone) RegExpEmpty(), zone);
-        } else {
-          RegExpTree* suffix = new (zone) RegExpAtom(
-              old_atom->data().SubVector(prefix_length, old_atom->length()),
-              flags);
-          suffixes->Add(suffix, zone);
-        }
-      }
-      pair->Add(new (zone) RegExpDisjunction(suffixes), zone);
-      alternatives->at(write_posn++) = new (zone) RegExpAlternative(pair);
-    } else {
-      // Just copy any non-worthwhile alternatives.
-      for (int j = first_with_prefix; j < i; j++) {
-        alternatives->at(write_posn++) = alternatives->at(j);
-      }
-    }
-  }
-  alternatives->Rewind(write_posn);  // Trim end of array.
-}
-
-// Optimizes b|c|z to [bcz].
-void RegExpDisjunction::FixSingleCharacterDisjunctions(
-    RegExpCompiler* compiler) {
-  Zone* zone = compiler->zone();
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-  int length = alternatives->length();
-
-  int write_posn = 0;
-  int i = 0;
-  while (i < length) {
-    RegExpTree* alternative = alternatives->at(i);
-    if (!alternative->IsAtom()) {
-      alternatives->at(write_posn++) = alternatives->at(i);
-      i++;
-      continue;
-    }
-    RegExpAtom* const atom = alternative->AsAtom();
-    if (atom->length() != 1) {
-      alternatives->at(write_posn++) = alternatives->at(i);
-      i++;
-      continue;
-    }
-    JSRegExp::Flags flags = atom->flags();
-    DCHECK_IMPLIES(IsUnicode(flags),
-                   !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
-    bool contains_trail_surrogate =
-        unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
-    int first_in_run = i;
-    i++;
-    // Find a run of single-character atom alternatives that have identical
-    // flags (case independence and unicode-ness).
-    while (i < length) {
-      alternative = alternatives->at(i);
-      if (!alternative->IsAtom()) break;
-      RegExpAtom* const atom = alternative->AsAtom();
-      if (atom->length() != 1) break;
-      if (atom->flags() != flags) break;
-      DCHECK_IMPLIES(IsUnicode(flags),
-                     !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
-      contains_trail_surrogate |=
-          unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
-      i++;
-    }
-    if (i > first_in_run + 1) {
-      // Found non-trivial run of single-character alternatives.
-      int run_length = i - first_in_run;
-      ZoneList<CharacterRange>* ranges =
-          new (zone) ZoneList<CharacterRange>(2, zone);
-      for (int j = 0; j < run_length; j++) {
-        RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
-        DCHECK_EQ(old_atom->length(), 1);
-        ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
-      }
-      RegExpCharacterClass::CharacterClassFlags character_class_flags;
-      if (IsUnicode(flags) && contains_trail_surrogate) {
-        character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
-      }
-      alternatives->at(write_posn++) = new (zone)
-          RegExpCharacterClass(zone, ranges, flags, character_class_flags);
-    } else {
-      // Just copy any trivial alternatives.
-      for (int j = first_in_run; j < i; j++) {
-        alternatives->at(write_posn++) = alternatives->at(j);
-      }
-    }
-  }
-  alternatives->Rewind(write_posn);  // Trim end of array.
-}
-
-RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
-                                      RegExpNode* on_success) {
-  ZoneList<RegExpTree*>* alternatives = this->alternatives();
-
-  if (alternatives->length() > 2) {
-    bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
-    if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
-    FixSingleCharacterDisjunctions(compiler);
-    if (alternatives->length() == 1) {
-      return alternatives->at(0)->ToNode(compiler, on_success);
-    }
-  }
-
-  int length = alternatives->length();
-
-  ChoiceNode* result =
-      new (compiler->zone()) ChoiceNode(length, compiler->zone());
-  for (int i = 0; i < length; i++) {
-    GuardedAlternative alternative(
-        alternatives->at(i)->ToNode(compiler, on_success));
-    result->AddAlternative(alternative);
-  }
-  return result;
-}
-
-RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
-                                     RegExpNode* on_success) {
-  return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
-}
-
-namespace {
-// Desugar \b to (?<=\w)(?=\W)|(?<=\W)(?=\w) and
-//         \B to (?<=\w)(?=\w)|(?<=\W)(?=\W)
-RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
-                                          RegExpNode* on_success,
-                                          RegExpAssertion::AssertionType type,
-                                          JSRegExp::Flags flags) {
-  DCHECK(NeedsUnicodeCaseEquivalents(flags));
-  Zone* zone = compiler->zone();
-  ZoneList<CharacterRange>* word_range =
-      new (zone) ZoneList<CharacterRange>(2, zone);
-  CharacterRange::AddClassEscape('w', word_range, true, zone);
-  int stack_register = compiler->UnicodeLookaroundStackRegister();
-  int position_register = compiler->UnicodeLookaroundPositionRegister();
-  ChoiceNode* result = new (zone) ChoiceNode(2, zone);
-  // Add two choices. The (non-)boundary could start with a word or
-  // a non-word-character.
-  for (int i = 0; i < 2; i++) {
-    bool lookbehind_for_word = i == 0;
-    bool lookahead_for_word =
-        (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;
-    // Look to the left.
-    RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
-                                         stack_register, position_register);
-    RegExpNode* backward = TextNode::CreateForCharacterRanges(
-        zone, word_range, true, lookbehind.on_match_success(), flags);
-    // Look to the right.
-    RegExpLookaround::Builder lookahead(lookahead_for_word,
-                                        lookbehind.ForMatch(backward),
-                                        stack_register, position_register);
-    RegExpNode* forward = TextNode::CreateForCharacterRanges(
-        zone, word_range, false, lookahead.on_match_success(), flags);
-    result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
-  }
-  return result;
-}
-}  // anonymous namespace
-
-RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
-                                    RegExpNode* on_success) {
-  NodeInfo info;
-  Zone* zone = compiler->zone();
-
-  switch (assertion_type()) {
-    case START_OF_LINE:
-      return AssertionNode::AfterNewline(on_success);
-    case START_OF_INPUT:
-      return AssertionNode::AtStart(on_success);
-    case BOUNDARY:
-      return NeedsUnicodeCaseEquivalents(flags_)
-                 ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
-                                                 flags_)
-                 : AssertionNode::AtBoundary(on_success);
-    case NON_BOUNDARY:
-      return NeedsUnicodeCaseEquivalents(flags_)
-                 ? BoundaryAssertionAsLookaround(compiler, on_success,
-                                                 NON_BOUNDARY, flags_)
-                 : AssertionNode::AtNonBoundary(on_success);
-    case END_OF_INPUT:
-      return AssertionNode::AtEnd(on_success);
-    case END_OF_LINE: {
-      // Compile $ in multiline regexps as an alternation with a positive
-      // lookahead in one side and an end-of-input on the other side.
-      // We need two registers for the lookahead.
-      int stack_pointer_register = compiler->AllocateRegister();
-      int position_register = compiler->AllocateRegister();
-      // The ChoiceNode to distinguish between a newline and end-of-input.
-      ChoiceNode* result = new (zone) ChoiceNode(2, zone);
-      // Create a newline atom.
-      ZoneList<CharacterRange>* newline_ranges =
-          new (zone) ZoneList<CharacterRange>(3, zone);
-      CharacterRange::AddClassEscape('n', newline_ranges, false, zone);
-      JSRegExp::Flags default_flags = JSRegExp::Flags();
-      RegExpCharacterClass* newline_atom =
-          new (zone) RegExpCharacterClass('n', default_flags);
-      TextNode* newline_matcher =
-          new (zone) TextNode(newline_atom, false,
-                              ActionNode::PositiveSubmatchSuccess(
-                                  stack_pointer_register, position_register,
-                                  0,   // No captures inside.
-                                  -1,  // Ignored if no captures.
-                                  on_success));
-      // Create an end-of-input matcher.
-      RegExpNode* end_of_line = ActionNode::BeginSubmatch(
-          stack_pointer_register, position_register, newline_matcher);
-      // Add the two alternatives to the ChoiceNode.
-      GuardedAlternative eol_alternative(end_of_line);
-      result->AddAlternative(eol_alternative);
-      GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
-      result->AddAlternative(end_alternative);
-      return result;
-    }
-    default:
-      UNREACHABLE();
-  }
-  return on_success;
-}
-
-RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
-                                        RegExpNode* on_success) {
-  return new (compiler->zone())
-      BackReferenceNode(RegExpCapture::StartRegister(index()),
-                        RegExpCapture::EndRegister(index()), flags_,
-                        compiler->read_backward(), on_success);
-}
-
-RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
-                                RegExpNode* on_success) {
-  return on_success;
-}
-
-RegExpLookaround::Builder::Builder(bool is_positive, RegExpNode* on_success,
-                                   int stack_pointer_register,
-                                   int position_register,
-                                   int capture_register_count,
-                                   int capture_register_start)
-    : is_positive_(is_positive),
-      on_success_(on_success),
-      stack_pointer_register_(stack_pointer_register),
-      position_register_(position_register) {
-  if (is_positive_) {
-    on_match_success_ = ActionNode::PositiveSubmatchSuccess(
-        stack_pointer_register, position_register, capture_register_count,
-        capture_register_start, on_success_);
-  } else {
-    Zone* zone = on_success_->zone();
-    on_match_success_ = new (zone) NegativeSubmatchSuccess(
-        stack_pointer_register, position_register, capture_register_count,
-        capture_register_start, zone);
-  }
-}
-
-RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
-  if (is_positive_) {
-    return ActionNode::BeginSubmatch(stack_pointer_register_,
-                                     position_register_, match);
-  } else {
-    Zone* zone = on_success_->zone();
-    // We use a ChoiceNode to represent the negative lookaround. The first
-    // alternative is the negative match. On success, the end node backtracks.
-    // On failure, the second alternative is tried and leads to success.
-    // NegativeLookaheadChoiceNode is a special ChoiceNode that ignores the
-    // first exit when calculating quick checks.
-    ChoiceNode* choice_node = new (zone) NegativeLookaroundChoiceNode(
-        GuardedAlternative(match), GuardedAlternative(on_success_), zone);
-    return ActionNode::BeginSubmatch(stack_pointer_register_,
-                                     position_register_, choice_node);
-  }
-}
-
-RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
-                                     RegExpNode* on_success) {
-  int stack_pointer_register = compiler->AllocateRegister();
-  int position_register = compiler->AllocateRegister();
-
-  const int registers_per_capture = 2;
-  const int register_of_first_capture = 2;
-  int register_count = capture_count_ * registers_per_capture;
-  int register_start =
-      register_of_first_capture + capture_from_ * registers_per_capture;
-
-  RegExpNode* result;
-  bool was_reading_backward = compiler->read_backward();
-  compiler->set_read_backward(type() == LOOKBEHIND);
-  Builder builder(is_positive(), on_success, stack_pointer_register,
-                  position_register, register_count, register_start);
-  RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
-  result = builder.ForMatch(match);
-  compiler->set_read_backward(was_reading_backward);
-  return result;
-}
-
-RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
-                                  RegExpNode* on_success) {
-  return ToNode(body(), index(), compiler, on_success);
-}
-
-RegExpNode* RegExpCapture::ToNode(RegExpTree* body, int index,
-                                  RegExpCompiler* compiler,
-                                  RegExpNode* on_success) {
-  DCHECK_NOT_NULL(body);
-  int start_reg = RegExpCapture::StartRegister(index);
-  int end_reg = RegExpCapture::EndRegister(index);
-  if (compiler->read_backward()) std::swap(start_reg, end_reg);
-  RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
-  RegExpNode* body_node = body->ToNode(compiler, store_end);
-  return ActionNode::StorePosition(start_reg, true, body_node);
-}
-
-namespace {
-
-class AssertionSequenceRewriter final {
- public:
-  // TODO(jgruber): Consider moving this to a separate AST tree rewriter pass
-  // instead of sprinkling rewrites into the AST->Node conversion process.
-  static void MaybeRewrite(ZoneList<RegExpTree*>* terms, Zone* zone) {
-    AssertionSequenceRewriter rewriter(terms, zone);
-
-    static constexpr int kNoIndex = -1;
-    int from = kNoIndex;
-
-    for (int i = 0; i < terms->length(); i++) {
-      RegExpTree* t = terms->at(i);
-      if (from == kNoIndex && t->IsAssertion()) {
-        from = i;  // Start a sequence.
-      } else if (from != kNoIndex && !t->IsAssertion()) {
-        // Terminate and process the sequence.
-        if (i - from > 1) rewriter.Rewrite(from, i);
-        from = kNoIndex;
-      }
-    }
-
-    if (from != kNoIndex && terms->length() - from > 1) {
-      rewriter.Rewrite(from, terms->length());
-    }
-  }
-
-  // All assertions are zero width. A consecutive sequence of assertions is
-  // order-independent. There's two ways we can optimize here:
-  // 1. fold all identical assertions.
-  // 2. if any assertion combinations are known to fail (e.g. \b\B), the entire
-  //    sequence fails.
-  void Rewrite(int from, int to) {
-    DCHECK_GT(to, from + 1);
-
-    // Bitfield of all seen assertions.
-    uint32_t seen_assertions = 0;
-    STATIC_ASSERT(RegExpAssertion::LAST_TYPE < kUInt32Size * kBitsPerByte);
-
-    // Flags must match for folding.
-    JSRegExp::Flags flags = terms_->at(from)->AsAssertion()->flags();
-    bool saw_mismatched_flags = false;
-
-    for (int i = from; i < to; i++) {
-      RegExpAssertion* t = terms_->at(i)->AsAssertion();
-      if (t->flags() != flags) saw_mismatched_flags = true;
-      const uint32_t bit = 1 << t->assertion_type();
-
-      if ((seen_assertions & bit) && !saw_mismatched_flags) {
-        // Fold duplicates.
-        terms_->Set(i, new (zone_) RegExpEmpty());
-      }
-
-      seen_assertions |= bit;
-    }
-
-    // Collapse failures.
-    const uint32_t always_fails_mask =
-        1 << RegExpAssertion::BOUNDARY | 1 << RegExpAssertion::NON_BOUNDARY;
-    if ((seen_assertions & always_fails_mask) == always_fails_mask) {
-      ReplaceSequenceWithFailure(from, to);
-    }
-  }
-
-  void ReplaceSequenceWithFailure(int from, int to) {
-    // Replace the entire sequence with a single node that always fails.
-    // TODO(jgruber): Consider adding an explicit Fail kind. Until then, the
-    // negated '*' (everything) range serves the purpose.
-    ZoneList<CharacterRange>* ranges =
-        new (zone_) ZoneList<CharacterRange>(0, zone_);
-    RegExpCharacterClass* cc =
-        new (zone_) RegExpCharacterClass(zone_, ranges, JSRegExp::Flags());
-    terms_->Set(from, cc);
-
-    // Zero out the rest.
-    RegExpEmpty* empty = new (zone_) RegExpEmpty();
-    for (int i = from + 1; i < to; i++) terms_->Set(i, empty);
-  }
-
- private:
-  AssertionSequenceRewriter(ZoneList<RegExpTree*>* terms, Zone* zone)
-      : zone_(zone), terms_(terms) {}
-
-  Zone* zone_;
-  ZoneList<RegExpTree*>* terms_;
-};
-
-}  // namespace
-
-RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
-                                      RegExpNode* on_success) {
-  ZoneList<RegExpTree*>* children = nodes();
-
-  AssertionSequenceRewriter::MaybeRewrite(children, compiler->zone());
-
-  RegExpNode* current = on_success;
-  if (compiler->read_backward()) {
-    for (int i = 0; i < children->length(); i++) {
-      current = children->at(i)->ToNode(compiler, current);
-    }
-  } else {
-    for (int i = children->length() - 1; i >= 0; i--) {
-      current = children->at(i)->ToNode(compiler, current);
-    }
-  }
-  return current;
-}
-
-static void AddClass(const int* elmv, int elmc,
-                     ZoneList<CharacterRange>* ranges, Zone* zone) {
-  elmc--;
-  DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
-  for (int i = 0; i < elmc; i += 2) {
-    DCHECK(elmv[i] < elmv[i + 1]);
-    ranges->Add(CharacterRange::Range(elmv[i], elmv[i + 1] - 1), zone);
-  }
-}
-
-static void AddClassNegated(const int* elmv, int elmc,
-                            ZoneList<CharacterRange>* ranges, Zone* zone) {
-  elmc--;
-  DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
-  DCHECK_NE(0x0000, elmv[0]);
-  DCHECK_NE(String::kMaxCodePoint, elmv[elmc - 1]);
-  uc16 last = 0x0000;
-  for (int i = 0; i < elmc; i += 2) {
-    DCHECK(last <= elmv[i] - 1);
-    DCHECK(elmv[i] < elmv[i + 1]);
-    ranges->Add(CharacterRange::Range(last, elmv[i] - 1), zone);
-    last = elmv[i + 1];
-  }
-  ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
-}
-
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
-                                    bool add_unicode_case_equivalents,
-                                    Zone* zone) {
-  if (add_unicode_case_equivalents && (type == 'w' || type == 'W')) {
-    // See #sec-runtime-semantics-wordcharacters-abstract-operation
-    // In case of unicode and ignore_case, we need to create the closure over
-    // case equivalent characters before negating.
-    ZoneList<CharacterRange>* new_ranges =
-        new (zone) ZoneList<CharacterRange>(2, zone);
-    AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);
-    AddUnicodeCaseEquivalents(new_ranges, zone);
-    if (type == 'W') {
-      ZoneList<CharacterRange>* negated =
-          new (zone) ZoneList<CharacterRange>(2, zone);
-      CharacterRange::Negate(new_ranges, negated, zone);
-      new_ranges = negated;
-    }
-    ranges->AddAll(*new_ranges, zone);
-    return;
-  }
-  AddClassEscape(type, ranges, zone);
-}
-
-void CharacterRange::AddClassEscape(char type, ZoneList<CharacterRange>* ranges,
-                                    Zone* zone) {
-  switch (type) {
-    case 's':
-      AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
-      break;
-    case 'S':
-      AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
-      break;
-    case 'w':
-      AddClass(kWordRanges, kWordRangeCount, ranges, zone);
-      break;
-    case 'W':
-      AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
-      break;
-    case 'd':
-      AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
-      break;
-    case 'D':
-      AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
-      break;
-    case '.':
-      AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges,
-                      zone);
-      break;
-    // This is not a character range as defined by the spec but a
-    // convenient shorthand for a character class that matches any
-    // character.
-    case '*':
-      ranges->Add(CharacterRange::Everything(), zone);
-      break;
-    // This is the set of characters matched by the $ and ^ symbols
-    // in multiline mode.
-    case 'n':
-      AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges, zone);
-      break;
-    default:
-      UNREACHABLE();
-  }
-}
-
-Vector<const int> CharacterRange::GetWordBounds() {
-  return Vector<const int>(kWordRanges, kWordRangeCount - 1);
-}
-
-// static
-void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
-                                        ZoneList<CharacterRange>* ranges,
-                                        bool is_one_byte) {
-  CharacterRange::Canonicalize(ranges);
-  int range_count = ranges->length();
-#ifdef V8_INTL_SUPPORT
-  icu::UnicodeSet others;
-  for (int i = 0; i < range_count; i++) {
-    CharacterRange range = ranges->at(i);
-    uc32 from = range.from();
-    if (from > String::kMaxUtf16CodeUnit) continue;
-    uc32 to = Min(range.to(), String::kMaxUtf16CodeUnit);
-    // Nothing to be done for surrogates.
-    if (from >= kLeadSurrogateStart && to <= kTrailSurrogateEnd) continue;
-    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
-      if (from > String::kMaxOneByteCharCode) continue;
-      if (to > String::kMaxOneByteCharCode) to = String::kMaxOneByteCharCode;
-    }
-    others.add(from, to);
-  }
-
-  // Compute the set of additional characters that should be added,
-  // using UnicodeSet::closeOver. ECMA 262 defines slightly different
-  // case-folding rules than Unicode, so some characters that are
-  // added by closeOver do not match anything other than themselves in
-  // JS. For example, 'ſ' (U+017F LATIN SMALL LETTER LONG S) is the
-  // same case-insensitive character as 's' or 'S' according to
-  // Unicode, but does not match any other character in JS. To handle
-  // this case, we add such characters to the IgnoreSet and filter
-  // them out. We filter twice: once before calling closeOver (to
-  // prevent 'ſ' from adding 's'), and once after calling closeOver
-  // (to prevent 's' from adding 'ſ'). See regexp/special-case.h for
-  // more information.
-  icu::UnicodeSet already_added(others);
-  others.removeAll(RegExpCaseFolding::IgnoreSet());
-  others.closeOver(USET_CASE_INSENSITIVE);
-  others.removeAll(RegExpCaseFolding::IgnoreSet());
-  others.removeAll(already_added);
-
-  // Add others to the ranges
-  for (int32_t i = 0; i < others.getRangeCount(); i++) {
-    UChar32 from = others.getRangeStart(i);
-    UChar32 to = others.getRangeEnd(i);
-    if (from == to) {
-      ranges->Add(CharacterRange::Singleton(from), zone);
-    } else {
-      ranges->Add(CharacterRange::Range(from, to), zone);
-    }
-  }
-#else
-  for (int i = 0; i < range_count; i++) {
-    CharacterRange range = ranges->at(i);
-    uc32 bottom = range.from();
-    if (bottom > String::kMaxUtf16CodeUnit) continue;
-    uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
-    // Nothing to be done for surrogates.
-    if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd) continue;
-    if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
-      if (bottom > String::kMaxOneByteCharCode) continue;
-      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
-    }
-    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-    if (top == bottom) {
-      // If this is a singleton we just expand the one character.
-      int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
-      for (int i = 0; i < length; i++) {
-        uc32 chr = chars[i];
-        if (chr != bottom) {
-          ranges->Add(CharacterRange::Singleton(chars[i]), zone);
-        }
-      }
-    } else {
-      // If this is a range we expand the characters block by block, expanding
-      // contiguous subranges (blocks) one at a time.  The approach is as
-      // follows.  For a given start character we look up the remainder of the
-      // block that contains it (represented by the end point), for instance we
-      // find 'z' if the character is 'c'.  A block is characterized by the
-      // property that all characters uncanonicalize in the same way, except
-      // that each entry in the result is incremented by the distance from the
-      // first element.  So a-z is a block because 'a' uncanonicalizes to ['a',
-      // 'A'] and the k'th letter uncanonicalizes to ['a' + k, 'A' + k].  Once
-      // we've found the end point we look up its uncanonicalization and
-      // produce a range for each element.  For instance for [c-f] we look up
-      // ['z', 'Z'] and produce [c-f] and [C-F].  We then only add a range if
-      // it is not already contained in the input, so [c-f] will be skipped but
-      // [C-F] will be added.  If this range is not completely contained in a
-      // block we do this for all the blocks covered by the range (handling
-      // characters that is not in a block as a "singleton block").
-      unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-      int pos = bottom;
-      while (pos <= top) {
-        int length =
-            isolate->jsregexp_canonrange()->get(pos, '\0', equivalents);
-        uc32 block_end;
-        if (length == 0) {
-          block_end = pos;
-        } else {
-          DCHECK_EQ(1, length);
-          block_end = equivalents[0];
-        }
-        int end = (block_end > top) ? top : block_end;
-        length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0',
-                                                         equivalents);
-        for (int i = 0; i < length; i++) {
-          uc32 c = equivalents[i];
-          uc32 range_from = c - (block_end - pos);
-          uc32 range_to = c - (block_end - end);
-          if (!(bottom <= range_from && range_to <= top)) {
-            ranges->Add(CharacterRange::Range(range_from, range_to), zone);
-          }
-        }
-        pos = end + 1;
-      }
-    }
-  }
-#endif  // V8_INTL_SUPPORT
-}
-
-bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
-  DCHECK_NOT_NULL(ranges);
-  int n = ranges->length();
-  if (n <= 1) return true;
-  int max = ranges->at(0).to();
-  for (int i = 1; i < n; i++) {
-    CharacterRange next_range = ranges->at(i);
-    if (next_range.from() <= max + 1) return false;
-    max = next_range.to();
-  }
-  return true;
-}
-
-ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
-  if (ranges_ == nullptr) {
-    ranges_ = new (zone) ZoneList<CharacterRange>(2, zone);
-    CharacterRange::AddClassEscape(standard_set_type_, ranges_, false, zone);
-  }
-  return ranges_;
-}
-
-// Move a number of elements in a zonelist to another position
-// in the same list. Handles overlapping source and target areas.
-static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
-                       int count) {
-  // Ranges are potentially overlapping.
-  if (from < to) {
-    for (int i = count - 1; i >= 0; i--) {
-      list->at(to + i) = list->at(from + i);
-    }
-  } else {
-    for (int i = 0; i < count; i++) {
-      list->at(to + i) = list->at(from + i);
-    }
-  }
-}
-
-static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
-                                      CharacterRange insert) {
-  // Inserts a range into list[0..count[, which must be sorted
-  // by from value and non-overlapping and non-adjacent, using at most
-  // list[0..count] for the result. Returns the number of resulting
-  // canonicalized ranges. Inserting a range may collapse existing ranges into
-  // fewer ranges, so the return value can be anything in the range 1..count+1.
-  uc32 from = insert.from();
-  uc32 to = insert.to();
-  int start_pos = 0;
-  int end_pos = count;
-  for (int i = count - 1; i >= 0; i--) {
-    CharacterRange current = list->at(i);
-    if (current.from() > to + 1) {
-      end_pos = i;
-    } else if (current.to() + 1 < from) {
-      start_pos = i + 1;
-      break;
-    }
-  }
-
-  // Inserted range overlaps, or is adjacent to, ranges at positions
-  // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
-  // not affected by the insertion.
-  // If start_pos == end_pos, the range must be inserted before start_pos.
-  // if start_pos < end_pos, the entire range from start_pos to end_pos
-  // must be merged with the insert range.
-
-  if (start_pos == end_pos) {
-    // Insert between existing ranges at position start_pos.
-    if (start_pos < count) {
-      MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
-    }
-    list->at(start_pos) = insert;
-    return count + 1;
-  }
-  if (start_pos + 1 == end_pos) {
-    // Replace single existing range at position start_pos.
-    CharacterRange to_replace = list->at(start_pos);
-    int new_from = Min(to_replace.from(), from);
-    int new_to = Max(to_replace.to(), to);
-    list->at(start_pos) = CharacterRange::Range(new_from, new_to);
-    return count;
-  }
-  // Replace a number of existing ranges from start_pos to end_pos - 1.
-  // Move the remaining ranges down.
-
-  int new_from = Min(list->at(start_pos).from(), from);
-  int new_to = Max(list->at(end_pos - 1).to(), to);
-  if (end_pos < count) {
-    MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
-  }
-  list->at(start_pos) = CharacterRange::Range(new_from, new_to);
-  return count - (end_pos - start_pos) + 1;
-}
-
-void CharacterSet::Canonicalize() {
-  // Special/default classes are always considered canonical. The result
-  // of calling ranges() will be sorted.
-  if (ranges_ == nullptr) return;
-  CharacterRange::Canonicalize(ranges_);
-}
-
-void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
-  if (character_ranges->length() <= 1) return;
-  // Check whether ranges are already canonical (increasing, non-overlapping,
-  // non-adjacent).
-  int n = character_ranges->length();
-  int max = character_ranges->at(0).to();
-  int i = 1;
-  while (i < n) {
-    CharacterRange current = character_ranges->at(i);
-    if (current.from() <= max + 1) {
-      break;
-    }
-    max = current.to();
-    i++;
-  }
-  // Canonical until the i'th range. If that's all of them, we are done.
-  if (i == n) return;
-
-  // The ranges at index i and forward are not canonicalized. Make them so by
-  // doing the equivalent of insertion sort (inserting each into the previous
-  // list, in order).
-  // Notice that inserting a range can reduce the number of ranges in the
-  // result due to combining of adjacent and overlapping ranges.
-  int read = i;           // Range to insert.
-  int num_canonical = i;  // Length of canonicalized part of list.
-  do {
-    num_canonical = InsertRangeInCanonicalList(character_ranges, num_canonical,
-                                               character_ranges->at(read));
-    read++;
-  } while (read < n);
-  character_ranges->Rewind(num_canonical);
-
-  DCHECK(CharacterRange::IsCanonical(character_ranges));
-}
-
-void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
-                            ZoneList<CharacterRange>* negated_ranges,
-                            Zone* zone) {
-  DCHECK(CharacterRange::IsCanonical(ranges));
-  DCHECK_EQ(0, negated_ranges->length());
-  int range_count = ranges->length();
-  uc32 from = 0;
-  int i = 0;
-  if (range_count > 0 && ranges->at(0).from() == 0) {
-    from = ranges->at(0).to() + 1;
-    i = 1;
-  }
-  while (i < range_count) {
-    CharacterRange range = ranges->at(i);
-    negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
-    from = range.to() + 1;
-    i++;
-  }
-  if (from < String::kMaxCodePoint) {
-    negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
-                        zone);
-  }
-}
-
-// Scoped object to keep track of how much we unroll quantifier loops in the
-// regexp graph generator.
-class RegExpExpansionLimiter {
- public:
-  static const int kMaxExpansionFactor = 6;
-  RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
-      : compiler_(compiler),
-        saved_expansion_factor_(compiler->current_expansion_factor()),
-        ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
-    DCHECK_LT(0, factor);
-    if (ok_to_expand_) {
-      if (factor > kMaxExpansionFactor) {
-        // Avoid integer overflow of the current expansion factor.
-        ok_to_expand_ = false;
-        compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
-      } else {
-        int new_factor = saved_expansion_factor_ * factor;
-        ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
-        compiler->set_current_expansion_factor(new_factor);
-      }
-    }
-  }
-
-  ~RegExpExpansionLimiter() {
-    compiler_->set_current_expansion_factor(saved_expansion_factor_);
-  }
-
-  bool ok_to_expand() { return ok_to_expand_; }
-
- private:
-  RegExpCompiler* compiler_;
-  int saved_expansion_factor_;
-  bool ok_to_expand_;
-
-  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
-};
-
-RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
-                                     RegExpTree* body, RegExpCompiler* compiler,
-                                     RegExpNode* on_success,
-                                     bool not_at_start) {
-  // x{f, t} becomes this:
-  //
-  //             (r++)<-.
-  //               |     `
-  //               |     (x)
-  //               v     ^
-  //      (r=0)-->(?)---/ [if r < t]
-  //               |
-  //   [if r >= f] \----> ...
-  //
-
-  // 15.10.2.5 RepeatMatcher algorithm.
-  // The parser has already eliminated the case where max is 0.  In the case
-  // where max_match is zero the parser has removed the quantifier if min was
-  // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
-
-  // If we know that we cannot match zero length then things are a little
-  // simpler since we don't need to make the special zero length match check
-  // from step 2.1.  If the min and max are small we can unroll a little in
-  // this case.
-  static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
-  static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
-  if (max == 0) return on_success;  // This can happen due to recursion.
-  bool body_can_be_empty = (body->min_match() == 0);
-  int body_start_reg = RegExpCompiler::kNoRegister;
-  Interval capture_registers = body->CaptureRegisters();
-  bool needs_capture_clearing = !capture_registers.is_empty();
-  Zone* zone = compiler->zone();
-
-  if (body_can_be_empty) {
-    body_start_reg = compiler->AllocateRegister();
-  } else if (compiler->optimize() && !needs_capture_clearing) {
-    // Only unroll if there are no captures and the body can't be
-    // empty.
-    {
-      RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
-      if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
-        int new_max = (max == kInfinity) ? max : max - min;
-        // Recurse once to get the loop or optional matches after the fixed
-        // ones.
-        RegExpNode* answer =
-            ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
-        // Unroll the forced matches from 0 to min.  This can cause chains of
-        // TextNodes (which the parser does not generate).  These should be
-        // combined if it turns out they hinder good code generation.
-        for (int i = 0; i < min; i++) {
-          answer = body->ToNode(compiler, answer);
-        }
-        return answer;
-      }
-    }
-    if (max <= kMaxUnrolledMaxMatches && min == 0) {
-      DCHECK_LT(0, max);  // Due to the 'if' above.
-      RegExpExpansionLimiter limiter(compiler, max);
-      if (limiter.ok_to_expand()) {
-        // Unroll the optional matches up to max.
-        RegExpNode* answer = on_success;
-        for (int i = 0; i < max; i++) {
-          ChoiceNode* alternation = new (zone) ChoiceNode(2, zone);
-          if (is_greedy) {
-            alternation->AddAlternative(
-                GuardedAlternative(body->ToNode(compiler, answer)));
-            alternation->AddAlternative(GuardedAlternative(on_success));
-          } else {
-            alternation->AddAlternative(GuardedAlternative(on_success));
-            alternation->AddAlternative(
-                GuardedAlternative(body->ToNode(compiler, answer)));
-          }
-          answer = alternation;
-          if (not_at_start && !compiler->read_backward()) {
-            alternation->set_not_at_start();
-          }
-        }
-        return answer;
-      }
-    }
-  }
-  bool has_min = min > 0;
-  bool has_max = max < RegExpTree::kInfinity;
-  bool needs_counter = has_min || has_max;
-  int reg_ctr = needs_counter ? compiler->AllocateRegister()
-                              : RegExpCompiler::kNoRegister;
-  LoopChoiceNode* center = new (zone) LoopChoiceNode(
-      body->min_match() == 0, compiler->read_backward(), min, zone);
-  if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
-  RegExpNode* loop_return =
-      needs_counter ? static_cast<RegExpNode*>(
-                          ActionNode::IncrementRegister(reg_ctr, center))
-                    : static_cast<RegExpNode*>(center);
-  if (body_can_be_empty) {
-    // If the body can be empty we need to check if it was and then
-    // backtrack.
-    loop_return =
-        ActionNode::EmptyMatchCheck(body_start_reg, reg_ctr, min, loop_return);
-  }
-  RegExpNode* body_node = body->ToNode(compiler, loop_return);
-  if (body_can_be_empty) {
-    // If the body can be empty we need to store the start position
-    // so we can bail out if it was empty.
-    body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
-  }
-  if (needs_capture_clearing) {
-    // Before entering the body of this loop we need to clear captures.
-    body_node = ActionNode::ClearCaptures(capture_registers, body_node);
-  }
-  GuardedAlternative body_alt(body_node);
-  if (has_max) {
-    Guard* body_guard = new (zone) Guard(reg_ctr, Guard::LT, max);
-    body_alt.AddGuard(body_guard, zone);
-  }
-  GuardedAlternative rest_alt(on_success);
-  if (has_min) {
-    Guard* rest_guard = new (compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
-    rest_alt.AddGuard(rest_guard, zone);
-  }
-  if (is_greedy) {
-    center->AddLoopAlternative(body_alt);
-    center->AddContinueAlternative(rest_alt);
-  } else {
-    center->AddContinueAlternative(rest_alt);
-    center->AddLoopAlternative(body_alt);
-  }
-  if (needs_counter) {
-    return ActionNode::SetRegisterForLoop(reg_ctr, 0, center);
-  } else {
-    return center;
-  }
-}
-
-}  // namespace internal
-}  // namespace v8