diff options
30 files changed, 4166 insertions, 1417 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp index 2456ef065d..9b0e1a7cb6 100644 --- a/js/src/builtin/RegExp.cpp +++ b/js/src/builtin/RegExp.cpp @@ -21,6 +21,7 @@ #include "vm/NativeObject-inl.h" + using namespace js; using namespace js::unicode; @@ -28,12 +29,15 @@ using mozilla::ArrayLength; using mozilla::CheckedInt; using mozilla::Maybe; +using CapturesVector = GCVector<Value, 4>; + /* - * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2 - * steps 3, 16-25. + * ES 2021 draft 21.2.5.2.2: Steps 16-28 + * https://tc39.es/ecma262/#sec-regexpbuiltinexec */ bool -js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& matches, +js::CreateRegExpMatchResult(JSContext* cx, RegExpShared& re, + HandleString input, const MatchPairs& matches, MutableHandleValue rval) { MOZ_ASSERT(input); @@ -46,6 +50,7 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& * 1..pairCount-1: paren matches * input: input string * index: start index for the match + * groups: named capture groups for the match */ /* Get the templateObject that defines the shape and type of the output object */ @@ -53,15 +58,16 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& if (!templateObject) return false; + // Step 16 size_t numPairs = matches.length(); MOZ_ASSERT(numPairs > 0); - /* Step 17. */ + /* Step 18-19. */ RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, templateObject)); if (!arr) return false; - /* Steps 22-24. + /* Steps 22-23 and 27 a-e * Store a Value for each pair. 
*/ for (size_t i = 0; i < numPairs; i++) { const MatchPair& pair = matches[i]; @@ -79,6 +85,40 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& } } + // Step 24 (reordered) + RootedPlainObject groups(cx); + if (re.numNamedCaptures() > 0) { + // construct a new object from the template saved on RegExpShared + RootedPlainObject groupsTemplate(cx, re.getGroupsTemplate()); + groups = NewObjectWithGivenProto<PlainObject>(cx, nullptr); + groups->setGroup(groupsTemplate->group()); + + // Step 27 f. + // The groups template object stores the names of the named captures in the + // the order in which they are defined. + // Grab the index into the match vector from the template object and define the + // corresponding property on the result + AutoIdVector keys(cx); + if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) { + return false; + } + MOZ_ASSERT(keys.length() == re.numNamedCaptures()); + RootedId key(cx); + RootedValue ival(cx); + RootedValue val(cx); + for (size_t i = 0; i < keys.length(); i++) { + key = keys[i]; + // fetch the group's match index... + if (!NativeGetProperty(cx, groupsTemplate, key, &ival)) + return false; + // ... and set it on groups + val = arr->getDenseElement(ival.toInt32()); + if (!NativeDefineProperty(cx, groups, key, val, nullptr, nullptr, JSPROP_ENUMERATE)) { + return false; + } + } + } + /* Step 20 (reordered). * Set the |index| property. (TemplateObject positions it in slot 0) */ arr->setSlot(0, Int32Value(matches[0].start)); @@ -87,6 +127,10 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& * Set the |input| property. (TemplateObject positions it in slot 1) */ arr->setSlot(1, StringValue(input)); + // Steps 25-26 (reordered) + // Set the |groups| property. + arr->setSlot(2, groups ? 
ObjectValue(*groups) : UndefinedValue()); + #ifdef DEBUG RootedValue test(cx); RootedId id(cx, NameToId(cx->names().index)); @@ -168,7 +212,7 @@ js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res, Handle<RegExpObject*> return true; } - return CreateRegExpMatchResult(cx, input, matches, rval); + return CreateRegExpMatchResult(cx, *shared, input, matches, rval); } static bool @@ -1025,7 +1069,11 @@ RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string, } /* Steps 16-25 */ - return CreateRegExpMatchResult(cx, string, matches, rval); + Rooted<RegExpObject*> reobj(cx, &regexp->as<RegExpObject>()); + RegExpGuard shared(cx); + if (!RegExpObject::getShared(cx, reobj, &shared)) + return false; + return CreateRegExpMatchResult(cx, *shared, string, matches, rval); } /* @@ -1067,8 +1115,13 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input, // The MatchPairs will always be passed in, but RegExp execution was // successful only if the pairs have actually been filled in. 
- if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0) - return CreateRegExpMatchResult(cx, input, *maybeMatches, output); + if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0) { + Rooted<RegExpObject*> reobj(cx, &regexp->as<RegExpObject>()); + RegExpGuard shared(cx); + if (!RegExpObject::getShared(cx, reobj, &shared)) + return false; + return CreateRegExpMatchResult(cx, *shared, input, *maybeMatches, output); + } return RegExpMatcherImpl(cx, regexp, input, lastIndex, UpdateRegExpStatics, output); } @@ -1265,10 +1318,10 @@ GetParen(JSLinearString* matched, const JS::Value& capture, JSSubString* out) template <typename CharT> static bool InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position, size_t tailPos, - MutableHandle<GCVector<Value>> captures, JSLinearString* replacement, + Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures, JSLinearString* replacement, const CharT* replacementBegin, const CharT* currentDollar, const CharT* replacementEnd, - JSSubString* out, size_t* skip) + JSSubString* out, size_t* skip, uint32_t* currentNamedCapture) { MOZ_ASSERT(*currentDollar == '$'); @@ -1310,6 +1363,35 @@ InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position return true; } + // '$<': Named Captures + if (c == '<') { + // Step 1. + if (namedCaptures.length() == 0) { + return false; + } + + // Step 2.b + const CharT* nameStart = currentDollar + 2; + const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd); + + // Step 2.c + if (!nameEnd) { + return false; + } + + // Step 2.d + // We precompute named capture replacements in InitNamedCaptures. + // They are stored in the order in which we will need them, so here + // we can just take the next one in the list. 
+ size_t nameLength = nameEnd - nameStart; + *skip = nameLength + 3; // $<...> + + // Steps 2.d.iii-iv + GetParen(matched, namedCaptures[*currentNamedCapture], out); + *currentNamedCapture += 1; + return true; + } + *skip = 2; switch (c) { default: @@ -1340,8 +1422,9 @@ InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position template <typename CharT> static bool FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearString string, - size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures, - HandleLinearString replacement, size_t firstDollarIndex, size_t* sizep) + size_t position, size_t tailPos, Handle<CapturesVector> captures, + Handle<CapturesVector> namedCaptures, HandleLinearString replacement, + size_t firstDollarIndex, size_t* sizep) { CheckedInt<uint32_t> replen = replacement->length(); @@ -1350,11 +1433,13 @@ FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearS const CharT* replacementBegin = replacement->chars<CharT>(nogc); const CharT* currentDollar = replacementBegin + firstDollarIndex; const CharT* replacementEnd = replacementBegin + replacement->length(); + uint32_t currentNamedCapture = 0; do { JSSubString sub; size_t skip; - if (InterpretDollar(matched, string, position, tailPos, captures, replacement, - replacementBegin, currentDollar, replacementEnd, &sub, &skip)) + if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures, + replacement, replacementBegin, currentDollar, replacementEnd, + &sub, &skip, &currentNamedCapture)) { if (sub.length > skip) replen += sub.length - skip; @@ -1379,14 +1464,14 @@ FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearS static bool FindReplaceLength(JSContext* cx, HandleLinearString matched, HandleLinearString string, - size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures, + size_t position, size_t tailPos, Handle<CapturesVector> captures, 
Handle<CapturesVector> namedCaptures, HandleLinearString replacement, size_t firstDollarIndex, size_t* sizep) { return replacement->hasLatin1Chars() ? FindReplaceLengthString<Latin1Char>(cx, matched, string, position, tailPos, captures, - replacement, firstDollarIndex, sizep) + namedCaptures, replacement, firstDollarIndex, sizep) : FindReplaceLengthString<char16_t>(cx, matched, string, position, tailPos, captures, - replacement, firstDollarIndex, sizep); + namedCaptures, replacement, firstDollarIndex, sizep); } /* @@ -1397,7 +1482,7 @@ FindReplaceLength(JSContext* cx, HandleLinearString matched, HandleLinearString template <typename CharT> static void DoReplace(HandleLinearString matched, HandleLinearString string, - size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures, + size_t position, size_t tailPos, Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures, HandleLinearString replacement, size_t firstDollarIndex, StringBuffer &sb) { JS::AutoCheckCannotGC nogc; @@ -1407,6 +1492,7 @@ DoReplace(HandleLinearString matched, HandleLinearString string, MOZ_ASSERT(firstDollarIndex < replacement->length()); const CharT* currentDollar = replacementBegin + firstDollarIndex; const CharT* replacementEnd = replacementBegin + replacement->length(); + uint32_t currentNamedCapture = 0; do { /* Move one of the constant portions of the replacement value. 
*/ size_t len = currentDollar - currentChar; @@ -1415,8 +1501,8 @@ DoReplace(HandleLinearString matched, HandleLinearString string, JSSubString sub; size_t skip; - if (InterpretDollar(matched, string, position, tailPos, captures, replacement, - replacementBegin, currentDollar, replacementEnd, &sub, &skip)) + if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures, replacement, + replacementBegin, currentDollar, replacementEnd, &sub, &skip, &currentNamedCapture)) { sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length); currentChar += skip; @@ -1430,9 +1516,117 @@ DoReplace(HandleLinearString matched, HandleLinearString string, sb.infallibleAppend(currentChar, replacement->length() - (currentChar - replacementBegin)); } +/* + * This function finds the list of named captures of the form + * "$<name>" in a replacement string and converts them into jsids, for + * use in InitNamedCaptures. + */ +template <typename CharT> +static bool CollectNames(JSContext* cx, HandleLinearString replacement, + size_t firstDollarIndex, + MutableHandle<GCVector<jsid>> names) { + JS::AutoCheckCannotGC nogc; + MOZ_ASSERT(firstDollarIndex < replacement->length()); + + const CharT* replacementBegin = replacement->chars<CharT>(nogc); + const CharT* currentDollar = replacementBegin + firstDollarIndex; + const CharT* replacementEnd = replacementBegin + replacement->length(); + + // https://tc39.es/ecma262/#table-45, "$<" section + while (currentDollar && currentDollar + 1 < replacementEnd) { + if (currentDollar[1] == '<') { + // Step 2.b + const CharT* nameStart = currentDollar + 2; + const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd); + + // Step 2.c + if (!nameEnd) { + return true; + } + + // Step 2.d.i + size_t nameLength = nameEnd - nameStart; + JSAtom* atom = AtomizeChars(cx, nameStart, nameLength); + if (!atom || !names.append(AtomToId(atom))) { + return false; + } + currentDollar = nameEnd + 1; + } else { + currentDollar += 2; + } + 
currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd); + } + return true; +} + +/* + * When replacing named captures, the spec requires us to perform + * `Get(match.groups, name)` for each "$<name>". These `Get`s can be + * script-visible; for example, RegExp can be extended with an `exec` + * method that wraps `groups` in a proxy. To make sure that we do the + * right thing, if a regexp has named captures, we find the named + * capture replacements before beginning the actual replacement. + * This guarantees that we will call GetProperty once and only once for + * each "$<name>" in the replacement string, in the correct order. + * + * This function precomputes the results of step 2 of the '$<' case + * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so + * that when we need to access the nth named capture in InterpretDollar, + * we can just use the nth value stored in namedCaptures. + */ +static bool InitNamedCaptures(JSContext* cx, HandleLinearString replacement, + HandleObject groups, size_t firstDollarIndex, + MutableHandle<CapturesVector> namedCaptures) { + Rooted<GCVector<jsid>> names(cx, GCVector<jsid>(cx)); + if (replacement->hasLatin1Chars()) { + if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) { + return false; + } + } else { + if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) { + return false; + } + } + + // https://tc39.es/ecma262/#table-45, "$<" section + RootedId id(cx); + RootedValue capture(cx); + for (uint32_t i = 0; i < names.length(); i++) { + // Step 2.d.i + id = names[i]; + + // Step 2.d.ii + if (!GetProperty(cx, groups, groups, id, &capture)) { + return false; + } + + // Step 2.d.iii + if (capture.isUndefined()) { + if (!namedCaptures.append(capture)) { + return false; + } + } else { + // Step 2.d.iv + JSString* str = ToString<CanGC>(cx, capture); + if (!str) { + return false; + } + JSLinearString* linear = str->ensureLinear(cx); + if (!linear) { + return false; + } + 
if (!namedCaptures.append(StringValue(linear))) { + return false; + } + } + } + + return true; +} + static bool -NeedTwoBytes(HandleLinearString string, HandleLinearString replacement, - HandleLinearString matched, Handle<GCVector<Value>> captures) +NeedTwoBytes(HandleLinearString string, HandleLinearString replacement, HandleLinearString matched, + Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures) { if (string->hasTwoByteChars()) return true; @@ -1449,19 +1643,38 @@ NeedTwoBytes(HandleLinearString string, HandleLinearString replacement, return true; } + for (size_t i = 0, len = namedCaptures.length(); i < len; i++) { + Value capture = namedCaptures[i]; + if (capture.isUndefined()) + continue; + if (capture.toString()->hasTwoByteChars()) + return true; + } + return false; } /* ES 2016 draft Mar 25, 2016 21.1.3.14.1. */ bool -js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinearString string, - size_t position, HandleObject capturesObj, HandleLinearString replacement, - size_t firstDollarIndex, MutableHandleValue rval) +js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult, HandleLinearString string, + size_t position, HandleLinearString replacement, size_t firstDollarIndex, + HandleValue groups, MutableHandleValue rval) { MOZ_ASSERT(firstDollarIndex < replacement->length()); // Step 1 (skipped). + // Step 10 (reordered). + uint32_t matchResultLength = matchResult->length(); + MOZ_ASSERT(matchResultLength > 0); + MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength()); + + const Value& matchedValue = matchResult->getDenseElement(0); + RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx)); + if (!matched) + return false; + + // Step 2. size_t matchLength = matched->length(); @@ -1470,33 +1683,36 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea // Step 6. MOZ_ASSERT(position <= string->length()); - // Step 10 (reordered). 
- uint32_t nCaptures; - if (!GetLengthProperty(cx, capturesObj, &nCaptures)) - return false; - - Rooted<GCVector<Value>> captures(cx, GCVector<Value>(cx)); + uint32_t nCaptures = matchResultLength - 1; + Rooted<CapturesVector> captures(cx, CapturesVector(cx)); if (!captures.reserve(nCaptures)) return false; // Step 7. - RootedValue capture(cx); - for (uint32_t i = 0; i < nCaptures; i++) { - if (!GetElement(cx, capturesObj, capturesObj, i, &capture)) - return false; + for (uint32_t i = 1; i <= nCaptures; i++) { + const Value& capture = matchResult->getDenseElement(i); if (capture.isUndefined()) { captures.infallibleAppend(capture); continue; } - MOZ_ASSERT(capture.isString()); - RootedLinearString captureLinear(cx, capture.toString()->ensureLinear(cx)); + JSLinearString* captureLinear = capture.toString()->ensureLinear(cx); if (!captureLinear) return false; captures.infallibleAppend(StringValue(captureLinear)); } + Rooted<CapturesVector> namedCaptures(cx, CapturesVector(cx)); + if (groups.isObject()) { + RootedObject groupsObj(cx, &groups.toObject()); + if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex, &namedCaptures)) { + return false; + } + } else { + MOZ_ASSERT(groups.isUndefined()); + } + // Step 8 (skipped). // Step 9. @@ -1511,14 +1727,14 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea // Step 11. 
size_t reserveLength; - if (!FindReplaceLength(cx, matched, string, position, tailPos, &captures, replacement, - firstDollarIndex, &reserveLength)) + if (!FindReplaceLength(cx, matched, string, position, tailPos, captures, namedCaptures, + replacement, firstDollarIndex, &reserveLength)) { return false; } StringBuffer result(cx); - if (NeedTwoBytes(string, replacement, matched, captures)) { + if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) { if (!result.ensureTwoByteChars()) return false; } @@ -1527,10 +1743,10 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea return false; if (replacement->hasLatin1Chars()) { - DoReplace<Latin1Char>(matched, string, position, tailPos, &captures, + DoReplace<Latin1Char>(matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, result); } else { - DoReplace<char16_t>(matched, string, position, tailPos, &captures, + DoReplace<char16_t>(matched, string, position, tailPos, captures, namedCaptures, replacement, firstDollarIndex, result); } diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h index 275efd7ce3..f66c9b1b81 100644 --- a/js/src/builtin/RegExp.h +++ b/js/src/builtin/RegExp.h @@ -36,7 +36,8 @@ ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res, Handle<RegExpObject*> reo /* Translation from MatchPairs to a JS array in regexp_exec()'s output format. 
*/ MOZ_MUST_USE bool -CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& matches, +CreateRegExpMatchResult(JSContext* cx, RegExpShared& re, + HandleString input, const MatchPairs& matches, MutableHandleValue rval); extern MOZ_MUST_USE bool @@ -121,9 +122,9 @@ extern MOZ_MUST_USE bool RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj, JSObject* proto); extern MOZ_MUST_USE bool -RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinearString string, - size_t position, HandleObject capturesObj, HandleLinearString replacement, - size_t firstDollarIndex, MutableHandleValue rval); +RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult, HandleLinearString string, + size_t position, HandleLinearString replacement, size_t firstDollarIndex, + HandleValue namedCaptures, MutableHandleValue rval); extern MOZ_MUST_USE bool GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp); diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js index 879375b988..ab4d76f4ca 100644 --- a/js/src/builtin/RegExp.js +++ b/js/src/builtin/RegExp.js @@ -395,9 +395,8 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue, var n, capN, replacement; if (functionalReplace || firstDollarIndex !== -1) { - // Steps 14.g-j. + // Steps 14.g-k. replacement = RegExpGetComplexReplacement(result, matched, S, position, - nCaptures, replaceValue, functionalReplace, firstDollarIndex); } else { @@ -411,16 +410,21 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue, if (capN !== undefined) ToString(capN); } + // Step 14.j, 14.l., GetSubstitution Step 11. + // We don't need namedCaptures, but ToObject is visible to script. + var namedCaptures = result.groups; + if (namedCaptures !== undefined) + ToObject(namedCaptures); replacement = replaceValue; } - // Step 14.l. + // Step 14.m. if (position >= nextSourcePosition) { - // Step 14.l.ii. + // Step 14.m.ii. 
accumulatedResult += Substring(S, nextSourcePosition, position - nextSourcePosition) + replacement; - // Step 14.l.iii. + // Step 14.m.iii. nextSourcePosition = position + matchLength; } } @@ -433,15 +437,14 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue, return accumulatedResult + Substring(S, nextSourcePosition, lengthS - nextSourcePosition); } -// ES 2017 draft rev 03bfda119d060aca4099d2b77cf43f6d4f11cfa2 21.2.5.8 -// steps 14.g-k. +// ES 2021 draft 21.2.5.10 +// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace +// steps 14.g-l. // Calculates functional/substitution replaceement from match result. // Used in the following functions: // * RegExpGlobalReplaceOptFunc // * RegExpGlobalReplaceOptElemBase -// * RegExpGlobalReplaceOptSubst // * RegExpLocalReplaceOptFunc -// * RegExpLocalReplaceOptSubst // * RegExpReplaceSlowPath function RegExpGetComplexReplacement(result, matched, S, position, nCaptures, replaceValue, @@ -451,13 +454,8 @@ function RegExpGetComplexReplacement(result, matched, S, position, var captures = []; var capturesLength = 0; - // Step 14.j.i (reordered). - // For `nCaptures` <= 4 case, call `replaceValue` directly, otherwise - // use `std_Function_apply` with all arguments stored in `captures`. - // In latter case, store `matched` as the first element here, to - // avoid unshift later. - if (functionalReplace && nCaptures > 4) - _DefineDataProperty(captures, capturesLength++, matched); + // Step 14.k.i (reordered). + _DefineDataProperty(captures, capturesLength++, matched); // Step 14.g, 14.i, 14.i.iv. for (var n = 1; n <= nCaptures; n++) { @@ -473,29 +471,41 @@ function RegExpGetComplexReplacement(result, matched, S, position, } // Step 14.j. + var namedCaptures = result.groups; + + // Step 14.k. 
if (functionalReplace) { - switch (nCaptures) { - case 0: - return ToString(replaceValue(matched, position, S)); - case 1: - return ToString(replaceValue(matched, SPREAD(captures, 1), position, S)); - case 2: - return ToString(replaceValue(matched, SPREAD(captures, 2), position, S)); - case 3: - return ToString(replaceValue(matched, SPREAD(captures, 3), position, S)); - case 4: - return ToString(replaceValue(matched, SPREAD(captures, 4), position, S)); - default: - // Steps 14.j.ii-v. - _DefineDataProperty(captures, capturesLength++, position); - _DefineDataProperty(captures, capturesLength++, S); - return ToString(callFunction(std_Function_apply, replaceValue, null, captures)); + // For `nCaptures` <= 4 case, call `replaceValue` directly, otherwise + // use `std_Function_apply` with all arguments stored in `captures`. + if (namedCaptures === undefined) { + switch (nCaptures) { + case 0: + return ToString(replaceValue(SPREAD(captures, 1), position, S)); + case 1: + return ToString(replaceValue(SPREAD(captures, 2), position, S)); + case 2: + return ToString(replaceValue(SPREAD(captures, 3), position, S)); + case 3: + return ToString(replaceValue(SPREAD(captures, 4), position, S)); + case 4: + return ToString(replaceValue(SPREAD(captures, 5), position, S)); + } + } + // Steps 14.k.ii-v. + _DefineDataProperty(captures, capturesLength++, position); + _DefineDataProperty(captures, capturesLength++, S); + if (namedCaptures !== undefined) { + _DefineDataProperty(captures, capturesLength++, namedCaptures); } + return ToString(callFunction(std_Function_apply, replaceValue, undefined, captures)); } - // Steps 14.k.i. - return RegExpGetSubstitution(matched, S, position, captures, replaceValue, - firstDollarIndex); + // Step 14.l. 
+ if (namedCaptures !== undefined) { + namedCaptures = ToObject(namedCaptures); + } + return RegExpGetSubstitution(captures, S, position, replaceValue, firstDollarIndex, + namedCaptures); } // ES 2017 draft rev 03bfda119d060aca4099d2b77cf43f6d4f11cfa2 21.2.5.8 diff --git a/js/src/builtin/RegExpGlobalReplaceOpt.h.js b/js/src/builtin/RegExpGlobalReplaceOpt.h.js index fbe50a3f9c..8b82fc31d4 100644 --- a/js/src/builtin/RegExpGlobalReplaceOpt.h.js +++ b/js/src/builtin/RegExpGlobalReplaceOpt.h.js @@ -53,7 +53,7 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode break; var nCaptures; -#if defined(FUNCTIONAL) || defined(SUBSTITUTION) +#if defined(FUNCTIONAL) // Steps 14.a-b. nCaptures = std_Math_max(result.length - 1, 0); #endif @@ -68,18 +68,19 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode var position = result.index; lastIndex = position + matchLength; - // Steps g-j. + // Steps g-l. var replacement; #if defined(FUNCTIONAL) replacement = RegExpGetComplexReplacement(result, matched, S, position, - nCaptures, replaceValue, true, -1); -#elif defined(SUBSTITUTION) - replacement = RegExpGetComplexReplacement(result, matched, S, position, - - nCaptures, replaceValue, - false, firstDollarIndex); +#elif defined(SUBSTITUTION) // Step l.i + var namedCaptures = result.groups; + if (namedCaptures !== undefined) { + namedCaptures = ToObject(namedCaptures); + } + // Step l.ii + replacement = RegExpGetSubstitution(result, S, position, replaceValue, firstDollarIndex, namedCaptures); #elif defined(ELEMBASE) if (IsObject(elemBase)) { var prop = GetStringDataProperty(elemBase, matched); @@ -96,7 +97,6 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode nCaptures = std_Math_max(result.length - 1, 0); replacement = RegExpGetComplexReplacement(result, matched, S, position, - nCaptures, replaceValue, true, -1); } @@ -104,11 +104,11 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode replacement = replaceValue; #endif - // Step 14.l.ii. 
+ // Step 14.m.ii. accumulatedResult += Substring(S, nextSourcePosition, position - nextSourcePosition) + replacement; - // Step 14.l.iii. + // Step 14.m.iii. nextSourcePosition = lastIndex; // Step 11.c.iii.2. @@ -116,6 +116,7 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode lastIndex = fullUnicode ? AdvanceStringIndex(S, lastIndex) : lastIndex + 1; if (lastIndex > lengthS) break; + lastIndex |= 0; } } diff --git a/js/src/builtin/RegExpLocalReplaceOpt.h.js b/js/src/builtin/RegExpLocalReplaceOpt.h.js index 1acd6a73a4..ac74d17ada 100644 --- a/js/src/builtin/RegExpLocalReplaceOpt.h.js +++ b/js/src/builtin/RegExpLocalReplaceOpt.h.js @@ -60,9 +60,9 @@ function FUNC_NAME(rx, S, lengthS, replaceValue return S; } - // Steps 11.c, 12-13, 14.a-b (skipped). + // Steps 11.c, 12-13 (skipped). -#if defined(FUNCTIONAL) || defined(SUBSTITUTION) +#if defined(FUNCTIONAL) // Steps 14.a-b. var nCaptures = std_Math_max(result.length - 1, 0); #endif @@ -88,19 +88,21 @@ function FUNC_NAME(rx, S, lengthS, replaceValue // Steps g-j. #if defined(FUNCTIONAL) replacement = RegExpGetComplexReplacement(result, matched, S, position, - nCaptures, replaceValue, true, -1); #elif defined(SUBSTITUTION) - replacement = RegExpGetComplexReplacement(result, matched, S, position, - - nCaptures, replaceValue, - false, firstDollarIndex); + // Step l.i + var namedCaptures = result.groups; + if (namedCaptures !== undefined) { + namedCaptures = ToObject(namedCaptures); + } + // Step l.ii + replacement = RegExpGetSubstitution(result, S, position, replaceValue, firstDollarIndex, namedCaptures); #else replacement = replaceValue; #endif - // Step 14.l.ii. + // Step 14.m.ii. var accumulatedResult = Substring(S, 0, position) + replacement; // Step 15. 
diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp index 8bcae4d826..cb691893f2 100644 --- a/js/src/builtin/TestingFunctions.cpp +++ b/js/src/builtin/TestingFunctions.cpp @@ -3827,10 +3827,10 @@ ConvertRegExpTreeToObject(JSContext* cx, irregexp::RegExpTree* tree) return nullptr; return obj; } - if (tree->IsLookahead()) { - if (!StringProp(cx, obj, "type", "Lookahead")) + if (tree->IsLookaround()) { + if (!StringProp(cx, obj, "type", "Lookaround")) return nullptr; - irregexp::RegExpLookahead* t = tree->AsLookahead(); + irregexp::RegExpLookaround* t = tree->AsLookaround(); if (!BooleanProp(cx, obj, "is_positive", t->is_positive())) return nullptr; if (!TreeProp(cx, obj, "body", t->body())) diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp index b464b23048..2539249ad9 100644 --- a/js/src/frontend/TokenStream.cpp +++ b/js/src/frontend/TokenStream.cpp @@ -24,6 +24,7 @@ #include "frontend/BytecodeCompiler.h" #include "frontend/ReservedWords.h" +#include "irregexp/FeatureFlags.h" #include "js/CharacterEncoding.h" #include "js/UniquePtr.h" #include "vm/HelperThreads.h" @@ -1942,6 +1943,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier) reflags = RegExpFlag(reflags | UnicodeFlag); else if (c == 's' && !(reflags & DotAllFlag)) reflags = RegExpFlag(reflags | DotAllFlag); + else if (c == 'v' && irregexp::kParseFlagUnicodeSetsAsUnicode && !(reflags & UnicodeFlag)) + reflags = RegExpFlag(reflags | UnicodeFlag); else break; getChar(); diff --git a/js/src/irregexp/FeatureFlags.h b/js/src/irregexp/FeatureFlags.h new file mode 100644 index 0000000000..1e0178b926 --- /dev/null +++ b/js/src/irregexp/FeatureFlags.h @@ -0,0 +1,20 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef irregexp_FeatureFlags_h +#define irregexp_FeatureFlags_h + +namespace js { + +namespace irregexp { + +// Feature flag to treat /../v as /../u (https://v8.dev/features/regexp-v-flag) +// We don't support Set Notation or the changed Case Insensitive handling +// but we have Property Sequences and want them in unit test runs. +static const bool kParseFlagUnicodeSetsAsUnicode = false; + +} } // namespace js::irregexp + +#endif // irregexp_FeatureFlags_h diff --git a/js/src/irregexp/InfallibleVector.h b/js/src/irregexp/InfallibleVector.h new file mode 100644 index 0000000000..7363ecb1e1 --- /dev/null +++ b/js/src/irregexp/InfallibleVector.h @@ -0,0 +1,103 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_INFALLIBLEVECTOR_H_
+#define V8_INFALLIBLEVECTOR_H_
+
+namespace js {
+namespace irregexp {
+
+// InfallibleVector is like Vector, but all its methods are infallible (they
+// crash on OOM). We use this class instead of Vector to avoid a ton of
+// MOZ_MUST_USE warnings in irregexp code (imported from V8).
+template<typename T, size_t N> // T: element type; N: forwarded as the second template argument of the underlying Vector.
+class InfallibleVector
+{
+ Vector<T, N, LifoAllocPolicy<Infallible>> vector_; // Backing storage that the methods below operate on.
+ // Copying is disabled: the copy constructor and copy assignment are deleted (a move assignment is declared further down).
+ InfallibleVector(const InfallibleVector&) = delete;
+ void operator=(const InfallibleVector&) = delete;
+
+ public:
+ explicit InfallibleVector(const LifoAllocPolicy<Infallible>& alloc) : vector_(alloc) {} // Hands |alloc| to the underlying Vector.
+ // Both append overloads wrap the underlying Vector::append in MOZ_ALWAYS_TRUE instead of returning its result.
+ void append(const T& t) { MOZ_ALWAYS_TRUE(vector_.append(t)); }
+ void append(const T* begin, size_t length) { MOZ_ALWAYS_TRUE(vector_.append(begin, length)); }
+ // NOTE(review): moveReplace indexes the raw begin() pointer and no bounds check is visible here -- presumably callers keep both ranges inside length(); confirm.
+ // Copy |count| elements starting at index |from| to the positions starting at
+ // index |to| in the same vector. Handles overlapping source and target areas.
+ void moveReplace(int from, int to, int count)
+ {
+ T* array = begin();
+ if (from < to) { // Target lies after source: copy back-to-front so overlapping source elements are read before they are overwritten.
+ for (int i = count - 1; i >= 0; i--)
+ array[to + i] = array[from + i];
+ } else { // Target lies at or before source: copy front-to-back.
+ for (int i = 0; i < count; i++)
+ array[to + i] = array[from + i];
+ }
+ }
+ // Forwarders to the underlying Vector; reserve() additionally wraps the call in MOZ_ALWAYS_TRUE.
+ void clear() { vector_.clear(); }
+ void popBack() { vector_.popBack(); }
+ void reserve(size_t n) { MOZ_ALWAYS_TRUE(vector_.reserve(n)); }
+
+
+ size_t length() const { return vector_.length(); }
+ T popCopy() { return vector_.popCopy(); }
+
+ T* begin() { return vector_.begin(); }
+ const T* begin() const { return vector_.begin(); }
+
+ T* end() { return vector_.end(); }
+ const T* end() const { return vector_.end(); }
+
+ T& operator[](size_t index) { return vector_[index]; }
+ const T& operator[](size_t index) const { return vector_[index]; }
+ // Move assignment: move-assigns the underlying Vector from |rhs| and returns *this.
+ InfallibleVector& operator=(InfallibleVector&& rhs) { vector_ = Move(rhs.vector_); return *this; }
+ // Equality is a length check followed by memcmp over length() * sizeof(T) bytes, i.e. a bytewise comparison rather than T's operator==.
+ bool equals(const InfallibleVector& other) const {
+ if (length() != other.length()) {
+ return false;
+ }
+ return 0 == memcmp(begin(), other.begin(), length() * sizeof(T));
+ }
+ inline bool operator==(const InfallibleVector& rhs) const { // Operator form of equals().
+ return equals(rhs);
+ }
+};
+
+typedef InfallibleVector<char16_t, 10> CharacterVector; // InfallibleVector of char16_t elements.
+typedef InfallibleVector<CharacterVector*, 1> CharacterVectorVector; // InfallibleVector of pointers to CharacterVector.
+typedef InfallibleVector<int32_t, 10> IntegerVector; // InfallibleVector of int32_t elements.
+
+} } // namespace js::irregexp
+
+#endif // V8_INFALLIBLEVECTOR_H_
\ No newline at end of file diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp index a3756f5fff..41c1951bc2 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp @@ -71,13 +71,13 @@ NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(LifoAlloc* alloc, RegExpS // Find physical registers for each compiler register. AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + temp0 = regs.takeAny(); + temp1 = regs.takeAny(); + temp2 = regs.takeAny(); input_end_pointer = regs.takeAny(); current_character = regs.takeAny(); current_position = regs.takeAny(); backtrack_stack_pointer = regs.takeAny(); - temp0 = regs.takeAny(); - temp1 = regs.takeAny(); - temp2 = regs.takeAny(); JitSpew(JitSpew_Codegen, "Starting RegExp (input_end_pointer %s) (current_character %s)" @@ -548,39 +548,28 @@ NativeRegExpMacroAssembler::Bind(Label* label) } void -NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start) -{ - JitSpew(SPEW_PREFIX "CheckAtStart"); - - Label not_at_start; - - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), ¬_at_start); - - // If we did, are we still at the start of the input? 
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); +NativeRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond, + Assembler::Condition cond) { + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::Equal, inputStart, temp0, BranchOrBacktrack(on_at_start)); + masm.branchPtr(cond, inputStart, temp0, BranchOrBacktrack(on_cond)); +} + +void +NativeRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) +{ + JitSpew(SPEW_PREFIX "CheckAtStart"); - masm.bind(¬_at_start); + CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal); } void -NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start) +NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start) { JitSpew(SPEW_PREFIX "CheckNotAtStart"); - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), BranchOrBacktrack(on_not_at_start)); - - // If we did, are we still at the start of the input? 
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); - - Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::NotEqual, inputStart, temp0, BranchOrBacktrack(on_not_at_start)); + CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual); } void @@ -659,211 +648,204 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio } void -NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match) +NativeRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, bool read_backward, + Label* on_no_match, + bool unicode, bool ignore_case) { - JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - Label fallthrough; - Label success; - Label fail; - - // Find length of back-referenced capture. - masm.loadPtr(register_location(start_reg), current_character); - masm.loadPtr(register_location(start_reg + 1), temp0); - masm.subPtr(current_character, temp0); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - masm.branchPtr(Assembler::LessThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Captures are stored as a sequential pair of registers. + // Find the length of the back-referenced capture and load the + // capture's start index into current_character_ + masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture + masm.loadPtr(register_location(start_reg + 1), temp0); // Index of end of capture + masm.subPtr(current_character, temp0); // Length of capture. - // Succeed on empty capture (including no capture). + // If length is zero, either the capture is empty or it is completely + // uncaptured. In either case succeed immediately. masm.branchPtr(Assembler::Equal, temp0, ImmWord(0), &fallthrough); // Check that there are sufficient characters left in the input. 
- masm.movePtr(current_position, temp1); - masm.addPtr(temp0, temp1); - masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); - - // Save register to make it available below. - masm.push(backtrack_stack_pointer); - - // Compute pointers to match string and capture string - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp1); // Start of match. - masm.addPtr(input_end_pointer, current_character); // Start of capture. - masm.computeEffectiveAddress(BaseIndex(temp0, temp1, TimesOne), backtrack_stack_pointer); // End of match. - - Label loop; - masm.bind(&loop); - if (mode_ == ASCII) { - masm.load8ZeroExtend(Address(current_character, 0), temp0); - masm.load8ZeroExtend(Address(temp1, 0), temp2); + if (read_backward) { + // If start + len > current, there isn't enough room for a + // lookbehind backreference. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.loadPtr(inputStart, temp1); + masm.subPtr(input_end_pointer, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, current_position, + BranchOrBacktrack(on_no_match)); } else { - MOZ_ASSERT(mode_ == CHAR16); - masm.load16ZeroExtend(Address(current_character, 0), temp0); - masm.load16ZeroExtend(Address(temp1, 0), temp2); + // current_position is the negative offset from the end. + // If current + len > 0, there isn't enough room for a backreference. + masm.movePtr(current_position, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), + BranchOrBacktrack(on_no_match)); } - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); - // Increment pointers into capture and match string. - masm.addPtr(Imm32(char_size()), current_character); - masm.addPtr(Imm32(char_size()), temp1); - - // Check if we have reached end of match area. 
- masm.branchPtr(Assembler::Below, temp1, backtrack_stack_pointer, &loop); - masm.jump(&success); - - masm.bind(&fail); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - JumpOrBacktrack(on_no_match); - - masm.bind(&success); - - // Move current character position to position after match. - masm.movePtr(backtrack_stack_pointer, current_position); - masm.subPtr(input_end_pointer, current_position); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - - masm.bind(&fallthrough); -} - -void -NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match, - bool unicode) -{ - JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); - - Label fallthrough; + if (mode_ == CHAR16 && ignore_case) { + // We call a helper function for case-insensitive non-latin1 strings. + // Save volatile regs. temp1, temp2, and current_character + // don't need to be saved. current_position needs to be saved + // even if it's non-volatile, because we modify it to use as an argument. + LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); + volatileRegs.addUnchecked(current_position); + volatileRegs.takeUnchecked(temp1); + volatileRegs.takeUnchecked(temp2); + volatileRegs.takeUnchecked(current_character); + masm.PushRegsInMask(volatileRegs); - masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture - masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture - masm.subPtr(current_character, temp1); // Length of capture. + // Parameters are + // Address byte_offset1 - Address captured substring's start. + // Address byte_offset2 - Address of current character position. + // size_t byte_length - length of capture in bytes(!) - // The length of a capture should not be negative. This can only happen - // if the end of the capture is unrecorded, or at a point earlier than - // the start of the capture. 
- masm.branchPtr(Assembler::LessThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Set byte_offset1. + // Start of capture, where current_character already holds string-end negative offset. + masm.addPtr(input_end_pointer, current_character); - // If length is zero, either the capture is empty or it is completely - // uncaptured. In either case succeed immediately. - masm.branchPtr(Assembler::Equal, temp1, ImmWord(0), &fallthrough); + // Set byte_offset2. + // Found by adding negative string-end offset of current position + // to end of string. + masm.addPtr(input_end_pointer, current_position); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp1, current_position); + } - // Check that there are sufficient characters left in the input. - masm.movePtr(current_position, temp0); - masm.addPtr(temp1, temp0); - masm.branchPtr(Assembler::GreaterThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + masm.setupUnalignedABICall(temp1); + masm.passABIArg(current_character); + masm.passABIArg(current_position); + masm.passABIArg(temp0); + if (unicode) { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } else { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } + masm.storeCallInt32Result(temp1); + masm.PopRegsInMask(volatileRegs); + // Check if function returned non-zero for success or zero for failure. + masm.branchTest32(Assembler::Zero, temp1, temp1, BranchOrBacktrack(on_no_match)); - if (mode_ == ASCII) { - Label success, fail; + // On success, advance position by length of capture + if (read_backward) { + masm.subPtr(temp0, current_position); + } else { + masm.addPtr(temp0, current_position); + } + } else { + MOZ_ASSERT(mode_ == ASCII || !ignore_case); // Save register contents to make the registers available below. 
After - // this, the temp0, temp2, and current_position registers are available. + // this, the temp1, temp2, and current_position registers are available. masm.push(current_position); + // Make offset values into pointers masm.addPtr(input_end_pointer, current_character); // Start of capture. masm.addPtr(input_end_pointer, current_position); // Start of text to match against capture. - masm.addPtr(current_position, temp1); // End of text to match against capture. - - Label loop, loop_increment; - masm.bind(&loop); - masm.load8ZeroExtend(Address(current_position, 0), temp0); - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.branch32(Assembler::Equal, temp0, temp2, &loop_increment); - - // Mismatch, try case-insensitive match (converting letters to lower-case). - masm.or32(Imm32(0x20), temp0); // Convert match character to lower-case. - - // Is temp0 a lowercase letter? - Label convert_capture; - masm.computeEffectiveAddress(Address(temp0, -'a'), temp2); - masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), - &convert_capture); - // Latin-1: Check for values in range [224,254] but not 247. - masm.sub32(Imm32(224 - 'a'), temp2); - masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); - - // Check for 247. - masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp0, current_position); + } - masm.bind(&convert_capture); + // End of text to match against capture (temp0 is pointer now) + masm.addPtr(current_position, temp0); - // Also convert capture character. - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.or32(Imm32(0x20), temp2); + Label success, fail, loop; + masm.bind(&loop); - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); + // Load next character from each string. 
+ if (mode_ == ASCII) { + masm.load8ZeroExtend(Address(current_character, 0), temp1); + masm.load8ZeroExtend(Address(current_position, 0), temp2); + } else { + masm.load16ZeroExtend(Address(current_character, 0), temp1); + masm.load16ZeroExtend(Address(current_position, 0), temp2); + } - masm.bind(&loop_increment); + if (ignore_case) { + MOZ_ASSERT(mode_ == ASCII); + Label loop_increment, convert_match; + + // Try exact match. + masm.branch32(Assembler::Equal, temp1, temp2, &loop_increment); + + // Mismatch, try case-insensitive match (converting letters to lower-case). + masm.or32(Imm32(0x20), temp1); // Convert match character to lower-case. + + // Is temp1 a lowercase letter [a,z]? + masm.computeEffectiveAddress(Address(temp1, -'a'), temp2); + masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), + &convert_match); + // Latin-1: Check for values in range [224,254] but not 247 (U+00F7 DIVISION SIGN). + masm.sub32(Imm32(224 - 'a'), temp2); + masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); + // Check for 247. + masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + + // Capture character is lower case. Convert match character to lower case and compare + masm.bind(&convert_match); + // Reload latin1 character since temp2 was clobbered above + masm.load8ZeroExtend(Address(current_position, 0), temp2); + masm.or32(Imm32(0x20), temp2); + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + + masm.bind(&loop_increment); + } else { + // Fail if characters do not match. + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + } // Increment pointers into match and capture strings. - masm.addPtr(Imm32(1), current_character); - masm.addPtr(Imm32(1), current_position); + masm.addPtr(Imm32(char_size()), current_character); + masm.addPtr(Imm32(char_size()), current_position); - // Compare to end of match, and loop if not done. 
- masm.branchPtr(Assembler::Below, current_position, temp1, &loop); + // Loop if we have not reached the end of the match string. + masm.branchPtr(Assembler::Below, current_position, temp0, &loop); masm.jump(&success); - masm.bind(&fail); - // Restore original values before failing. + masm.bind(&fail); masm.pop(current_position); JumpOrBacktrack(on_no_match); masm.bind(&success); - // Drop original character position value. - masm.addToStackPtr(Imm32(sizeof(uintptr_t))); + masm.pop(temp0); - // Compute new value of character position after the matched part. + // current_position is a pointer (now at the end of the consumed characters). Convert it back to an offset. masm.subPtr(input_end_pointer, current_position); - } else { - MOZ_ASSERT(mode_ == CHAR16); - - // Note: temp1 needs to be saved/restored if it is volatile, as it is used after the call. - LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); - volatileRegs.takeUnchecked(temp0); - volatileRegs.takeUnchecked(temp2); - masm.PushRegsInMask(volatileRegs); - - // Set byte_offset1. - // Start of capture, where current_character already holds string-end negative offset. - masm.addPtr(input_end_pointer, current_character); - // Set byte_offset2. - // Found by adding negative string-end offset of current position - // to end of string. - masm.addPtr(input_end_pointer, current_position); - - // Parameters are - // Address byte_offset1 - Address captured substring's start. - // Address byte_offset2 - Address of current character position. - // size_t byte_length - length of capture in bytes(!) 
- masm.setupUnalignedABICall(temp0); - masm.passABIArg(current_character); - masm.passABIArg(current_position); - masm.passABIArg(temp1); - if (!unicode) { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); - } else { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + if (read_backward) { + // Subtract match length if we matched backward + masm.addPtr(register_location(start_reg), current_position); + masm.subPtr(register_location(start_reg + 1), current_position); } - masm.storeCallInt32Result(temp0); + } - masm.PopRegsInMask(volatileRegs); + // Fallthrough if capture length was zero + masm.bind(&fallthrough); +} - // Check if function returned non-zero for success or zero for failure. - masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match)); +void +NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - // On success, increment position by length of capture. - masm.addPtr(temp1, current_position); - } + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, /*unicode = */ false, /*ignore_case = */ false); +} - masm.bind(&fallthrough); +void +NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + Label* on_no_match, bool unicode) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); + + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, unicode, /*ignore_case = */ true); } void @@ -961,10 +943,13 @@ NativeRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, Label* on_end_of { JitSpew(SPEW_PREFIX "LoadCurrentCharacter(%d, %d)", cp_offset, characters); - MOZ_ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. 
MOZ_ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -972,9 +957,8 @@ void NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int characters) { JitSpew(SPEW_PREFIX "LoadCurrentCharacterUnchecked(%d, %d)", cp_offset, characters); - + BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * char_size()); if (mode_ == ASCII) { - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset); if (characters == 4) { masm.load32(address, current_character); } else if (characters == 2) { @@ -986,7 +970,6 @@ NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int cha } else { MOZ_ASSERT(mode_ == CHAR16); MOZ_ASSERT(characters <= 2); - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * sizeof(char16_t)); if (characters == 2) masm.load32(address, current_character); else @@ -1096,10 +1079,11 @@ NativeRegExpMacroAssembler::CheckBacktrackStackLimit() masm.moveStackPtrTo(temp2); masm.call(&stack_overflow_label_); - masm.bind(&no_stack_overflow); // Exit with an exception if the call failed. 
masm.branchTest32(Assembler::Zero, temp0, temp0, &exit_with_exception_label_); + + masm.bind(&no_stack_overflow); } void @@ -1213,8 +1197,21 @@ void NativeRegExpMacroAssembler::CheckPosition(int cp_offset, Label* on_outside_input) { JitSpew(SPEW_PREFIX "CheckPosition(%d)", cp_offset); - masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, - ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + if (cp_offset >= 0) { + // end + current + offset >= end + // <=> current + offset >= 0 + // <=> current >= -offset + masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, + ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + } else { + // negative cp_offset means we're reading backwards, check against start of string + // Compute offset address + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); + + // Compare to start of input. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.branchPtr(Assembler::GreaterThan, inputStart, temp0, BranchOrBacktrack(on_outside_input)); + } } Label* diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h index 6bb14ab662..857900cabf 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.h +++ b/js/src/irregexp/NativeRegExpMacroAssembler.h @@ -98,15 +98,16 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem void AdvanceRegister(int reg, int by); void Backtrack(); void Bind(jit::Label* label); - void CheckAtStart(jit::Label* on_at_start); + void CheckAtStart(int cp_offset, jit::Label* on_at_start); void CheckCharacter(unsigned c, jit::Label* on_equal); void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal); void CheckCharacterGT(char16_t limit, jit::Label* on_greater); void CheckCharacterLT(char16_t limit, jit::Label* on_less); void CheckGreedyLoop(jit::Label* 
on_tos_equals_current_position); - void CheckNotAtStart(jit::Label* on_not_at_start); - void CheckNotBackReference(int start_reg, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); + void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start); + void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with, @@ -202,13 +203,17 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem Vector<LabelPatch, 4, SystemAllocPolicy> labelPatches; - // See RegExpMacroAssembler.cpp for the meaning of these registers. + // See NativeRegExpMacroAssembler.cpp for the meaning of these registers. jit::Register input_end_pointer; jit::Register current_character; jit::Register current_position; jit::Register backtrack_stack_pointer; jit::Register temp0, temp1, temp2; + void CheckAtStartImpl(int cp_offset, jit::Label* on_cond, jit::Assembler::Condition cond); + void CheckNotBackReferenceImpl(int start_reg, bool read_backward, jit::Label* on_no_match, + bool unicode, bool ignore_case); + // The frame_pointer-relative location of a regexp register. 
jit::Address register_location(int register_index) { checkRegister(register_index); diff --git a/js/src/irregexp/RegExpAST.cpp b/js/src/irregexp/RegExpAST.cpp index 14dfe8cea5..dc8d3b4c2c 100644 --- a/js/src/irregexp/RegExpAST.cpp +++ b/js/src/irregexp/RegExpAST.cpp @@ -249,16 +249,16 @@ RegExpCapture::CaptureRegisters() } // ---------------------------------------------------------------------------- -// RegExpLookahead +// RegExpLookaround Interval -RegExpLookahead::CaptureRegisters() +RegExpLookaround::CaptureRegisters() { return body()->CaptureRegisters(); } bool -RegExpLookahead::IsAnchoredAtStart() +RegExpLookaround::IsAnchoredAtStart() { - return is_positive() && body()->IsAnchoredAtStart(); + return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); } diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h index bff4ee81dd..9e023d537f 100644 --- a/js/src/irregexp/RegExpAST.h +++ b/js/src/irregexp/RegExpAST.h @@ -234,8 +234,6 @@ class RegExpCharacterClass : public RegExpTree bool is_negated_; }; -typedef InfallibleVector<char16_t, 10> CharacterVector; - class RegExpAtom : public RegExpTree { public: @@ -341,7 +339,7 @@ class RegExpCapture : public RegExpTree { public: explicit RegExpCapture(RegExpTree* body, int index) - : body_(body), index_(index) + : body_(body), index_(index), name_(nullptr) {} virtual void* Accept(RegExpVisitor* visitor, void* data); @@ -359,34 +357,42 @@ class RegExpCapture : public RegExpTree virtual int min_match() { return body_->min_match(); } virtual int max_match() { return body_->max_match(); } RegExpTree* body() { return body_; } + void set_body(RegExpTree* body) { body_ = body; } int index() { return index_; } + const CharacterVector* name() const { return name_; } + void set_name(const CharacterVector* name) { name_ = name; } static int StartRegister(int index) { return index * 2; } static int EndRegister(int index) { return index * 2 + 1; } private: RegExpTree* body_; int index_; + const 
CharacterVector* name_; }; -class RegExpLookahead : public RegExpTree +class RegExpLookaround : public RegExpTree { public: - RegExpLookahead(RegExpTree* body, - bool is_positive, - int capture_count, - int capture_from) + enum Type { LOOKAHEAD, LOOKBEHIND }; + + RegExpLookaround(RegExpTree* body, + bool is_positive, + int capture_count, + int capture_from, + Type type) : body_(body), is_positive_(is_positive), capture_count_(capture_count), - capture_from_(capture_from) + capture_from_(capture_from), + type_(type) {} virtual void* Accept(RegExpVisitor* visitor, void* data); virtual RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success); - virtual RegExpLookahead* AsLookahead(); + virtual RegExpLookaround* AsLookaround(); virtual Interval CaptureRegisters(); - virtual bool IsLookahead(); + virtual bool IsLookaround(); virtual bool IsAnchoredAtStart(); virtual int min_match() { return 0; } virtual int max_match() { return 0; } @@ -394,12 +400,14 @@ class RegExpLookahead : public RegExpTree bool is_positive() { return is_positive_; } int capture_count() { return capture_count_; } int capture_from() { return capture_from_; } + Type type() { return type_; } private: RegExpTree* body_; bool is_positive_; int capture_count_; int capture_from_; + Type type_; }; typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector; @@ -408,7 +416,7 @@ class RegExpBackReference : public RegExpTree { public: explicit RegExpBackReference(RegExpCapture* capture) - : capture_(capture) + : capture_(capture), name_(nullptr) {} virtual void* Accept(RegExpVisitor* visitor, void* data); @@ -416,14 +424,22 @@ class RegExpBackReference : public RegExpTree RegExpNode* on_success); virtual RegExpBackReference* AsBackReference(); virtual bool IsBackReference(); - virtual int min_match() { return 0; } - virtual int max_match() { return capture_->max_match(); } + virtual int min_match() override { return 0; } + // The back reference may be recursive, e.g. /(\2)(\1)/. 
To avoid infinite + // recursion, we give up. Ignorance is bliss. + int max_match() override { return kInfinity; } int index() { return capture_->index(); } RegExpCapture* capture() { return capture_; } + void set_capture(RegExpCapture* capture) { capture_ = capture; } + const CharacterVector* name() const { return name_; } + void set_name(const CharacterVector* name) { name_ = name; } private: RegExpCapture* capture_; + const CharacterVector* name_; }; +typedef InfallibleVector<RegExpBackReference*, 1> RegExpBackReferenceVector; + class RegExpEmpty : public RegExpTree { public: diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h index 7454f88f73..42326b3d47 100644 --- a/js/src/irregexp/RegExpBytecode.h +++ b/js/src/irregexp/RegExpBytecode.h @@ -81,16 +81,19 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \ V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ -V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \ -V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \ -V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \ -V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \ -V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \ -V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \ -V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */ +V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ +V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \ +V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \ 
+V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \ +V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \ +V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \ +V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \ +V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */ #define DECLARE_BYTECODES(name, code, length) \ static const int BC_##name = code; diff --git a/js/src/irregexp/RegExpCharRanges.cpp b/js/src/irregexp/RegExpCharRanges.cpp new file mode 100644 index 0000000000..87a4f94aa1 --- /dev/null +++ b/js/src/irregexp/RegExpCharRanges.cpp @@ -0,0 +1,2069 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "irregexp/RegExpCharRanges.h" + +#include "unicode/uniset.h" + +// Generated table +#include "irregexp/RegExpCharacters-inl.h" + +using namespace js::irregexp; + +using mozilla::ArrayLength; + +void +CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges) +{ + char16_t bottom = from(); + char16_t top = to(); + + if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) { + if (bottom > kMaxOneByteCharCode) + return; + if (top > kMaxOneByteCharCode) + top = kMaxOneByteCharCode; + } + + for (char16_t c = bottom;; c++) { + char16_t chars[kEcma262UnCanonicalizeMaxWidth]; + size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars); + + for (size_t i = 0; i < length; i++) { + char16_t other = chars[i]; + if (other == c) + continue; + + // Try to combine with an existing range. 
+ bool found = false; + for (size_t i = 0; i < ranges->length(); i++) { + CharacterRange& range = (*ranges)[i]; + if (range.Contains(other)) { + found = true; + break; + } else if (other == range.from() - 1) { + range.set_from(other); + found = true; + break; + } else if (other == range.to() + 1) { + range.set_to(other); + found = true; + break; + } + } + + if (!found) + ranges->append(CharacterRange::Singleton(other)); + } + + if (c == top) + break; + } +} + +/* static */ +void +CharacterRange::AddClass(const int* elmv, int elmc, CharacterRangeVector* ranges) +{ + elmc--; + MOZ_ASSERT(elmv[elmc] == 0x10000); + for (int i = 0; i < elmc; i += 2) { + MOZ_ASSERT(elmv[i] < elmv[i + 1]); + ranges->append(CharacterRange(elmv[i], elmv[i + 1] - 1)); + } +} + +/* static */ void +CharacterRange::AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges) +{ + elmc--; + MOZ_ASSERT(elmv[elmc] == 0x10000); + MOZ_ASSERT(elmv[0] != 0x0000); + MOZ_ASSERT(elmv[elmc-1] != kMaxUtf16CodeUnit); + char16_t last = 0x0000; + for (int i = 0; i < elmc; i += 2) { + MOZ_ASSERT(last <= elmv[i] - 1); + MOZ_ASSERT(elmv[i] < elmv[i + 1]); + ranges->append(CharacterRange(last, elmv[i] - 1)); + last = elmv[i + 1]; + } + ranges->append(CharacterRange(last, kMaxUtf16CodeUnit)); +} + +/* static */ void +CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type, + CharacterRangeVector* ranges) +{ + switch (type) { + case 's': + AddClass(kSpaceRanges, kSpaceRangeCount, ranges); + break; + case 'S': + AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); + break; + case 'w': + AddClass(kWordRanges, kWordRangeCount, ranges); + break; + case 'W': + AddClassNegated(kWordRanges, kWordRangeCount, ranges); + break; + case 'd': + AddClass(kDigitRanges, kDigitRangeCount, ranges); + break; + case 'D': + AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); + break; + case '.': + AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); + break; + // This is not a character 
range as defined by the spec but a + // convenient shorthand for a character class that matches any + // character. + case '*': + ranges->append(CharacterRange::Everything()); + break; + // This is the set of characters matched by the $ and ^ symbols + // in multiline mode. + case 'n': + AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); + break; + default: + MOZ_CRASH("Bad character class escape"); + } +} + +// Add class escape, excluding surrogate pair range. +/* static */ void +CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, + CharacterRangeVector* ranges, bool ignore_case) +{ + switch (type) { + case 's': + case 'd': + return AddClassEscape(alloc, type, ranges); + break; + case 'S': + AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges); + break; + case 'w': + if (ignore_case) + AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges); + else + AddClassEscape(alloc, type, ranges); + break; + case 'W': + if (ignore_case) { + AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges, + kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges); + } else { + AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges); + } + break; + case 'D': + AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges); + break; + default: + MOZ_CRASH("Bad type!"); + } +} + +/* static */ void +CharacterRange::AddCharOrEscape(LifoAlloc* alloc, CharacterRangeVector* ranges, + char16_t char_class, widechar c) +{ + if (char_class != kNoCharClass) + AddClassEscape(alloc, char_class, ranges); + else + ranges->append(CharacterRange::Singleton(c)); +} + +/* static */ void +CharacterRange::AddCharOrEscapeUnicode(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + char16_t char_class, + widechar c, + bool ignore_case) +{ + if (char_class != kNoCharClass) { + 
AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case); + switch (char_class) { + case 'S': + case 'W': + case 'D': + lead_ranges->append(CharacterRange::LeadSurrogate()); + trail_ranges->append(CharacterRange::TrailSurrogate()); + wide_ranges->append(WideCharRange::NonBMP()); + break; + case '.': + MOZ_CRASH("Bad char_class!"); + } + return; + } + + if (unicode::IsLeadSurrogate(c)) + lead_ranges->append(CharacterRange::Singleton(c)); + else if (unicode::IsTrailSurrogate(c)) + trail_ranges->append(CharacterRange::Singleton(c)); + else if (c >= unicode::NonBMPMin) + wide_ranges->append(WideCharRange::Singleton(c)); + else + ranges->append(CharacterRange::Singleton(c)); +} + +/* static */ void +CharacterRange::AddCharUnicode(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + widechar c) +{ + if (unicode::IsLeadSurrogate(c)) + lead_ranges->append(CharacterRange::Singleton(c)); + else if (unicode::IsTrailSurrogate(c)) + trail_ranges->append(CharacterRange::Singleton(c)); + else if (c >= unicode::NonBMPMin) + wide_ranges->append(WideCharRange::Singleton(c)); + else + ranges->append(CharacterRange::Singleton(c)); +} + +/* static */ void +CharacterRange::AddUnicodeRange(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + widechar first, + widechar next) +{ + MOZ_ASSERT(first <= next); + if (first < unicode::LeadSurrogateMin) { + if (next < unicode::LeadSurrogateMin) { + ranges->append(CharacterRange::Range(first, next)); + return; + } + ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1)); + first = unicode::LeadSurrogateMin; + } + if (first <= unicode::LeadSurrogateMax) { + if (next <= unicode::LeadSurrogateMax) { + lead_ranges->append(CharacterRange::Range(first, next)); + return; + } + 
lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax)); + first = unicode::LeadSurrogateMax + 1; + } + MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin); + if (first <= unicode::TrailSurrogateMax) { + if (next <= unicode::TrailSurrogateMax) { + trail_ranges->append(CharacterRange::Range(first, next)); + return; + } + trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax)); + first = unicode::TrailSurrogateMax + 1; + } + if (first <= unicode::UTF16Max) { + if (next <= unicode::UTF16Max) { + ranges->append(CharacterRange::Range(first, next)); + return; + } + ranges->append(CharacterRange::Range(first, unicode::UTF16Max)); + first = unicode::NonBMPMin; + } + MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin); + wide_ranges->append(WideCharRange::Range(first, next)); +} + +/* static */ bool +CharacterRange::RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode) +{ + for (size_t i = 0; i < ranges.length(); i++) { + // TODO(dcarney): this could be a lot more efficient. + if (RangeContainsLatin1Equivalents(ranges[i], unicode)) + return true; + } + return false; +} + +/* static */ bool +CharacterRange::CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length) +{ + length--; // Remove final 0x10000. + MOZ_ASSERT(special_class[length] == 0x10000); + if (ranges.length() * 2 != length) + return false; + for (size_t i = 0; i < length; i += 2) { + CharacterRange range = ranges[i >> 1]; + if (range.from() != special_class[i] || range.to() != special_class[i + 1] - 1) + return false; + } + return true; +} + +/* static */ bool +CharacterRange::CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length) +{ + length--; // Remove final 0x10000. 
+ MOZ_ASSERT(special_class[length] == 0x10000); + MOZ_ASSERT(ranges.length() != 0); + MOZ_ASSERT(length != 0); + MOZ_ASSERT(special_class[0] != 0); + if (ranges.length() != (length >> 1) + 1) + return false; + CharacterRange range = ranges[0]; + if (range.from() != 0) + return false; + for (size_t i = 0; i < length; i += 2) { + if (special_class[i] != (range.to() + 1)) + return false; + range = ranges[(i >> 1) + 1]; + if (special_class[i+1] != range.from()) + return false; + } + if (range.to() != 0xffff) + return false; + return true; +} + +template <typename RangeType> +/* static */ void +CharacterRange::NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges, + RangeType full_range) +{ + typedef InfallibleVector<RangeType, 1> RangeVector; + RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc); + tmp_ranges->append(full_range); + RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc); + + // Perform the following calculation: + // result_ranges = tmp_ranges - ranges + // with the following steps: + // result_ranges = tmp_ranges - ranges[0] + // SWAP(result_ranges, tmp_ranges) + // result_ranges = tmp_ranges - ranges[1] + // SWAP(result_ranges, tmp_ranges) + // ... + // result_ranges = tmp_ranges - ranges[N-1] + // SWAP(result_ranges, tmp_ranges) + // The last SWAP is just for simplicity of the loop. 
+ for (size_t i = 0; i < (*ranges)->length(); i++) { + result_ranges->clear(); + + const RangeType& range = (**ranges)[i]; + for (size_t j = 0; j < tmp_ranges->length(); j++) { + const RangeType& tmpRange = (*tmp_ranges)[j]; + auto from1 = tmpRange.from(); + auto to1 = tmpRange.to(); + auto from2 = range.from(); + auto to2 = range.to(); + + if (from1 < from2) { + if (to1 < from2) { + result_ranges->append(tmpRange); + } else if (to1 <= to2) { + result_ranges->append(RangeType::Range(from1, from2 - 1)); + } else { + result_ranges->append(RangeType::Range(from1, from2 - 1)); + result_ranges->append(RangeType::Range(to2 + 1, to1)); + } + } else if (from1 <= to2) { + if (to1 > to2) + result_ranges->append(RangeType::Range(to2 + 1, to1)); + } else { + result_ranges->append(tmpRange); + } + } + + auto tmp = tmp_ranges; + tmp_ranges = result_ranges; + result_ranges = tmp; + } + + // After the loop, result is pointed at by tmp_ranges, instead of + // result_ranges. + *ranges = tmp_ranges; +} + +// Explicit specialization for NegateUnicodeRanges +template void CharacterRange::NegateUnicodeRanges<CharacterRange>(LifoAlloc* alloc, InfallibleVector<CharacterRange, 1>** ranges, CharacterRange full_range); +template void CharacterRange::NegateUnicodeRanges<WideCharRange>(LifoAlloc* alloc, InfallibleVector<WideCharRange, 1>** ranges, WideCharRange full_range); + + +namespace { +// private namespace to not pollute js::irregexp + +bool IsExactPropertyAlias(const std::string& property_name, UProperty property) { + const char* short_name = u_getPropertyName(property, U_SHORT_PROPERTY_NAME); + if (short_name != nullptr && short_name == property_name) + return true; + for (int i = 0;; i++) { + const char* long_name = u_getPropertyName(property, static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); + if (long_name == nullptr) break; + if (long_name == property_name) return true; + } + return false; +} + +bool IsExactPropertyValueAlias(const std::string& property_value_name, 
UProperty property, int32_t property_value) { + const char* short_name = u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME); + if (short_name != nullptr && short_name == property_value_name) + return true; + for (int i = 0;; i++) { + const char* long_name = u_getPropertyValueName(property, property_value, + static_cast<UPropertyNameChoice>(U_LONG_PROPERTY_NAME + i)); + if (long_name == nullptr) break; + if (long_name == property_value_name) return true; + } + return false; +} + +bool LookupPropertyValueName(LifoAlloc* alloc, + UProperty property, + const std::string& property_value_name, bool negate, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges) { + UProperty property_for_lookup = property; + if (property_for_lookup == UCHAR_SCRIPT_EXTENSIONS) { + // For the property Script_Extensions, we have to do the property value + // name lookup as if the property is Script. + property_for_lookup = UCHAR_SCRIPT; + } + int32_t property_value = u_getPropertyValueEnum(property_for_lookup, property_value_name.c_str()); + if (property_value == UCHAR_INVALID_CODE) return false; + + // We require the property name to match exactly to one of the property value + // aliases. However, u_getPropertyValueEnum uses loose matching. 
+ if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, property_value)) { + return false; + } + + UErrorCode ec = U_ZERO_ERROR; + icu::UnicodeSet set; + set.applyIntPropertyValue(property, property_value, ec); + bool success = ec == U_ZERO_ERROR && !set.isEmpty(); + + if (success) { + set.removeAllStrings(); + if (negate) set.complement(); + for (int i = 0; i < set.getRangeCount(); i++) { + CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, + set.getRangeStart(i), set.getRangeEnd(i)); + } + } + return success; +} + +bool LookupSpecialPropertyValueName(LifoAlloc* alloc, + const std::string& name, bool negate, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges) { + if (name == "Any") { + if (negate) { + // Leave the list of character ranges empty, since the negation of 'Any' + // is the empty set. + } else { + CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, + 0, unicode::NonBMPMax); + } + } else + if (name == "ASCII") { + if (negate) { + // negative ASCII contains all planes + CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, + 0x80, unicode::NonBMPMax); + } else { + // positve ASCII is just low codepoints + ranges->append(CharacterRange::Range(0x00, 0x7F)); + } + } else + if (name == "Assigned") { + return LookupPropertyValueName(alloc, UCHAR_GENERAL_CATEGORY, "Unassigned", !negate, + ranges, lead_ranges, trail_ranges, wide_ranges); + } else { + return false; + } + return true; +} + +bool IsSupportedBinaryProperty(UProperty property) { + // Explicitly allowlist supported binary properties. The spec forbids supporting + // properties outside of this set to ensure interoperability. + switch (property) { + case UCHAR_ALPHABETIC: + // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName. + // 'ASCII' is not supported by ICU. 
See LookupSpecialPropertyValueName. + case UCHAR_ASCII_HEX_DIGIT: + // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName. + case UCHAR_BIDI_CONTROL: + case UCHAR_BIDI_MIRRORED: + case UCHAR_CASE_IGNORABLE: + case UCHAR_CASED: + case UCHAR_CHANGES_WHEN_CASEFOLDED: + case UCHAR_CHANGES_WHEN_CASEMAPPED: + case UCHAR_CHANGES_WHEN_LOWERCASED: + case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED: + case UCHAR_CHANGES_WHEN_TITLECASED: + case UCHAR_CHANGES_WHEN_UPPERCASED: + case UCHAR_DASH: + case UCHAR_DEFAULT_IGNORABLE_CODE_POINT: + case UCHAR_DEPRECATED: + case UCHAR_DIACRITIC: + case UCHAR_EMOJI: + case UCHAR_EMOJI_COMPONENT: + case UCHAR_EMOJI_MODIFIER_BASE: + case UCHAR_EMOJI_MODIFIER: + case UCHAR_EMOJI_PRESENTATION: + // case UCHAR_EXTENDED_PICTOGRAPHIC: + case UCHAR_EXTENDER: + case UCHAR_GRAPHEME_BASE: + case UCHAR_GRAPHEME_EXTEND: + case UCHAR_HEX_DIGIT: + case UCHAR_ID_CONTINUE: + case UCHAR_ID_START: + case UCHAR_IDEOGRAPHIC: + case UCHAR_IDS_BINARY_OPERATOR: + case UCHAR_IDS_TRINARY_OPERATOR: + case UCHAR_JOIN_CONTROL: + case UCHAR_LOGICAL_ORDER_EXCEPTION: + case UCHAR_LOWERCASE: + case UCHAR_MATH: + case UCHAR_NONCHARACTER_CODE_POINT: + case UCHAR_PATTERN_SYNTAX: + case UCHAR_PATTERN_WHITE_SPACE: + case UCHAR_QUOTATION_MARK: + case UCHAR_RADICAL: + case UCHAR_REGIONAL_INDICATOR: + case UCHAR_S_TERM: + case UCHAR_SOFT_DOTTED: + case UCHAR_TERMINAL_PUNCTUATION: + case UCHAR_UNIFIED_IDEOGRAPH: + case UCHAR_UPPERCASE: + case UCHAR_VARIATION_SELECTOR: + case UCHAR_WHITE_SPACE: + case UCHAR_XID_CONTINUE: + case UCHAR_XID_START: + return true; + default: + break; + } + return false; +} + +} // namespace + +/* static */ bool +CharacterRange::AddPropertyClassRange(LifoAlloc* alloc, + const std::string& name, const std::string& value, + bool negate, bool ignore_case, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges) +{ + MOZ_UNUSED(ignore_case); // Full support of 
unicodeSets flag will use it + if (value.empty()) { + // Only one name given. Check what it refers to. + // First attempt to interpret as general category property value name. + if (LookupPropertyValueName(alloc, UCHAR_GENERAL_CATEGORY_MASK, name, negate, + ranges, lead_ranges, trail_ranges, wide_ranges)) { + return true; + } + // Interpret hard-coded aliases defined by ES but not present in Unicode + if (LookupSpecialPropertyValueName(alloc, name, negate, ranges, lead_ranges, + trail_ranges, wide_ranges)) { + return true; + } + // Then attempt to interpret as binary property name with value name 'Y'. + UProperty property = u_getPropertyEnum(name.c_str()); + if (!IsSupportedBinaryProperty(property)) return false; + if (!IsExactPropertyAlias(name, property)) return false; + return LookupPropertyValueName(alloc, property, negate ? "N" : "Y", false, + ranges, lead_ranges, trail_ranges, wide_ranges); + } else { + // Both property name and value name are specified. Attempt to interpret + // the property name as enumerated property. + UProperty property = u_getPropertyEnum(name.c_str()); + if (!IsExactPropertyAlias(name, property)) return false; + if (property == UCHAR_GENERAL_CATEGORY) { + // We want to allow aggregate value names such as "Letter". 
+ property = UCHAR_GENERAL_CATEGORY_MASK; + } else if (property != UCHAR_SCRIPT && + property != UCHAR_SCRIPT_EXTENSIONS) { + // The only allowed property is Script= + return false; + } + return LookupPropertyValueName(alloc, property, value, negate, + ranges, lead_ranges, trail_ranges, wide_ranges); + } +} + +/* static */ bool +CharacterRange::IsCanonical(const CharacterRangeVector& ranges) +{ + int n = ranges.length(); + if (n <= 1) + return true; + + int max = ranges[0].to(); + for (int i = 1; i < n; i++) { + CharacterRange next_range = ranges[i]; + if (next_range.from() <= max + 1) + return false; + max = next_range.to(); + } + return true; +} + +/* static */ void +CharacterRange::Canonicalize(CharacterRangeVector& character_ranges) +{ + if (character_ranges.length() <= 1) return; + // Check whether ranges are already canonical (increasing, non-overlapping, + // non-adjacent). + int n = character_ranges.length(); + int max = character_ranges[0].to(); + int i = 1; + while (i < n) { + CharacterRange current = character_ranges[i]; + if (current.from() <= max + 1) { + break; + } + max = current.to(); + i++; + } + // Canonical until the i'th range. If that's all of them, we are done. + if (i == n) return; + + // The ranges at index i and forward are not canonicalized. Make them so by + // doing the equivalent of insertion sort (inserting each into the previous + // list, in order). + // Notice that inserting a range can reduce the number of ranges in the + // result due to combining of adjacent and overlapping ranges. + int read = i; // Range to insert. + size_t num_canonical = i; // Length of canonicalized part of list. 
+ do { + num_canonical = InsertRangeInCanonicalList(character_ranges, + num_canonical, + character_ranges[read]); + read++; + } while (read < n); + + while (character_ranges.length() > num_canonical) + character_ranges.popBack(); + + MOZ_ASSERT(IsCanonical(character_ranges)); +} + +/* static */ int +CharacterRange::InsertRangeInCanonicalList(CharacterRangeVector& list, + int count, + CharacterRange insert) +{ + // Inserts a range into list[0..count[, which must be sorted + // by from value and non-overlapping and non-adjacent, using at most + // list[0..count] for the result. Returns the number of resulting + // canonicalized ranges. Inserting a range may collapse existing ranges into + // fewer ranges, so the return value can be anything in the range 1..count+1. + char16_t from = insert.from(); + char16_t to = insert.to(); + int start_pos = 0; + int end_pos = count; + for (int i = count - 1; i >= 0; i--) { + CharacterRange current = list[i]; + if (current.from() > to + 1) { + end_pos = i; + } else if (current.to() + 1 < from) { + start_pos = i + 1; + break; + } + } + + // Inserted range overlaps, or is adjacent to, ranges at positions + // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are + // not affected by the insertion. + // If start_pos == end_pos, the range must be inserted before start_pos. + // if start_pos < end_pos, the entire range from start_pos to end_pos + // must be merged with the insert range. + + if (start_pos == end_pos) { + // Insert between existing ranges at position start_pos. + if (start_pos < count) { + list.moveReplace(start_pos, start_pos + 1, count - start_pos); + } + list[start_pos] = insert; + return count + 1; + } + if (start_pos + 1 == end_pos) { + // Replace single existing range at position start_pos. 
+ CharacterRange to_replace = list[start_pos]; + int new_from = Min(to_replace.from(), from); + int new_to = Max(to_replace.to(), to); + list[start_pos] = CharacterRange(new_from, new_to); + return count; + } + // Replace a number of existing ranges from start_pos to end_pos - 1. + // Move the remaining ranges down. + + int new_from = Min(list[start_pos].from(), from); + int new_to = Max(list[end_pos - 1].to(), to); + if (end_pos < count) { + list.moveReplace(end_pos, start_pos + 1, count - end_pos); + } + list[start_pos] = CharacterRange(new_from, new_to); + return count - (end_pos - start_pos) + 1; +} + +int +irregexp::GetCaseIndependentLetters(char16_t character, + bool ascii_subject, + bool unicode, + const char16_t* choices, + size_t choices_length, + char16_t* letters) +{ + size_t count = 0; + for (size_t i = 0; i < choices_length; i++) { + char16_t c = choices[i]; + + // Skip characters that can't appear in one byte strings. + if (!unicode && ascii_subject && c > kMaxOneByteCharCode) + continue; + + // Watch for duplicates. 
+ bool found = false; + for (size_t j = 0; j < count; j++) { + if (letters[j] == c) { + found = true; + break; + } + } + if (found) + continue; + + letters[count++] = c; + } + + return count; +} + +int +irregexp::GetCaseIndependentLetters(char16_t character, + bool ascii_subject, + bool unicode, + char16_t* letters) +{ + if (unicode) { + const char16_t choices[] = { + character, + unicode::FoldCase(character), + unicode::ReverseFoldCase1(character), + unicode::ReverseFoldCase2(character), + unicode::ReverseFoldCase3(character), + }; + return GetCaseIndependentLetters(character, ascii_subject, unicode, + choices, ArrayLength(choices), letters); + } + + char16_t upper = unicode::ToUpperCase(character); + unicode::CodepointsWithSameUpperCase others(character); + char16_t other1 = others.other1(); + char16_t other2 = others.other2(); + char16_t other3 = others.other3(); + + // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2 + // step 3.g. + // The standard requires that non-ASCII characters cannot have ASCII + // character codes in their equivalence class, even though this + // situation occurs multiple times in the Unicode tables. + static const unsigned kMaxAsciiCharCode = 127; + if (upper <= kMaxAsciiCharCode) { + if (character > kMaxAsciiCharCode) { + // If Canonicalize(character) == character, all other characters + // should be ignored. 
+ return GetCaseIndependentLetters(character, ascii_subject, unicode, + &character, 1, letters); + } + + if (other1 > kMaxAsciiCharCode) + other1 = character; + if (other2 > kMaxAsciiCharCode) + other2 = character; + if (other3 > kMaxAsciiCharCode) + other3 = character; + } + + const char16_t choices[] = { + character, + upper, + other1, + other2, + other3 + }; + return GetCaseIndependentLetters(character, ascii_subject, unicode, + choices, ArrayLength(choices), letters); +} + +/* +Generated from following Node.js source: + +package.json + +``` +{ + "private": true, + "dependencies": { + "unicode-12.0.0": "^0.7.9" + } +} +``` + +generate-unicode-sequence-property-data.js + +``` +const toHex = (symbol) => { + return '0x' + symbol.codePointAt(0).toString(16) + .toUpperCase().padStart(6, '0'); +}; + +const generateData = (property) => { + const sequences = + require(`unicode-12.0.0/Sequence_Property/${ property }/index.js`); + const id = property.replace(/_/g, '') + 's'; + const buffer = []; + for (const sequence of sequences) { + const symbols = [...sequence]; + const codePoints = symbols.map(symbol => toHex(symbol)); + buffer.push(' ' + codePoints.join(', ') + ', 0,'); + } + const output = + `const uc32 UnicodePropertySequences::k${ id }[] = {\n` + + `${ buffer.join('\n') }\n 0 // null-terminating the list\n};\n`; + return output; +}; + +const properties = [ + 'Emoji_Flag_Sequence', + 'Emoji_Tag_Sequence', + 'Emoji_ZWJ_Sequence', +]; + +for (const property of properties) { + console.log(generateData(property)); +} +``` +*/ + +const widechar js::irregexp::kEmojiFlagSequences[] = { + 0x01F1E6, 0x01F1E8, 0, + 0x01F1FF, 0x01F1FC, 0, + 0x01F1E6, 0x01F1EA, 0, + 0x01F1E6, 0x01F1EB, 0, + 0x01F1E6, 0x01F1EC, 0, + 0x01F1E6, 0x01F1EE, 0, + 0x01F1E6, 0x01F1F1, 0, + 0x01F1E6, 0x01F1F2, 0, + 0x01F1E6, 0x01F1F4, 0, + 0x01F1E6, 0x01F1F6, 0, + 0x01F1E6, 0x01F1F7, 0, + 0x01F1E6, 0x01F1F8, 0, + 0x01F1E6, 0x01F1F9, 0, + 0x01F1E6, 0x01F1FA, 0, + 0x01F1E6, 0x01F1FC, 0, + 0x01F1E6, 
0x01F1FD, 0, + 0x01F1E6, 0x01F1FF, 0, + 0x01F1E7, 0x01F1E6, 0, + 0x01F1E7, 0x01F1E7, 0, + 0x01F1E7, 0x01F1E9, 0, + 0x01F1E7, 0x01F1EA, 0, + 0x01F1E7, 0x01F1EB, 0, + 0x01F1E7, 0x01F1EC, 0, + 0x01F1E7, 0x01F1ED, 0, + 0x01F1E7, 0x01F1EE, 0, + 0x01F1E7, 0x01F1EF, 0, + 0x01F1E7, 0x01F1F1, 0, + 0x01F1E7, 0x01F1F2, 0, + 0x01F1E7, 0x01F1F3, 0, + 0x01F1E7, 0x01F1F4, 0, + 0x01F1E7, 0x01F1F6, 0, + 0x01F1E7, 0x01F1F7, 0, + 0x01F1E7, 0x01F1F8, 0, + 0x01F1E7, 0x01F1F9, 0, + 0x01F1E7, 0x01F1FB, 0, + 0x01F1E7, 0x01F1FC, 0, + 0x01F1E7, 0x01F1FE, 0, + 0x01F1E7, 0x01F1FF, 0, + 0x01F1E8, 0x01F1E6, 0, + 0x01F1E8, 0x01F1E8, 0, + 0x01F1E8, 0x01F1E9, 0, + 0x01F1E8, 0x01F1EB, 0, + 0x01F1E8, 0x01F1EC, 0, + 0x01F1E8, 0x01F1ED, 0, + 0x01F1E8, 0x01F1EE, 0, + 0x01F1E8, 0x01F1F0, 0, + 0x01F1E8, 0x01F1F1, 0, + 0x01F1E8, 0x01F1F2, 0, + 0x01F1E8, 0x01F1F3, 0, + 0x01F1E8, 0x01F1F4, 0, + 0x01F1E8, 0x01F1F5, 0, + 0x01F1E8, 0x01F1F7, 0, + 0x01F1E8, 0x01F1FA, 0, + 0x01F1E8, 0x01F1FB, 0, + 0x01F1E8, 0x01F1FC, 0, + 0x01F1E8, 0x01F1FD, 0, + 0x01F1E8, 0x01F1FE, 0, + 0x01F1E8, 0x01F1FF, 0, + 0x01F1E9, 0x01F1EA, 0, + 0x01F1E9, 0x01F1EC, 0, + 0x01F1E9, 0x01F1EF, 0, + 0x01F1E9, 0x01F1F0, 0, + 0x01F1E9, 0x01F1F2, 0, + 0x01F1E9, 0x01F1F4, 0, + 0x01F1E9, 0x01F1FF, 0, + 0x01F1EA, 0x01F1E6, 0, + 0x01F1EA, 0x01F1E8, 0, + 0x01F1EA, 0x01F1EA, 0, + 0x01F1EA, 0x01F1EC, 0, + 0x01F1EA, 0x01F1ED, 0, + 0x01F1EA, 0x01F1F7, 0, + 0x01F1EA, 0x01F1F8, 0, + 0x01F1EA, 0x01F1F9, 0, + 0x01F1EA, 0x01F1FA, 0, + 0x01F1EB, 0x01F1EE, 0, + 0x01F1EB, 0x01F1EF, 0, + 0x01F1EB, 0x01F1F0, 0, + 0x01F1EB, 0x01F1F2, 0, + 0x01F1EB, 0x01F1F4, 0, + 0x01F1EB, 0x01F1F7, 0, + 0x01F1EC, 0x01F1E6, 0, + 0x01F1EC, 0x01F1E7, 0, + 0x01F1EC, 0x01F1E9, 0, + 0x01F1EC, 0x01F1EA, 0, + 0x01F1EC, 0x01F1EB, 0, + 0x01F1EC, 0x01F1EC, 0, + 0x01F1EC, 0x01F1ED, 0, + 0x01F1EC, 0x01F1EE, 0, + 0x01F1EC, 0x01F1F1, 0, + 0x01F1EC, 0x01F1F2, 0, + 0x01F1EC, 0x01F1F3, 0, + 0x01F1EC, 0x01F1F5, 0, + 0x01F1EC, 0x01F1F6, 0, + 0x01F1EC, 0x01F1F7, 0, + 0x01F1EC, 0x01F1F8, 0, + 0x01F1EC, 
0x01F1F9, 0, + 0x01F1EC, 0x01F1FA, 0, + 0x01F1EC, 0x01F1FC, 0, + 0x01F1EC, 0x01F1FE, 0, + 0x01F1ED, 0x01F1F0, 0, + 0x01F1ED, 0x01F1F2, 0, + 0x01F1ED, 0x01F1F3, 0, + 0x01F1ED, 0x01F1F7, 0, + 0x01F1ED, 0x01F1F9, 0, + 0x01F1ED, 0x01F1FA, 0, + 0x01F1EE, 0x01F1E8, 0, + 0x01F1EE, 0x01F1E9, 0, + 0x01F1EE, 0x01F1EA, 0, + 0x01F1EE, 0x01F1F1, 0, + 0x01F1EE, 0x01F1F2, 0, + 0x01F1EE, 0x01F1F3, 0, + 0x01F1EE, 0x01F1F4, 0, + 0x01F1EE, 0x01F1F6, 0, + 0x01F1EE, 0x01F1F7, 0, + 0x01F1EE, 0x01F1F8, 0, + 0x01F1EE, 0x01F1F9, 0, + 0x01F1EF, 0x01F1EA, 0, + 0x01F1EF, 0x01F1F2, 0, + 0x01F1EF, 0x01F1F4, 0, + 0x01F1EF, 0x01F1F5, 0, + 0x01F1F0, 0x01F1EA, 0, + 0x01F1F0, 0x01F1EC, 0, + 0x01F1F0, 0x01F1ED, 0, + 0x01F1F0, 0x01F1EE, 0, + 0x01F1F0, 0x01F1F2, 0, + 0x01F1F0, 0x01F1F3, 0, + 0x01F1F0, 0x01F1F5, 0, + 0x01F1F0, 0x01F1F7, 0, + 0x01F1F0, 0x01F1FC, 0, + 0x01F1E6, 0x01F1E9, 0, + 0x01F1F0, 0x01F1FF, 0, + 0x01F1F1, 0x01F1E6, 0, + 0x01F1F1, 0x01F1E7, 0, + 0x01F1F1, 0x01F1E8, 0, + 0x01F1F1, 0x01F1EE, 0, + 0x01F1F1, 0x01F1F0, 0, + 0x01F1F1, 0x01F1F7, 0, + 0x01F1F1, 0x01F1F8, 0, + 0x01F1F1, 0x01F1F9, 0, + 0x01F1F1, 0x01F1FA, 0, + 0x01F1F1, 0x01F1FB, 0, + 0x01F1F1, 0x01F1FE, 0, + 0x01F1F2, 0x01F1E6, 0, + 0x01F1F2, 0x01F1E8, 0, + 0x01F1F2, 0x01F1E9, 0, + 0x01F1F2, 0x01F1EA, 0, + 0x01F1F2, 0x01F1EB, 0, + 0x01F1F2, 0x01F1EC, 0, + 0x01F1F2, 0x01F1ED, 0, + 0x01F1F2, 0x01F1F0, 0, + 0x01F1F2, 0x01F1F1, 0, + 0x01F1F2, 0x01F1F2, 0, + 0x01F1F2, 0x01F1F3, 0, + 0x01F1F2, 0x01F1F4, 0, + 0x01F1F2, 0x01F1F5, 0, + 0x01F1F2, 0x01F1F6, 0, + 0x01F1F2, 0x01F1F7, 0, + 0x01F1F2, 0x01F1F8, 0, + 0x01F1F2, 0x01F1F9, 0, + 0x01F1F2, 0x01F1FA, 0, + 0x01F1F2, 0x01F1FB, 0, + 0x01F1F2, 0x01F1FC, 0, + 0x01F1F2, 0x01F1FD, 0, + 0x01F1F2, 0x01F1FE, 0, + 0x01F1F2, 0x01F1FF, 0, + 0x01F1F3, 0x01F1E6, 0, + 0x01F1F3, 0x01F1E8, 0, + 0x01F1F3, 0x01F1EA, 0, + 0x01F1F3, 0x01F1EB, 0, + 0x01F1F3, 0x01F1EC, 0, + 0x01F1F3, 0x01F1EE, 0, + 0x01F1F3, 0x01F1F1, 0, + 0x01F1F3, 0x01F1F4, 0, + 0x01F1F3, 0x01F1F5, 0, + 0x01F1F3, 0x01F1F7, 0, + 0x01F1F3, 
0x01F1FA, 0, + 0x01F1F3, 0x01F1FF, 0, + 0x01F1F4, 0x01F1F2, 0, + 0x01F1F5, 0x01F1E6, 0, + 0x01F1F5, 0x01F1EA, 0, + 0x01F1F5, 0x01F1EB, 0, + 0x01F1F5, 0x01F1EC, 0, + 0x01F1F5, 0x01F1ED, 0, + 0x01F1F5, 0x01F1F0, 0, + 0x01F1F5, 0x01F1F1, 0, + 0x01F1F5, 0x01F1F2, 0, + 0x01F1F5, 0x01F1F3, 0, + 0x01F1F5, 0x01F1F7, 0, + 0x01F1F5, 0x01F1F8, 0, + 0x01F1F5, 0x01F1F9, 0, + 0x01F1F5, 0x01F1FC, 0, + 0x01F1F5, 0x01F1FE, 0, + 0x01F1F6, 0x01F1E6, 0, + 0x01F1F7, 0x01F1EA, 0, + 0x01F1F7, 0x01F1F4, 0, + 0x01F1F7, 0x01F1F8, 0, + 0x01F1F7, 0x01F1FA, 0, + 0x01F1F7, 0x01F1FC, 0, + 0x01F1F8, 0x01F1E6, 0, + 0x01F1F8, 0x01F1E7, 0, + 0x01F1F8, 0x01F1E8, 0, + 0x01F1F8, 0x01F1E9, 0, + 0x01F1F8, 0x01F1EA, 0, + 0x01F1F8, 0x01F1EC, 0, + 0x01F1F8, 0x01F1ED, 0, + 0x01F1F8, 0x01F1EE, 0, + 0x01F1F8, 0x01F1EF, 0, + 0x01F1F8, 0x01F1F0, 0, + 0x01F1F8, 0x01F1F1, 0, + 0x01F1F8, 0x01F1F2, 0, + 0x01F1F8, 0x01F1F3, 0, + 0x01F1F8, 0x01F1F4, 0, + 0x01F1F8, 0x01F1F7, 0, + 0x01F1F8, 0x01F1F8, 0, + 0x01F1F8, 0x01F1F9, 0, + 0x01F1F8, 0x01F1FB, 0, + 0x01F1F8, 0x01F1FD, 0, + 0x01F1F8, 0x01F1FE, 0, + 0x01F1F8, 0x01F1FF, 0, + 0x01F1F9, 0x01F1E6, 0, + 0x01F1F9, 0x01F1E8, 0, + 0x01F1F9, 0x01F1E9, 0, + 0x01F1F9, 0x01F1EB, 0, + 0x01F1F9, 0x01F1EC, 0, + 0x01F1F9, 0x01F1ED, 0, + 0x01F1F9, 0x01F1EF, 0, + 0x01F1F9, 0x01F1F0, 0, + 0x01F1F9, 0x01F1F1, 0, + 0x01F1F9, 0x01F1F2, 0, + 0x01F1F9, 0x01F1F3, 0, + 0x01F1F9, 0x01F1F4, 0, + 0x01F1F9, 0x01F1F7, 0, + 0x01F1F9, 0x01F1F9, 0, + 0x01F1F9, 0x01F1FB, 0, + 0x01F1F9, 0x01F1FC, 0, + 0x01F1F9, 0x01F1FF, 0, + 0x01F1FA, 0x01F1E6, 0, + 0x01F1FA, 0x01F1EC, 0, + 0x01F1FA, 0x01F1F2, 0, + 0x01F1FA, 0x01F1F3, 0, + 0x01F1FA, 0x01F1F8, 0, + 0x01F1FA, 0x01F1FE, 0, + 0x01F1FA, 0x01F1FF, 0, + 0x01F1FB, 0x01F1E6, 0, + 0x01F1FB, 0x01F1E8, 0, + 0x01F1FB, 0x01F1EA, 0, + 0x01F1FB, 0x01F1EC, 0, + 0x01F1FB, 0x01F1EE, 0, + 0x01F1FB, 0x01F1F3, 0, + 0x01F1FB, 0x01F1FA, 0, + 0x01F1FC, 0x01F1EB, 0, + 0x01F1FC, 0x01F1F8, 0, + 0x01F1FD, 0x01F1F0, 0, + 0x01F1FE, 0x01F1EA, 0, + 0x01F1FE, 0x01F1F9, 0, + 0x01F1FF, 
0x01F1E6, 0, + 0x01F1FF, 0x01F1F2, 0, + 0x01F1F0, 0x01F1FE, 0, + 0 // null-terminating the list +}; + +const widechar js::irregexp::kEmojiTagSequences[] = { + 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0065, 0x0E006E, 0x0E0067, 0x0E007F, 0, + 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0073, 0x0E0063, 0x0E0074, 0x0E007F, 0, + 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0077, 0x0E006C, 0x0E0073, 0x0E007F, 0, + 0 // null-terminating the list +}; + +const widechar js::irregexp::kEmojiZWJSequences[] = { + 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0, + 0x01F441, 0x00FE0F, 0x00200D, 0x01F5E8, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F467, 0, + 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0, + 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0, + 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0, + 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0, + 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0, + 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 
0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0, + 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0, + 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F469, 0, + 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D, + 0x01F468, 0, + 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D, + 0x01F469, 0, + 0x01F469, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F467, 0, + 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0, + 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0, + 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0, + 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 
0x00200D, 0x01F468, 0x01F3FE, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FE, 0, + 0x01F9D1, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0, + 0x01F9D1, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0, + 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0, + 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0, + 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0, + 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0, + 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0, + 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0, + 0x01F9D1, 0x01F3FE, 0x00200D, 
0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0, + 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0, + 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0, + 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0, + 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0, + 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0, + 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0, + 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FF, 0, + 0x01F468, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x00200D, 0x01F373, 0, + 0x01F468, 0x00200D, 0x01F393, 0, + 0x01F468, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x00200D, 0x01F527, 0, + 0x01F468, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x00200D, 0x01F680, 0, + 0x01F468, 0x00200D, 0x01F692, 0, + 0x01F468, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x00200D, 0x01F9BD, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F373, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F393, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F527, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F680, 0, + 0x01F468, 0x01F3FB, 0x00200D, 
0x01F692, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BD, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F373, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F393, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F527, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F680, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F692, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BD, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F373, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F393, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F527, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F680, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F692, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BD, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x01F3FE, 
0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F373, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F393, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F527, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F680, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F692, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BD, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F33E, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F373, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F393, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A4, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A8, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F3EB, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F3ED, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BB, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BC, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F527, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F52C, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F680, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F692, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9AF, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BC, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x00200D, 0x01F373, 0, + 0x01F469, 0x00200D, 0x01F393, 0, + 0x01F469, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x00200D, 0x01F3A8, 
0, + 0x01F469, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x00200D, 0x01F527, 0, + 0x01F469, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x00200D, 0x01F680, 0, + 0x01F469, 0x00200D, 0x01F692, 0, + 0x01F469, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F373, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F393, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A8, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F527, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F680, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F692, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F373, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F393, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A8, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F527, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F680, 0, + 0x01F469, 0x01F3FC, 
0x00200D, 0x01F692, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F373, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F393, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A8, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F527, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F680, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F692, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F373, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F393, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A8, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F527, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F680, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F692, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BD, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0, + 0x01F469, 
0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F33E, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F373, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F393, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A4, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A8, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F3EB, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F3ED, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BB, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BC, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F527, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F52C, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F680, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F692, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9AF, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BC, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BD, 0, + 0x0026F9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x0026F9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x0026F9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x0026F9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x0026F9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x0026F9, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x0026F9, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C3, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FE, 0x00200D, 0x002642, 
0x00FE0F, 0, + 0x01F3C3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3C4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CB, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CB, 0x00FE0F, 0x00200D, 0x002640, 
0x00FE0F, 0, + 0x01F3CB, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F3CC, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F3CC, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F46F, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F46F, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F471, 
0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F471, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F473, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F473, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F477, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F477, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F481, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F481, 0x01F3FF, 0x00200D, 
0x002642, 0x00FE0F, 0, + 0x01F482, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F482, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D, + 0x01F468, 0, + 0x01F482, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F482, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F482, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F482, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F482, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F482, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F482, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F482, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F482, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F486, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F486, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F487, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F487, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x01F3FB, 
0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F575, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F575, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F645, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F645, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F646, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F646, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F647, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x00200D, 0x002642, 
0x00FE0F, 0, + 0x01F647, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F647, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F647, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F647, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F647, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F647, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64B, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64B, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 
0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F64E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F64E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6A3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FC, 0x00200D, 
0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B5, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F6B6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F926, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F926, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F937, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F937, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F937, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 
0x01F937, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F937, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F937, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F937, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F938, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F938, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F939, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F939, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93C, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93C, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93D, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x01F3FC, 0x00200D, 0x002642, 
0x00FE0F, 0, + 0x01F93D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F93E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F93E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 
0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9B9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CE, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FE, 0x00200D, 
0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9CF, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D7, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D8, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 
0x01F9D8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9D9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DB, 0x01F3FF, 
0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DE, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DE, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F9DF, 0x00200D, 0x002640, 0x00FE0F, 0, + 0x01F9DF, 0x00200D, 0x002642, 0x00FE0F, 0, + 0x01F468, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x00200D, 0x01F9B3, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B3, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B3, 0, + 
0x01F468, 0x01F3FD, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B3, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B3, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B0, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B1, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B2, 0, + 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B3, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B0, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B1, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B2, 0, + 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B3, 0, + 0x01F3F3, 0x00FE0F, 0x00200D, 0x01F308, 0, + 0x01F3F4, 0x00200D, 0x002620, 0x00FE0F, 0, + 0x01F415, 0x00200D, 0x01F9BA, 0, + 0x01F482, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0, + 0 // null-terminating the list +}; + + diff --git a/js/src/irregexp/RegExpCharRanges.h b/js/src/irregexp/RegExpCharRanges.h new file mode 100644 index 0000000000..16a1c00b06 --- /dev/null +++ b/js/src/irregexp/RegExpCharRanges.h @@ -0,0 
+1,235 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef V8_JSREGEXPCHARRANGES_H_ +#define V8_JSREGEXPCHARRANGES_H_ + +#include <string> + +#include "irregexp/RegExpCharacters.h" +#include "irregexp/InfallibleVector.h" + +namespace js { + +namespace irregexp { + +// Characters parsed by RegExpParser can be either char16_t or kEndMarker. 
+typedef uint32_t widechar; + +static const int kMaxOneByteCharCode = 0xff; +static const int kMaxUtf16CodeUnit = 0xffff; +static const size_t kEcma262UnCanonicalizeMaxWidth = 4; +static const char16_t kNoCharClass = 0; + +extern const widechar kEmojiFlagSequences[]; +extern const widechar kEmojiTagSequences[]; +extern const widechar kEmojiZWJSequences[]; + +static inline char16_t +MaximumCharacter(bool ascii) +{ + return ascii ? kMaxOneByteCharCode : kMaxUtf16CodeUnit; +} + + +// Returns the number of characters in the equivalence class, omitting those +// that cannot occur in the source string if it is a one byte string. +int +GetCaseIndependentLetters(char16_t character, + bool ascii_subject, + bool unicode, + const char16_t* choices, + size_t choices_length, + char16_t* letters); + +int +GetCaseIndependentLetters(char16_t character, + bool ascii_subject, + bool unicode, + char16_t* letters); + +class CharacterRange; +class WideCharRange; +typedef InfallibleVector<CharacterRange, 1> CharacterRangeVector; +typedef InfallibleVector<WideCharRange, 1> WideCharRangeVector; + +// Represents code units in the range from from_ to to_, both ends are +// inclusive. +class CharacterRange +{ + public: + // static methods for dealing with CharacterRangeVectors + + static void AddClass(const int* elmv, int elmc, CharacterRangeVector* ranges); + static void AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges); + static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges); + static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, + CharacterRangeVector* ranges, bool ignoreCase); + + // Adds a character or pre-defined character class to character ranges. + // If char_class is not kNoCharClass, it's interpreted as a class + // escape (i.e., 's' means whitespace, from '\s'). 
+ static void AddCharOrEscape(LifoAlloc* alloc, CharacterRangeVector* ranges, + char16_t char_class, widechar c); + static void AddCharOrEscapeUnicode(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + char16_t char_class, + widechar c, + bool ignore_case); + // Simplified version of AddUnicodeRange for single characters + static void AddCharUnicode(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + widechar c); + static void AddUnicodeRange(LifoAlloc* alloc, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges, + widechar first, + widechar next); + + static bool RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode); + static bool CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length); + static bool CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length); + + // Negate a vector of ranges by subtracting its ranges from a range + // encompassing the full range of possible values. + template <typename RangeType> + static void NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges, + RangeType full_range); + + // static methods for Unicode Property Escapes + static bool AddPropertyClassRange(LifoAlloc* alloc, + const std::string& name, const std::string& value, + bool negate, bool ignore_case, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges); + + // static methods for dealing with canonical CharacterRangeVectors + + // Whether a range list is in canonical form: Ranges ordered by from value, + // and ranges non-overlapping and non-adjacent. 
+ static bool IsCanonical(const CharacterRangeVector& ranges); + + // Convert range list to canonical form. The characters covered by the ranges + // will still be the same, but no character is in more than one range, and + // adjacent ranges are merged. The resulting list may be shorter than the + // original, but cannot be longer. + static void Canonicalize(CharacterRangeVector& ranges); + + static int InsertRangeInCanonicalList(CharacterRangeVector& list, int count, CharacterRange insert); + + // Negate the contents of a character range in canonical form. + static void Negate(const LifoAlloc* alloc, + CharacterRangeVector src, + CharacterRangeVector* dst); + public: + CharacterRange() + : from_(0), to_(0) + {} + + CharacterRange(char16_t from, char16_t to) + : from_(from), to_(to) + {} + + static inline CharacterRange Singleton(char16_t value) { + return CharacterRange(value, value); + } + static inline CharacterRange Range(char16_t from, char16_t to) { + MOZ_ASSERT(from <= to); + return CharacterRange(from, to); + } + static inline CharacterRange Everything() { + return CharacterRange(0, kMaxUtf16CodeUnit); + } + static inline CharacterRange LeadSurrogate() { + return CharacterRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax); + } + static inline CharacterRange TrailSurrogate() { + return CharacterRange(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax); + } + bool Contains(char16_t i) { return from_ <= i && i <= to_; } + char16_t from() const { return from_; } + void set_from(char16_t value) { from_ = value; } + char16_t to() const { return to_; } + void set_to(char16_t value) { to_ = value; } + bool is_valid() { return from_ <= to_; } + bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; } + bool IsSingleton() { return (from_ == to_); } + + void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges); + private: + char16_t from_; + char16_t to_; +}; + + +class WideCharRange +{ + public: + WideCharRange() + : 
from_(0), to_(0) + {} + + WideCharRange(widechar from, widechar to) + : from_(from), to_(to) + {} + + static inline WideCharRange Singleton(widechar value) { + return WideCharRange(value, value); + } + static inline WideCharRange Range(widechar from, widechar to) { + MOZ_ASSERT(from <= to); + return WideCharRange(from, to); + } + static inline WideCharRange NonBMP() { + return WideCharRange(unicode::NonBMPMin, unicode::NonBMPMax); + } + + bool Contains(widechar i) const { return from_ <= i && i <= to_; } + widechar from() const { return from_; } + widechar to() const { return to_; } + + private: + widechar from_; + widechar to_; +}; + + +} } // namespace js::irregexp + +#endif // V8_JSREGEXPCHARRANGES_H_ diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 07679e21b8..f3db7c1847 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -30,18 +30,14 @@ #include "irregexp/RegExpEngine.h" #include "irregexp/NativeRegExpMacroAssembler.h" -#include "irregexp/RegExpCharacters.h" +#include "irregexp/RegExpCharacters.h" #include "irregexp/RegExpMacroAssembler.h" #include "jit/ExecutableAllocator.h" #include "jit/JitCommon.h" -// Generated table -#include "irregexp/RegExpCharacters-inl.h" - using namespace js; using namespace js::irregexp; -using mozilla::ArrayLength; using mozilla::DebugOnly; using mozilla::Maybe; @@ -64,317 +60,6 @@ RegExpNode::RegExpNode(LifoAlloc* alloc) bm_info_[0] = bm_info_[1] = nullptr; } -static const int kMaxOneByteCharCode = 0xff; -static const int kMaxUtf16CodeUnit = 0xffff; - -static char16_t -MaximumCharacter(bool ascii) -{ - return ascii ? 
kMaxOneByteCharCode : kMaxUtf16CodeUnit; -} - -static void -AddClass(const int* elmv, int elmc, - CharacterRangeVector* ranges) -{ - elmc--; - MOZ_ASSERT(elmv[elmc] == 0x10000); - for (int i = 0; i < elmc; i += 2) { - MOZ_ASSERT(elmv[i] < elmv[i + 1]); - ranges->append(CharacterRange(elmv[i], elmv[i + 1] - 1)); - } -} - -static void -AddClassNegated(const int* elmv, - int elmc, - CharacterRangeVector* ranges) -{ - elmc--; - MOZ_ASSERT(elmv[elmc] == 0x10000); - MOZ_ASSERT(elmv[0] != 0x0000); - MOZ_ASSERT(elmv[elmc-1] != kMaxUtf16CodeUnit); - char16_t last = 0x0000; - for (int i = 0; i < elmc; i += 2) { - MOZ_ASSERT(last <= elmv[i] - 1); - MOZ_ASSERT(elmv[i] < elmv[i + 1]); - ranges->append(CharacterRange(last, elmv[i] - 1)); - last = elmv[i + 1]; - } - ranges->append(CharacterRange(last, kMaxUtf16CodeUnit)); -} - -void -CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type, - CharacterRangeVector* ranges) -{ - switch (type) { - case 's': - AddClass(kSpaceRanges, kSpaceRangeCount, ranges); - break; - case 'S': - AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges); - break; - case 'w': - AddClass(kWordRanges, kWordRangeCount, ranges); - break; - case 'W': - AddClassNegated(kWordRanges, kWordRangeCount, ranges); - break; - case 'd': - AddClass(kDigitRanges, kDigitRangeCount, ranges); - break; - case 'D': - AddClassNegated(kDigitRanges, kDigitRangeCount, ranges); - break; - case '.': - AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); - break; - // This is not a character range as defined by the spec but a - // convenient shorthand for a character class that matches any - // character. - case '*': - ranges->append(CharacterRange::Everything()); - break; - // This is the set of characters matched by the $ and ^ symbols - // in multiline mode. 
- case 'n': - AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges); - break; - default: - MOZ_CRASH("Bad character class escape"); - } -} - -// Add class escape, excluding surrogate pair range. -void -CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, - CharacterRangeVector* ranges, bool ignore_case) -{ - switch (type) { - case 's': - case 'd': - return AddClassEscape(alloc, type, ranges); - break; - case 'S': - AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges); - break; - case 'w': - if (ignore_case) - AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges); - else - AddClassEscape(alloc, type, ranges); - break; - case 'W': - if (ignore_case) { - AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges, - kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges); - } else { - AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges); - } - break; - case 'D': - AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges); - break; - default: - MOZ_CRASH("Bad type!"); - } -} - -static bool -RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode) -{ - for (size_t i = 0; i < ranges.length(); i++) { - // TODO(dcarney): this could be a lot more efficient. - if (RangeContainsLatin1Equivalents(ranges[i], unicode)) - return true; - } - return false; -} - -static const size_t kEcma262UnCanonicalizeMaxWidth = 4; - -// Returns the number of characters in the equivalence class, omitting those -// that cannot occur in the source string if it is a one byte string. -static int -GetCaseIndependentLetters(char16_t character, - bool ascii_subject, - bool unicode, - const char16_t* choices, - size_t choices_length, - char16_t* letters) -{ - size_t count = 0; - for (size_t i = 0; i < choices_length; i++) { - char16_t c = choices[i]; - - // Skip characters that can't appear in one byte strings. 
- if (!unicode && ascii_subject && c > kMaxOneByteCharCode) - continue; - - // Watch for duplicates. - bool found = false; - for (size_t j = 0; j < count; j++) { - if (letters[j] == c) { - found = true; - break; - } - } - if (found) - continue; - - letters[count++] = c; - } - - return count; -} - -static int -GetCaseIndependentLetters(char16_t character, - bool ascii_subject, - bool unicode, - char16_t* letters) -{ - if (unicode) { - const char16_t choices[] = { - character, - unicode::FoldCase(character), - unicode::ReverseFoldCase1(character), - unicode::ReverseFoldCase2(character), - unicode::ReverseFoldCase3(character), - }; - return GetCaseIndependentLetters(character, ascii_subject, unicode, - choices, ArrayLength(choices), letters); - } - - char16_t upper = unicode::ToUpperCase(character); - unicode::CodepointsWithSameUpperCase others(character); - char16_t other1 = others.other1(); - char16_t other2 = others.other2(); - char16_t other3 = others.other3(); - - // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2 - // step 3.g. - // The standard requires that non-ASCII characters cannot have ASCII - // character codes in their equivalence class, even though this - // situation occurs multiple times in the Unicode tables. - static const unsigned kMaxAsciiCharCode = 127; - if (upper <= kMaxAsciiCharCode) { - if (character > kMaxAsciiCharCode) { - // If Canonicalize(character) == character, all other characters - // should be ignored. 
- return GetCaseIndependentLetters(character, ascii_subject, unicode, - &character, 1, letters); - } - - if (other1 > kMaxAsciiCharCode) - other1 = character; - if (other2 > kMaxAsciiCharCode) - other2 = character; - if (other3 > kMaxAsciiCharCode) - other3 = character; - } - - const char16_t choices[] = { - character, - upper, - other1, - other2, - other3 - }; - return GetCaseIndependentLetters(character, ascii_subject, unicode, - choices, ArrayLength(choices), letters); -} - -void -CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges) -{ - char16_t bottom = from(); - char16_t top = to(); - - if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) { - if (bottom > kMaxOneByteCharCode) - return; - if (top > kMaxOneByteCharCode) - top = kMaxOneByteCharCode; - } - - for (char16_t c = bottom;; c++) { - char16_t chars[kEcma262UnCanonicalizeMaxWidth]; - size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars); - - for (size_t i = 0; i < length; i++) { - char16_t other = chars[i]; - if (other == c) - continue; - - // Try to combine with an existing range. - bool found = false; - for (size_t i = 0; i < ranges->length(); i++) { - CharacterRange& range = (*ranges)[i]; - if (range.Contains(other)) { - found = true; - break; - } else if (other == range.from() - 1) { - range.set_from(other); - found = true; - break; - } else if (other == range.to() + 1) { - range.set_to(other); - found = true; - break; - } - } - - if (!found) - ranges->append(CharacterRange::Singleton(other)); - } - - if (c == top) - break; - } -} - -static bool -CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length) -{ - length--; // Remove final 0x10000. 
- MOZ_ASSERT(special_class[length] == 0x10000); - MOZ_ASSERT(ranges.length() != 0); - MOZ_ASSERT(length != 0); - MOZ_ASSERT(special_class[0] != 0); - if (ranges.length() != (length >> 1) + 1) - return false; - CharacterRange range = ranges[0]; - if (range.from() != 0) - return false; - for (size_t i = 0; i < length; i += 2) { - if (special_class[i] != (range.to() + 1)) - return false; - range = ranges[(i >> 1) + 1]; - if (special_class[i+1] != range.from()) - return false; - } - if (range.to() != 0xffff) - return false; - return true; -} - -static bool -CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length) -{ - length--; // Remove final 0x10000. - MOZ_ASSERT(special_class[length] == 0x10000); - if (ranges.length() * 2 != length) - return false; - for (size_t i = 0; i < length; i += 2) { - CharacterRange range = ranges[i >> 1]; - if (range.from() != special_class[i] || range.to() != special_class[i + 1] - 1) - return false; - } - return true; -} - bool RegExpCharacterClass::is_standard(LifoAlloc* alloc) { @@ -384,168 +69,37 @@ RegExpCharacterClass::is_standard(LifoAlloc* alloc) return false; if (set_.is_standard()) return true; - if (CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) { + if (CharacterRange::CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) { set_.set_standard_set_type('s'); return true; } - if (CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) { + if (CharacterRange::CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) { set_.set_standard_set_type('S'); return true; } - if (CompareInverseRanges(set_.ranges(alloc), + if (CharacterRange::CompareInverseRanges(set_.ranges(alloc), kLineTerminatorRanges, kLineTerminatorRangeCount)) { set_.set_standard_set_type('.'); return true; } - if (CompareRanges(set_.ranges(alloc), + if (CharacterRange::CompareRanges(set_.ranges(alloc), kLineTerminatorRanges, kLineTerminatorRangeCount)) { 
set_.set_standard_set_type('n'); return true; } - if (CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) { + if (CharacterRange::CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) { set_.set_standard_set_type('w'); return true; } - if (CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) { + if (CharacterRange::CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) { set_.set_standard_set_type('W'); return true; } return false; } -bool -CharacterRange::IsCanonical(const CharacterRangeVector& ranges) -{ - int n = ranges.length(); - if (n <= 1) - return true; - - int max = ranges[0].to(); - for (int i = 1; i < n; i++) { - CharacterRange next_range = ranges[i]; - if (next_range.from() <= max + 1) - return false; - max = next_range.to(); - } - return true; -} - -// Move a number of elements in a zonelist to another position -// in the same list. Handles overlapping source and target areas. -static -void MoveRanges(CharacterRangeVector& list, int from, int to, int count) -{ - // Ranges are potentially overlapping. - if (from < to) { - for (int i = count - 1; i >= 0; i--) - list[to + i] = list[from + i]; - } else { - for (int i = 0; i < count; i++) - list[to + i] = list[from + i]; - } -} - -static int -InsertRangeInCanonicalList(CharacterRangeVector& list, - int count, - CharacterRange insert) -{ - // Inserts a range into list[0..count[, which must be sorted - // by from value and non-overlapping and non-adjacent, using at most - // list[0..count] for the result. Returns the number of resulting - // canonicalized ranges. Inserting a range may collapse existing ranges into - // fewer ranges, so the return value can be anything in the range 1..count+1. 
- char16_t from = insert.from(); - char16_t to = insert.to(); - int start_pos = 0; - int end_pos = count; - for (int i = count - 1; i >= 0; i--) { - CharacterRange current = list[i]; - if (current.from() > to + 1) { - end_pos = i; - } else if (current.to() + 1 < from) { - start_pos = i + 1; - break; - } - } - - // Inserted range overlaps, or is adjacent to, ranges at positions - // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are - // not affected by the insertion. - // If start_pos == end_pos, the range must be inserted before start_pos. - // if start_pos < end_pos, the entire range from start_pos to end_pos - // must be merged with the insert range. - - if (start_pos == end_pos) { - // Insert between existing ranges at position start_pos. - if (start_pos < count) { - MoveRanges(list, start_pos, start_pos + 1, count - start_pos); - } - list[start_pos] = insert; - return count + 1; - } - if (start_pos + 1 == end_pos) { - // Replace single existing range at position start_pos. - CharacterRange to_replace = list[start_pos]; - int new_from = Min(to_replace.from(), from); - int new_to = Max(to_replace.to(), to); - list[start_pos] = CharacterRange(new_from, new_to); - return count; - } - // Replace a number of existing ranges from start_pos to end_pos - 1. - // Move the remaining ranges down. - - int new_from = Min(list[start_pos].from(), from); - int new_to = Max(list[end_pos - 1].to(), to); - if (end_pos < count) { - MoveRanges(list, end_pos, start_pos + 1, count - end_pos); - } - list[start_pos] = CharacterRange(new_from, new_to); - return count - (end_pos - start_pos) + 1; -} - -void -CharacterRange::Canonicalize(CharacterRangeVector& character_ranges) -{ - if (character_ranges.length() <= 1) return; - // Check whether ranges are already canonical (increasing, non-overlapping, - // non-adjacent). 
- int n = character_ranges.length(); - int max = character_ranges[0].to(); - int i = 1; - while (i < n) { - CharacterRange current = character_ranges[i]; - if (current.from() <= max + 1) { - break; - } - max = current.to(); - i++; - } - // Canonical until the i'th range. If that's all of them, we are done. - if (i == n) return; - - // The ranges at index i and forward are not canonicalized. Make them so by - // doing the equivalent of insertion sort (inserting each into the previous - // list, in order). - // Notice that inserting a range can reduce the number of ranges in the - // result due to combining of adjacent and overlapping ranges. - int read = i; // Range to insert. - size_t num_canonical = i; // Length of canonicalized part of list. - do { - num_canonical = InsertRangeInCanonicalList(character_ranges, - num_canonical, - character_ranges[read]); - read++; - } while (read < n); - - while (character_ranges.length() > num_canonical) - character_ranges.popBack(); - - MOZ_ASSERT(CharacterRange::IsCanonical(character_ranges)); -} - // ------------------------------------------------------------------- // SeqRegExpNode @@ -720,6 +274,8 @@ ActionNode::EmptyMatchCheck(int start_register, int TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { + if (read_backward()) + return 0; int answer = Length(); if (answer >= still_to_find) return answer; @@ -735,8 +291,7 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) int TextNode::GreedyLoopTextLength() { - TextElement elm = elements()[elements().length() - 1]; - return elm.cp_offset() + elm.length(); + return Length(); } RegExpNode* @@ -789,7 +344,7 @@ TextNode::FilterASCII(int depth, bool ignore_case, bool unicode) ranges[0].to() >= kMaxOneByteCharCode) { // This will be handled in a later filter. 
- if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode)) + if (ignore_case && CharacterRange::RangesContainLatin1Equivalents(ranges, unicode)) continue; return set_replacement(nullptr); } @@ -798,7 +353,7 @@ TextNode::FilterASCII(int depth, bool ignore_case, bool unicode) ranges[0].from() > kMaxOneByteCharCode) { // This will be handled in a later filter. - if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode)) + if (ignore_case && CharacterRange::RangesContainLatin1Equivalents(ranges, unicode)) continue; return set_replacement(nullptr); } @@ -886,6 +441,8 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo int BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start) { + if (read_backward()) + return 0; if (budget <= 0) return 0; return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start); @@ -1577,6 +1134,9 @@ class irregexp::RegExpCompiler current_expansion_factor_ = value; } + bool read_backward() { return read_backward_; } + void set_read_backward(bool value) { read_backward_ = value; } + JSContext* cx() const { return cx_; } LifoAlloc* alloc() const { return alloc_; } @@ -1594,6 +1154,7 @@ class irregexp::RegExpCompiler bool unicode_; bool reg_exp_too_big_; int current_expansion_factor_; + bool read_backward_; FrequencyCollator frequency_collator_; JSContext* cx_; LifoAlloc* alloc_; @@ -1623,6 +1184,7 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun unicode_(unicode), reg_exp_too_big_(false), current_expansion_factor_(1), + read_backward_(false), frequency_collator_(), cx_(cx), alloc_(alloc) @@ -1746,7 +1308,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData* // at the start of input. 
ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2); RegExpNode* char_class = - alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node); + alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), false, loop_node); first_step_node->AddAlternative(GuardedAlternative(captured_body)); first_step_node->AddAlternative(GuardedAlternative(char_class)); node = first_step_node; @@ -1849,19 +1411,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) TextElementVector* elms = compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc()); elms->append(TextElement::Atom(this)); - return compiler->alloc()->newInfallible<TextNode>(elms, on_success); + return compiler->alloc()->newInfallible<TextNode>(elms, compiler->read_backward(), on_success); } RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success); + return compiler->alloc()->newInfallible<TextNode>(&elements_, compiler->read_backward(), on_success); } RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { - return compiler->alloc()->newInfallible<TextNode>(this, on_success); + return compiler->alloc()->newInfallible<TextNode>(this, compiler->read_backward(), on_success); } RegExpNode* @@ -2002,7 +1564,9 @@ RegExpQuantifier::ToNode(int min, alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer))); } answer = alternation; - if (not_at_start) alternation->set_not_at_start(); + if (not_at_start && !compiler->read_backward()) { + alternation->set_not_at_start(); + } } return answer; } @@ -2014,8 +1578,9 @@ RegExpQuantifier::ToNode(int min, int reg_ctr = needs_counter ? 
compiler->AllocateRegister() : RegExpCompiler::kNoRegister; - LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0); - if (not_at_start) + LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0, + compiler->read_backward()); + if (not_at_start && !compiler->read_backward()) center->set_not_at_start(); RegExpNode* loop_return = needs_counter ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) @@ -2091,7 +1656,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler, CharacterRange::AddClassEscape(alloc, 'n', newline_ranges); RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n'); TextNode* newline_matcher = - alloc->newInfallible<TextNode>(newline_atom, + alloc->newInfallible<TextNode>(newline_atom, false, ActionNode::PositiveSubmatchSuccess(stack_pointer_register, position_register, 0, // No captures inside. @@ -2123,6 +1688,7 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()), RegExpCapture::EndRegister(index()), + compiler->read_backward(), on_success); } @@ -2133,7 +1699,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) } RegExpNode* -RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) +RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { int stack_pointer_register = compiler->AllocateRegister(); int position_register = compiler->AllocateRegister(); @@ -2144,6 +1710,10 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) int register_start = register_of_first_capture + capture_from_ * registers_per_capture; + RegExpNode* result; + bool was_reading_backward = compiler->read_backward(); + compiler->set_read_backward(type() == LOOKBEHIND); + if (is_positive()) { RegExpNode* bodyNode = body()->ToNode(compiler, @@ -2152,37 
+1722,39 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) register_count, register_start, on_success)); - return ActionNode::BeginSubmatch(stack_pointer_register, + result = ActionNode::BeginSubmatch(stack_pointer_register, + position_register, + bodyNode); + } else { + // We use a ChoiceNode for a negative lookahead because it has most of + // the characteristics we need. It has the body of the lookahead as its + // first alternative and the expression after the lookahead of the second + // alternative. If the first alternative succeeds then the + // NegativeSubmatchSuccess will unwind the stack including everything the + // choice node set up and backtrack. If the first alternative fails then + // the second alternative is tried, which is exactly the desired result + // for a negative lookahead. The NegativeLookaheadChoiceNode is a special + // ChoiceNode that knows to ignore the first exit when calculating quick + // checks. + LifoAlloc* alloc = compiler->alloc(); + + RegExpNode* success = + alloc->newInfallible<NegativeSubmatchSuccess>(alloc, + stack_pointer_register, + position_register, + register_count, + register_start); + GuardedAlternative body_alt(body()->ToNode(compiler, success)); + + ChoiceNode* choice_node = + alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success)); + + result = ActionNode::BeginSubmatch(stack_pointer_register, position_register, - bodyNode); - } - - // We use a ChoiceNode for a negative lookahead because it has most of - // the characteristics we need. It has the body of the lookahead as its - // first alternative and the expression after the lookahead of the second - // alternative. If the first alternative succeeds then the - // NegativeSubmatchSuccess will unwind the stack including everything the - // choice node set up and backtrack. 
If the first alternative fails then - // the second alternative is tried, which is exactly the desired result - // for a negative lookahead. The NegativeLookaheadChoiceNode is a special - // ChoiceNode that knows to ignore the first exit when calculating quick - // checks. - LifoAlloc* alloc = compiler->alloc(); - - RegExpNode* success = - alloc->newInfallible<NegativeSubmatchSuccess>(alloc, - stack_pointer_register, - position_register, - register_count, - register_start); - GuardedAlternative body_alt(body()->ToNode(compiler, success)); - - ChoiceNode* choice_node = - alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success)); - - return ActionNode::BeginSubmatch(stack_pointer_register, - position_register, - choice_node); + choice_node); + } + compiler->set_read_backward(was_reading_backward); + return result; } RegExpNode* @@ -2197,8 +1769,12 @@ RegExpCapture::ToNode(RegExpTree* body, RegExpCompiler* compiler, RegExpNode* on_success) { + MOZ_ASSERT(body); int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); + if (compiler->read_backward()) { + std::swap(start_reg, end_reg); + } RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); return ActionNode::StorePosition(start_reg, true, body_node); @@ -2209,8 +1785,15 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success) { const RegExpTreeVector& children = nodes(); RegExpNode* current = on_success; - for (int i = children.length() - 1; i >= 0; i--) - current = children[i]->ToNode(compiler, current); + if (compiler->read_backward()) { + for (int i = 0; i < children.length(); i++) { + current = children[i]->ToNode(compiler, current); + } + } else { + for (int i = children.length() - 1; i >= 0; i--) { + current = children[i]->ToNode(compiler, current); + } + } return current; } @@ -2574,6 +2157,7 @@ 
Trace::PerformDeferredActions(LifoAlloc* alloc, { // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. const int push_limit = (assembler->stack_limit_slack() + 1) / 2; + static const int kNoStore = INT32_MIN; // Count pushes performed to force a stack limit check occasionally. int pushes = 0; @@ -2590,7 +2174,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, int value = 0; bool absolute = false; bool clear = false; - int store_position = -1; + int store_position = kNoStore; // This is a little tricky because we are scanning the actions in reverse // historical order (newest first). for (DeferredAction* action = actions_; @@ -2611,7 +2195,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, // we can set undo_action to IGNORE if we know there is no value to // restore. undo_action = DEFER_RESTORE; - MOZ_ASSERT(store_position == -1); + MOZ_ASSERT(store_position == kNoStore); MOZ_ASSERT(!clear); break; } @@ -2619,14 +2203,14 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, if (!absolute) { value++; } - MOZ_ASSERT(store_position == -1); + MOZ_ASSERT(store_position == kNoStore); MOZ_ASSERT(!clear); undo_action = DEFER_RESTORE; break; case ActionNode::STORE_POSITION: { Trace::DeferredCapture* pc = static_cast<Trace::DeferredCapture*>(action); - if (!clear && store_position == -1) { + if (!clear && store_position == kNoStore) { store_position = pc->cp_offset(); } @@ -2650,7 +2234,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, // Since we're scanning in reverse order, if we've already // set the position we have to ignore historically earlier // clearing operations. - if (store_position == -1) { + if (store_position == kNoStore) { clear = true; } undo_action = DEFER_RESTORE; @@ -2680,7 +2264,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, } // Perform the chronologically last action (or accumulated increment) // for the register. 
- if (store_position != -1) { + if (store_position != kNoStore) { assembler->WriteCurrentPositionToRegister(reg, store_position); } else if (clear) { assembler->ClearRegisters(reg, reg); @@ -2763,7 +2347,6 @@ Trace::InvalidateCurrentCharacter() void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { - MOZ_ASSERT(by > 0); // We don't have an instruction for shifting the current character register // down or for using a shifted value for anything so lets just forget that // we preloaded any characters into it. @@ -2881,16 +2464,23 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) Trace new_trace(*trace); new_trace.InvalidateCurrentCharacter(); + // A positive (> 0) cp_offset means we've already successfully matched a + // non-empty-width part of the pattern, and thus cannot be at or before the + // start of the subject string. We can thus skip both at-start and + // bounds-checks when loading the one-character lookbehind. + const bool may_be_at_or_before_subject_string_start = new_trace.cp_offset() <= 0; + jit::Label ok; - if (new_trace.cp_offset() == 0) { - // The start of input counts as a newline in this context, so skip to - // ok if we are at the start. - assembler->CheckAtStart(&ok); + if (may_be_at_or_before_subject_string_start) { + // The start of input counts as a newline in this context, so skip to ok if + // we are at the start. + assembler->CheckAtStart(new_trace.cp_offset(), &ok); } - // We already checked that we are not at the start of input so it must be - // OK to load the previous character. - assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false); + // If we've already checked that we are not at the start of input, it's okay + // to load the previous character without bounds checks. 
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start; + assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), can_skip_bounds_check); if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) { // Newline means \n, \r, 0x2028 or 0x2029. @@ -2915,11 +2505,10 @@ EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trac new_trace.InvalidateCurrentCharacter(); jit::Label ok; - if (new_trace.cp_offset() == 0) - assembler->CheckAtStart(&ok); + if (new_trace.cp_offset() <= 0) { + assembler->CheckAtStart(new_trace.cp_offset(), &ok); + } - // We already checked that we are not at the start of input so it must be - // OK to load the previous character. assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false); assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax, new_trace.backtrack()); @@ -2943,8 +2532,9 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace* Trace new_trace(*trace); new_trace.InvalidateCurrentCharacter(); - if (new_trace.cp_offset() == 0) - assembler->CheckAtStart(&ok); + if (new_trace.cp_offset() <= 0) { + assembler->CheckAtStart(new_trace.cp_offset(), &ok); + } // First check if next character is a trail surrogate. assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false); @@ -3062,10 +2652,10 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler, jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through; jit::Label* word = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack(); - if (new_trace.cp_offset() == 0) { + if (new_trace.cp_offset() <= 0) { // The start of input counts as a non-word character, so the question is // decided if we are at the start. 
- assembler->CheckAtStart(non_word); + assembler->CheckAtStart(new_trace.cp_offset(), non_word); } // We already checked that we are not at the start of input so it must be // OK to load the previous character. @@ -3108,9 +2698,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) return; } if (trace->at_start() == Trace::UNKNOWN) { - assembler->CheckNotAtStart(trace->backtrack()); + assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack()); Trace at_start_trace = *trace; - at_start_trace.set_at_start(true); + at_start_trace.set_at_start(Trace::TRUE_VALUE); on_success()->Emit(compiler, &at_start_trace); return; } @@ -3813,9 +3403,10 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, jit::Label* backtrack = trace->backtrack(); QuickCheckDetails* quick_check = trace->quick_check_performed(); int element_count = elements().length(); + int backward_offset = read_backward() ? -Length() : 0; for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) { TextElement elm = elements()[i]; - int cp_offset = trace->cp_offset() + elm.cp_offset(); + int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; if (elm.text_type() == TextElement::ATOM) { const CharacterVector& quarks = elm.atom()->data(); for (int j = preloaded ? 
0 : quarks.length() - 1; j >= 0; j--) { @@ -3843,11 +3434,12 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, break; } if (emit_function != nullptr) { + bool bounds_check = *checked_up_to < cp_offset + j || read_backward(); bool bound_checked = emit_function(compiler, quarks[j], backtrack, cp_offset + j, - *checked_up_to < cp_offset + j, + bounds_check, preloaded); if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to); } @@ -3858,13 +3450,14 @@ TextNode::TextEmitPass(RegExpCompiler* compiler, if (first_element_checked && i == 0) continue; if (DeterminedAlready(quick_check, elm.cp_offset())) continue; RegExpCharacterClass* cc = elm.char_class(); + bool bounds_check = *checked_up_to < cp_offset || read_backward(); EmitCharClass(alloc(), assembler, cc, ascii, backtrack, cp_offset, - *checked_up_to < cp_offset, + bounds_check, preloaded); UpdateBoundsCheck(cp_offset, checked_up_to); } @@ -3944,8 +3537,11 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace) } Trace successor_trace(*trace); - successor_trace.set_at_start(false); - successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler); + // If we advance backward, we may end up at the start. + successor_trace.AdvanceCurrentPositionInTrace( + read_backward() ? -Length() : Length(), compiler); + successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN + : Trace::FALSE_VALUE); RecursionCheck rc(compiler); on_success()->Emit(compiler, &successor_trace); } @@ -4117,6 +3713,8 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler) { + if (read_backward()) return nullptr; + if (elements().length() != 1) return nullptr; @@ -4164,7 +3762,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative) SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node); node = seq_node->on_success(); } - return length; + return read_backward() ? 
-length : length; } // Creates a list of AlternativeGenerations. If the list has a reasonable @@ -4239,7 +3837,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) jit::Label greedy_loop_label; Trace counter_backtrack_trace; counter_backtrack_trace.set_backtrack(&greedy_loop_label); - if (not_at_start()) counter_backtrack_trace.set_at_start(false); + if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE); if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) { // Here we have special handling for greedy loops containing only text nodes @@ -4255,7 +3853,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) current_trace = &counter_backtrack_trace; jit::Label greedy_match_failed; Trace greedy_match_trace; - if (not_at_start()) greedy_match_trace.set_at_start(false); + if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE); greedy_match_trace.set_backtrack(&greedy_match_failed); jit::Label loop_label; macro_assembler->Bind(&loop_label); @@ -4325,6 +3923,8 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) // For now we just call all choices one after the other. The idea ultimately // is to use the Dispatch table to try only the relevant ones. 
for (size_t i = first_normal_choice; i < choice_count; i++) { + bool is_last = i == choice_count - 1; + bool fall_through_on_failure = !is_last; GuardedAlternative alternative = alternatives()[i]; AlternativeGeneration* alt_gen = alt_gens.at(i); alt_gen->quick_check_details.set_characters(preload_characters); @@ -4340,20 +3940,20 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); alt_gen->expects_preload = preload_is_current; bool generate_full_check_inline = false; - if (try_to_emit_quick_check_for_alternative(i) && + if (try_to_emit_quick_check_for_alternative(i == 0) && alternative.node()->EmitQuickCheck(compiler, &new_trace, preload_has_checked_bounds, &alt_gen->possible_success, &alt_gen->quick_check_details, - i < choice_count - 1)) { + fall_through_on_failure)) { // Quick check was generated for this choice. preload_is_current = true; preload_has_checked_bounds = true; // On the last choice in the ChoiceNode we generated the quick // check to fall through on possible success. So now we need to // generate the full check inline. 
- if (i == choice_count - 1) { + if (!fall_through_on_failure) { macro_assembler->Bind(&alt_gen->possible_success); new_trace.set_quick_check_performed(&alt_gen->quick_check_details); new_trace.set_characters_preloaded(preload_characters); @@ -4361,7 +3961,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) generate_full_check_inline = true; } } else if (alt_gen->quick_check_details.cannot_match()) { - if (i == choice_count - 1 && !greedy_loop) { + if (!fall_through_on_failure && !greedy_loop) { macro_assembler->JumpOrBacktrack(trace->backtrack()); } continue; @@ -4375,7 +3975,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) alt_gen->expects_preload = false; new_trace.InvalidateCurrentCharacter(); } - if (i < choice_count - 1) { + if (!is_last) { new_trace.set_backtrack(&alt_gen->after); } generate_full_check_inline = true; @@ -4413,12 +4013,14 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) if (new_trace.actions() != nullptr) { new_trace.set_flush_budget(new_flush_budget); } + bool next_expects_preload = + i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; EmitOutOfLineContinuation(compiler, &new_trace, alternatives()[i], alt_gen, preload_characters, - alt_gens.at(i + 1)->expects_preload); + next_expects_preload); } } @@ -4604,11 +4206,14 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) MOZ_ASSERT(start_reg_ + 1 == end_reg_); if (compiler->ignore_case()) { assembler->CheckNotBackReferenceIgnoreCase(start_reg_, + read_backward(), trace->backtrack(), compiler->unicode()); } else { - assembler->CheckNotBackReference(start_reg_, trace->backtrack()); + assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack()); } + // We are going to advance backward, so we may end up at the start. 
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN); on_success()->Emit(compiler, trace); } @@ -4820,6 +4425,9 @@ TextNode::GetQuickCheckDetails(QuickCheckDetails* details, int characters_filled_in, bool not_at_start) { + // Do not collect any quick check details if the text node reads backward, + // since it reads in the opposite direction than we use for quick checks. + if (read_backward()) return; MOZ_ASSERT(characters_filled_in < details->characters()); int characters = details->characters(); int char_mask = MaximumCharacter(compiler->ascii()); @@ -4976,8 +4584,7 @@ QuickCheckDetails::Clear() void QuickCheckDetails::Advance(int by, bool ascii) { - MOZ_ASSERT(by >= 0); - if (by >= characters_) { + if (by >= characters_ || by < 0) { Clear(); return; } diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index e2cabaa026..22e9d944a4 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -34,6 +34,9 @@ #include "ds/SplayTree.h" #include "jit/Label.h" + +#include "irregexp/InfallibleVector.h" +#include "irregexp/RegExpCharRanges.h" #include "vm/RegExpObject.h" namespace js { @@ -57,13 +60,28 @@ struct RegExpCompileData : tree(nullptr), simple(true), contains_anchor(false), - capture_count(0) + capture_count(0), + capture_name_list(nullptr), + capture_index_list(nullptr) {} + // The parsed AST as produced by the RegExpParser. RegExpTree* tree; + // True, iff the pattern is a 'simple' atom with zero captures. In other + // words, the pattern consists of a string with no metacharacters and special + // regexp features, and can be implemented as a standard string search. bool simple; + + // True, iff the pattern is anchored at the start of the string with '^'. bool contains_anchor; + + // The number of capture groups, without the global capture \0. int capture_count; + + // Only use if the pattern contains named captures. If so, this contains a + // mapping of capture names to capture indices, as Values. 
+ CharacterVectorVector* capture_name_list; + IntegerVector* capture_index_list; }; struct RegExpCode @@ -118,7 +136,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t VISIT(Atom) \ VISIT(Quantifier) \ VISIT(Capture) \ - VISIT(Lookahead) \ + VISIT(Lookaround) \ VISIT(BackReference) \ VISIT(Empty) \ VISIT(Text) @@ -127,108 +145,6 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) #undef FORWARD_DECLARE -// InfallibleVector is like Vector, but all its methods are infallible (they -// crash on OOM). We use this class instead of Vector to avoid a ton of -// MOZ_MUST_USE warnings in irregexp code (imported from V8). -template<typename T, size_t N> -class InfallibleVector -{ - Vector<T, N, LifoAllocPolicy<Infallible>> vector_; - - InfallibleVector(const InfallibleVector&) = delete; - void operator=(const InfallibleVector&) = delete; - - public: - explicit InfallibleVector(const LifoAllocPolicy<Infallible>& alloc) : vector_(alloc) {} - - void append(const T& t) { MOZ_ALWAYS_TRUE(vector_.append(t)); } - void append(const T* begin, size_t length) { MOZ_ALWAYS_TRUE(vector_.append(begin, length)); } - - void clear() { vector_.clear(); } - void popBack() { vector_.popBack(); } - void reserve(size_t n) { MOZ_ALWAYS_TRUE(vector_.reserve(n)); } - - size_t length() const { return vector_.length(); } - T popCopy() { return vector_.popCopy(); } - - T* begin() { return vector_.begin(); } - const T* begin() const { return vector_.begin(); } - - T& operator[](size_t index) { return vector_[index]; } - const T& operator[](size_t index) const { return vector_[index]; } - - InfallibleVector& operator=(InfallibleVector&& rhs) { vector_ = Move(rhs.vector_); return *this; } -}; - -class CharacterRange; -typedef InfallibleVector<CharacterRange, 1> CharacterRangeVector; - -// Represents code units in the range from from_ to to_, both ends are -// inclusive. 
-class CharacterRange -{ - public: - CharacterRange() - : from_(0), to_(0) - {} - - CharacterRange(char16_t from, char16_t to) - : from_(from), to_(to) - {} - - static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges); - static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type, - CharacterRangeVector* ranges, bool ignoreCase); - - static inline CharacterRange Singleton(char16_t value) { - return CharacterRange(value, value); - } - static inline CharacterRange Range(char16_t from, char16_t to) { - MOZ_ASSERT(from <= to); - return CharacterRange(from, to); - } - static inline CharacterRange Everything() { - return CharacterRange(0, 0xFFFF); - } - bool Contains(char16_t i) { return from_ <= i && i <= to_; } - char16_t from() const { return from_; } - void set_from(char16_t value) { from_ = value; } - char16_t to() const { return to_; } - void set_to(char16_t value) { to_ = value; } - bool is_valid() { return from_ <= to_; } - bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; } - bool IsSingleton() { return (from_ == to_); } - void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges); - - static void Split(const LifoAlloc* alloc, - CharacterRangeVector base, - const Vector<int>& overlay, - CharacterRangeVector* included, - CharacterRangeVector* excluded); - - // Whether a range list is in canonical form: Ranges ordered by from value, - // and ranges non-overlapping and non-adjacent. - static bool IsCanonical(const CharacterRangeVector& ranges); - - // Convert range list to canonical form. The characters covered by the ranges - // will still be the same, but no character is in more than one range, and - // adjacent ranges are merged. The resulting list may be shorter than the - // original, but cannot be longer. - static void Canonicalize(CharacterRangeVector& ranges); - - // Negate the contents of a character range in canonical form. 
- static void Negate(const LifoAlloc* alloc, - CharacterRangeVector src, - CharacterRangeVector* dst); - - static const int kStartMarker = (1 << 24); - static const int kPayloadMask = (1 << 24) - 1; - - private: - char16_t from_; - char16_t to_; -}; - // A set of unsigned integers that behaves especially well on small // integers (< 32). class OutSet @@ -524,7 +440,7 @@ class RegExpNode int characters_filled_in, bool not_at_start) = 0; - static const int kNodeIsTooComplexForGreedyLoops = -1; + static const int kNodeIsTooComplexForGreedyLoops = INT32_MIN; virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } @@ -762,15 +678,19 @@ class TextNode : public SeqRegExpNode { public: TextNode(TextElementVector* elements, + bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), - elements_(elements) + elements_(elements), + read_backward_(read_backward) {} TextNode(RegExpCharacterClass* that, + bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), - elements_(alloc()->newInfallible<TextElementVector>(*alloc())) + elements_(alloc()->newInfallible<TextElementVector>(*alloc())), + read_backward_(read_backward) { elements_->append(TextElement::CharClass(that)); } @@ -783,6 +703,7 @@ class TextNode : public SeqRegExpNode int characters_filled_in, bool not_at_start); TextElementVector& elements() { return *elements_; } + bool read_backward() { return read_backward_; } void MakeCaseIndependent(bool is_ascii, bool unicode); virtual int GreedyLoopTextLength(); virtual RegExpNode* GetSuccessorOfOmnivorousTextNode( @@ -813,6 +734,7 @@ class TextNode : public SeqRegExpNode int* checked_up_to); int Length(); TextElementVector* elements_; + bool read_backward_; }; class AssertionNode : public SeqRegExpNode @@ -881,15 +803,18 @@ class BackReferenceNode : public SeqRegExpNode public: BackReferenceNode(int start_reg, int end_reg, + bool read_backward, RegExpNode* on_success) : SeqRegExpNode(on_success), start_reg_(start_reg), - 
end_reg_(end_reg) + end_reg_(end_reg), + read_backward_(read_backward) {} virtual void Accept(NodeVisitor* visitor); int start_register() { return start_reg_; } int end_register() { return end_reg_; } + bool read_backward() { return read_backward_; } virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual int EatsAtLeast(int still_to_find, int recursion_depth, @@ -908,6 +833,7 @@ class BackReferenceNode : public SeqRegExpNode private: int start_reg_; int end_reg_; + bool read_backward_; }; class EndNode : public RegExpNode @@ -1050,8 +976,11 @@ class ChoiceNode : public RegExpNode bool not_at_start() { return not_at_start_; } void set_not_at_start() { not_at_start_ = true; } void set_being_calculated(bool b) { being_calculated_ = b; } - virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } + virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { + return true; + } virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); + virtual bool read_backward() { return false; } protected: int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative); @@ -1103,18 +1032,22 @@ class NegativeLookaheadChoiceNode : public ChoiceNode // starts by loading enough characters for the alternative that takes fewest // characters, but on a negative lookahead the negative branch did not take // part in that calculation (EatsAtLeast) so the assumptions don't hold. 
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } + bool try_to_emit_quick_check_for_alternative(bool is_first) override { + return !is_first; + } virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); }; class LoopChoiceNode : public ChoiceNode { public: - explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length) + explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length, + bool read_backward) : ChoiceNode(alloc, 2), loop_node_(nullptr), continue_node_(nullptr), - body_can_be_zero_length_(body_can_be_zero_length) + body_can_be_zero_length_(body_can_be_zero_length), + read_backward_(read_backward) {} void AddLoopAlternative(GuardedAlternative alt); @@ -1132,6 +1065,7 @@ class LoopChoiceNode : public ChoiceNode RegExpNode* loop_node() { return loop_node_; } RegExpNode* continue_node() { return continue_node_; } bool body_can_be_zero_length() { return body_can_be_zero_length_; } + virtual bool read_backward() { return read_backward_; } virtual void Accept(NodeVisitor* visitor); virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); @@ -1146,6 +1080,7 @@ class LoopChoiceNode : public ChoiceNode RegExpNode* loop_node_; RegExpNode* continue_node_; bool body_can_be_zero_length_; + bool read_backward_; }; // Improve the speed that we scan for an initial point where a non-anchored @@ -1421,8 +1356,8 @@ class Trace } TriBool at_start() { return at_start_; } - void set_at_start(bool at_start) { - at_start_ = at_start ? 
TRUE_VALUE : FALSE_VALUE; + void set_at_start(TriBool at_start) { + at_start_ = at_start; } jit::Label* backtrack() { return backtrack_; } jit::Label* loop_label() { return loop_label_; } diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp index 5d1f0ea805..f53acfb606 100644 --- a/js/src/irregexp/RegExpInterpreter.cpp +++ b/js/src/irregexp/RegExpInterpreter.cpp @@ -221,8 +221,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } break; BYTECODE(LOAD_CURRENT_CHAR) { - size_t pos = current + (insn >> BYTECODE_SHIFT); - if (pos >= length) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos >= (int)length || pos < 0) { pc = byteCode + Load32Aligned(pc + 4); } else { current_char = chars[pos]; @@ -237,8 +237,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha break; } BYTECODE(LOAD_2_CURRENT_CHARS) { - size_t pos = current + (insn >> BYTECODE_SHIFT); - if (pos + 2 > length) { + int pos = current + (insn >> BYTECODE_SHIFT); + if (pos + 2 > (int)length || pos < 0) { pc = byteCode + Load32Aligned(pc + 4); } else { CharT next = chars[pos + 1]; @@ -424,6 +424,30 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha pc += BC_CHECK_NOT_BACK_REF_LENGTH; break; } + BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; + break; + } + if (int(current) - len < 0) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } else { + int i; + for (i = 0; i < len; i++) { + if (chars[from + i] != chars[int(current) - len + i]) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + } + if (i < len) break; + current -= len; + } + pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; + break; + } BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) { int from = registers[insn >> BYTECODE_SHIFT]; int len = 
registers[(insn >> BYTECODE_SHIFT) + 1] - from; @@ -464,14 +488,54 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } break; } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + break; + } + if (int(current) - len < 0) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + if (CaseInsensitiveCompareStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) { + current -= len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + } else { + pc = byteCode + Load32Aligned(pc + 4); + } + break; + + } + BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) { + int from = registers[insn >> BYTECODE_SHIFT]; + int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; + if (from < 0 || len <= 0) { + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + break; + } + if (int(current) - len < 0) { + pc = byteCode + Load32Aligned(pc + 4); + break; + } + if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) { + current -= len; + pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; + } else { + pc = byteCode + Load32Aligned(pc + 4); + } + break; + + } BYTECODE(CHECK_AT_START) - if (current == 0) + if (current + (insn >> BYTECODE_SHIFT) == 0) pc = byteCode + Load32Aligned(pc + 4); else pc += BC_CHECK_AT_START_LENGTH; break; BYTECODE(CHECK_NOT_AT_START) - if (current == 0) + if (current + (insn >> BYTECODE_SHIFT) == 0) pc += BC_CHECK_NOT_AT_START_LENGTH; else pc = byteCode + Load32Aligned(pc + 4); diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp index 94f6934d3f..2c4ec67ef5 100644 --- a/js/src/irregexp/RegExpMacroAssembler.cpp +++ b/js/src/irregexp/RegExpMacroAssembler.cpp @@ -172,9 +172,9 @@ InterpretedRegExpMacroAssembler::Bind(jit::Label* 
label) } void -InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start) +InterpretedRegExpMacroAssembler::CheckAtStart(int cp_offset, jit::Label* on_at_start) { - Emit(BC_CHECK_AT_START, 0); + Emit(BC_CHECK_AT_START, cp_offset); EmitOrLink(on_at_start); } @@ -225,32 +225,37 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre } void -InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start) +InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) { - Emit(BC_CHECK_NOT_AT_START, 0); + Emit(BC_CHECK_NOT_AT_START, cp_offset); EmitOrLink(on_not_at_start); } void -InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match) +InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, + jit::Label* on_no_match) { MOZ_ASSERT(start_reg >= 0); MOZ_ASSERT(start_reg <= kMaxRegister); - Emit(BC_CHECK_NOT_BACK_REF, start_reg); + Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, + start_reg); EmitOrLink(on_no_match); } void InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, + bool read_backward, jit::Label* on_no_match, bool unicode) { MOZ_ASSERT(start_reg >= 0); MOZ_ASSERT(start_reg <= kMaxRegister); if (unicode) - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg); + Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, + start_reg); else - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg); + Emit(read_backward ? 
BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE, + start_reg); EmitOrLink(on_no_match); } diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h index e8275faf40..4fa0ab5630 100644 --- a/js/src/irregexp/RegExpMacroAssembler.h +++ b/js/src/irregexp/RegExpMacroAssembler.h @@ -96,7 +96,7 @@ class MOZ_STACK_CLASS RegExpMacroAssembler virtual void Backtrack() = 0; virtual void Bind(jit::Label* label) = 0; - virtual void CheckAtStart(jit::Label* on_at_start) = 0; + virtual void CheckAtStart(int cp_offset, jit::Label* on_at_start) = 0; // Dispatch after looking the current character up in a 2-bits-per-entry // map. The destinations vector has up to 4 labels. @@ -109,10 +109,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0; virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0; virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0; - virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0; - virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0; - virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, - bool unicode) = 0; + virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0; + virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0; + virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + jit::Label* on_no_match, bool unicode) = 0; // Check the current character for a match with a literal character. If we // fail to match then goto the on_failure label. 
End of input always @@ -238,15 +238,16 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro void AdvanceRegister(int reg, int by); void Backtrack(); void Bind(jit::Label* label); - void CheckAtStart(jit::Label* on_at_start); + void CheckAtStart(int cp_offset, jit::Label* on_at_start); void CheckCharacter(unsigned c, jit::Label* on_equal); void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal); void CheckCharacterGT(char16_t limit, jit::Label* on_greater); void CheckCharacterLT(char16_t limit, jit::Label* on_less); void CheckGreedyLoop(jit::Label* on_tos_equals_current_position); - void CheckNotAtStart(jit::Label* on_not_at_start); - void CheckNotBackReference(int start_reg, jit::Label* on_no_match); - void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode); + void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start); + void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match); + void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + jit::Label* on_no_match, bool unicode); void CheckNotCharacter(unsigned c, jit::Label* on_not_equal); void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal); void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with, diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp index d0b19d471e..0deb3c658d 100644 --- a/js/src/irregexp/RegExpParser.cpp +++ b/js/src/irregexp/RegExpParser.cpp @@ -90,6 +90,26 @@ RegExpBuilder::AddCharacter(char16_t c) #endif } +// forward declare atom helpers from below +static inline RegExpTree* SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, bool ignore_case); +static inline RegExpTree* LeadSurrogateAtom(LifoAlloc* alloc, char16_t value); +static inline RegExpTree* TrailSurrogateAtom(LifoAlloc* alloc, char16_t value); + +void 
+RegExpBuilder::AddUnicodeCharacter(widechar c, bool ignore_case) { + if (c > unicode::UTF16Max) { + char16_t lead, trail; + unicode::UTF16Encode(c, &lead, &trail); + AddAtom(SurrogatePairAtom(alloc, lead, trail, ignore_case)); + } else if (unicode::IsLeadSurrogate(c)) { + AddAtom(LeadSurrogateAtom(alloc, c)); + } else if (unicode::IsTrailSurrogate(c)) { + AddAtom(TrailSurrogateAtom(alloc, c)); + } else { + AddCharacter(static_cast<char16_t>(c)); + } +} + void RegExpBuilder::AddEmpty() { @@ -225,7 +245,10 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, : ts(ts), alloc(alloc), captures_(nullptr), + named_captures_(nullptr), + named_back_references_(nullptr), next_pos_(chars), + captures_started_(0), end_(end), current_(kEndMarker), capture_count_(0), @@ -236,7 +259,8 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc, dotall_(dotall), simple_(false), contains_anchor_(false), - is_scanned_for_captures_(false) + is_scanned_for_captures_(false), + has_named_captures_(false) { Advance(); } @@ -251,6 +275,30 @@ RegExpParser<CharT>::ReportError(unsigned errorNumber, const char* param /* = nu } template <typename CharT> +bool +RegExpParser<CharT>::StoreNamedCaptureMap(CharacterVectorVector** names, IntegerVector** indices) +{ + // Any named captures defined at all? 
+ if (!named_captures_ || !named_captures_->length()) { + return true; + } + + CharacterVectorVector* nv = alloc->newInfallible<CharacterVectorVector>(*alloc); + IntegerVector* iv = alloc->newInfallible<IntegerVector>(*alloc); + + for (size_t i=0; i<named_captures_->length(); i++) { + RegExpCapture* capture = (*named_captures_)[i]; + const CharacterVector* cn = capture->name(); + nv->append(const_cast<CharacterVector*>(cn)); + iv->append(capture->index()); + } + + *names = nv; + *indices = iv; + return true; +} + +template <typename CharT> void RegExpParser<CharT>::Advance() { @@ -363,6 +411,39 @@ RegExpParser<CharT>::ParseBracedHexEscape(widechar* value) template <typename CharT> bool +RegExpParser<CharT>::ParseUnicodeEscape(widechar* value) +{ + // Parse a RegExpUnicodeEscapeSequence + // Both \uxxxx and \u{xxxxx} are allowed. \u has already been consumed. + const CharT* start = position(); + if (current() == '{' && unicode_) { + bool result = ParseBracedHexEscape(value); + if (!result) { + Reset(start); + } + return result; + } + // \u but no {, or \u{...} escapes not allowed. + bool result = ParseHexEscape(4, value); + if (result && unicode_ && unicode::IsLeadSurrogate(static_cast<char16_t>(*value)) && current() == '\\') { + // Attempt to read trail surrogate. 
+ const CharT* start = position(); + if (Next() == 'u') { + Advance(2); + widechar trail; + if (ParseHexEscape(4, &trail) && + unicode::IsTrailSurrogate(static_cast<char16_t>(trail))) { + *value = unicode::UTF16Decode(static_cast<char16_t>(*value), static_cast<char16_t>(trail)); + return true; + } + } + Reset(start); + } + return result; +} + +template <typename CharT> +bool RegExpParser<CharT>::ParseTrailSurrogate(widechar* value) { if (current() != '\\') @@ -418,7 +499,8 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to) static inline RegExpTree* NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to) { - return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0); + return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false, + 0, 0, RegExpLookaround::LOOKAHEAD); } static bool @@ -558,30 +640,13 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code) case 'u': { Advance(); widechar value; - if (unicode_) { - if (current() == '{') { - if (!ParseBracedHexEscape(&value)) - return false; - *code = value; - return true; - } - if (ParseHexEscape(4, &value)) { - if (unicode::IsLeadSurrogate(value)) { - widechar trail; - if (ParseTrailSurrogate(&trail)) { - *code = unicode::UTF16Decode(value, trail); - return true; - } - } - *code = value; - return true; - } - ReportError(JSMSG_INVALID_UNICODE_ESCAPE); - return false; + if (ParseUnicodeEscape(&value)) { + *code = value; + return true; } - if (ParseHexEscape(4, &value)) { - *code = value; - return true; + if (unicode_) { + ReportError(JSMSG_INVALID_UNICODE_ESCAPE); + return false; } // If \u is not followed by a four-digit or braced hexadecimal, treat it // as an identity escape. 
@@ -605,215 +670,6 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code) return true; } -class WideCharRange -{ - public: - WideCharRange() - : from_(0), to_(0) - {} - - WideCharRange(widechar from, widechar to) - : from_(from), to_(to) - {} - - static inline WideCharRange Singleton(widechar value) { - return WideCharRange(value, value); - } - static inline WideCharRange Range(widechar from, widechar to) { - MOZ_ASSERT(from <= to); - return WideCharRange(from, to); - } - - bool Contains(widechar i) const { return from_ <= i && i <= to_; } - widechar from() const { return from_; } - widechar to() const { return to_; } - - private: - widechar from_; - widechar to_; -}; - -typedef InfallibleVector<WideCharRange, 1> WideCharRangeVector; - -static inline CharacterRange -LeadSurrogateRange() -{ - return CharacterRange::Range(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax); -} - -static inline CharacterRange -TrailSurrogateRange() -{ - return CharacterRange::Range(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax); -} - -static inline WideCharRange -NonBMPRange() -{ - return WideCharRange::Range(unicode::NonBMPMin, unicode::NonBMPMax); -} - -static const char16_t kNoCharClass = 0; - -// Adds a character or pre-defined character class to character ranges. -// If char_class is not kInvalidClass, it's interpreted as a class -// escape (i.e., 's' means whitespace, from '\s'). 
-static inline void -AddCharOrEscape(LifoAlloc* alloc, - CharacterRangeVector* ranges, - char16_t char_class, - widechar c) -{ - if (char_class != kNoCharClass) - CharacterRange::AddClassEscape(alloc, char_class, ranges); - else - ranges->append(CharacterRange::Singleton(c)); -} - -static inline void -AddCharOrEscapeUnicode(LifoAlloc* alloc, - CharacterRangeVector* ranges, - CharacterRangeVector* lead_ranges, - CharacterRangeVector* trail_ranges, - WideCharRangeVector* wide_ranges, - char16_t char_class, - widechar c, - bool ignore_case) -{ - if (char_class != kNoCharClass) { - CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case); - switch (char_class) { - case 'S': - case 'W': - case 'D': - lead_ranges->append(LeadSurrogateRange()); - trail_ranges->append(TrailSurrogateRange()); - wide_ranges->append(NonBMPRange()); - break; - case '.': - MOZ_CRASH("Bad char_class!"); - } - return; - } - - if (unicode::IsLeadSurrogate(c)) - lead_ranges->append(CharacterRange::Singleton(c)); - else if (unicode::IsTrailSurrogate(c)) - trail_ranges->append(CharacterRange::Singleton(c)); - else if (c >= unicode::NonBMPMin) - wide_ranges->append(WideCharRange::Singleton(c)); - else - ranges->append(CharacterRange::Singleton(c)); -} - -static inline void -AddUnicodeRange(LifoAlloc* alloc, - CharacterRangeVector* ranges, - CharacterRangeVector* lead_ranges, - CharacterRangeVector* trail_ranges, - WideCharRangeVector* wide_ranges, - widechar first, - widechar next) -{ - MOZ_ASSERT(first <= next); - if (first < unicode::LeadSurrogateMin) { - if (next < unicode::LeadSurrogateMin) { - ranges->append(CharacterRange::Range(first, next)); - return; - } - ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1)); - first = unicode::LeadSurrogateMin; - } - if (first <= unicode::LeadSurrogateMax) { - if (next <= unicode::LeadSurrogateMax) { - lead_ranges->append(CharacterRange::Range(first, next)); - return; - } - 
lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax)); - first = unicode::LeadSurrogateMax + 1; - } - MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin); - if (first <= unicode::TrailSurrogateMax) { - if (next <= unicode::TrailSurrogateMax) { - trail_ranges->append(CharacterRange::Range(first, next)); - return; - } - trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax)); - first = unicode::TrailSurrogateMax + 1; - } - if (first <= unicode::UTF16Max) { - if (next <= unicode::UTF16Max) { - ranges->append(CharacterRange::Range(first, next)); - return; - } - ranges->append(CharacterRange::Range(first, unicode::UTF16Max)); - first = unicode::NonBMPMin; - } - MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin); - wide_ranges->append(WideCharRange::Range(first, next)); -} - -// Negate a vector of ranges by subtracting its ranges from a range -// encompassing the full range of possible values. -template <typename RangeType> -static inline void -NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges, - RangeType full_range) -{ - typedef InfallibleVector<RangeType, 1> RangeVector; - RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc); - tmp_ranges->append(full_range); - RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc); - - // Perform the following calculation: - // result_ranges = tmp_ranges - ranges - // with the following steps: - // result_ranges = tmp_ranges - ranges[0] - // SWAP(result_ranges, tmp_ranges) - // result_ranges = tmp_ranges - ranges[1] - // SWAP(result_ranges, tmp_ranges) - // ... - // result_ranges = tmp_ranges - ranges[N-1] - // SWAP(result_ranges, tmp_ranges) - // The last SWAP is just for simplicity of the loop. 
- for (size_t i = 0; i < (*ranges)->length(); i++) { - result_ranges->clear(); - - const RangeType& range = (**ranges)[i]; - for (size_t j = 0; j < tmp_ranges->length(); j++) { - const RangeType& tmpRange = (*tmp_ranges)[j]; - auto from1 = tmpRange.from(); - auto to1 = tmpRange.to(); - auto from2 = range.from(); - auto to2 = range.to(); - - if (from1 < from2) { - if (to1 < from2) { - result_ranges->append(tmpRange); - } else if (to1 <= to2) { - result_ranges->append(RangeType::Range(from1, from2 - 1)); - } else { - result_ranges->append(RangeType::Range(from1, from2 - 1)); - result_ranges->append(RangeType::Range(to2 + 1, to1)); - } - } else if (from1 <= to2) { - if (to1 > to2) - result_ranges->append(RangeType::Range(to2 + 1, to1)); - } else { - result_ranges->append(tmpRange); - } - } - - auto tmp = tmp_ranges; - tmp_ranges = result_ranges; - result_ranges = tmp; - } - - // After the loop, result is pointed at by tmp_ranges, instead of - // result_ranges. - *ranges = tmp_ranges; -} - static bool WideCharRangesContain(WideCharRangeVector* wide_ranges, widechar c) { @@ -883,9 +739,9 @@ UnicodeRangesAtom(LifoAlloc* alloc, } if (is_negated) { - NegateUnicodeRanges(alloc, &lead_ranges, LeadSurrogateRange()); - NegateUnicodeRanges(alloc, &trail_ranges, TrailSurrogateRange()); - NegateUnicodeRanges(alloc, &wide_ranges, NonBMPRange()); + CharacterRange::NegateUnicodeRanges(alloc, &lead_ranges, CharacterRange::LeadSurrogate()); + CharacterRange::NegateUnicodeRanges(alloc, &trail_ranges, CharacterRange::TrailSurrogate()); + CharacterRange::NegateUnicodeRanges(alloc, &wide_ranges, WideCharRange::NonBMP()); } RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); @@ -893,8 +749,8 @@ UnicodeRangesAtom(LifoAlloc* alloc, bool added = false; if (is_negated) { - ranges->append(LeadSurrogateRange()); - ranges->append(TrailSurrogateRange()); + ranges->append(CharacterRange::LeadSurrogate()); + ranges->append(CharacterRange::TrailSurrogate()); } if (ranges->length() > 
0) { builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated)); @@ -1012,9 +868,9 @@ RegExpParser<CharT>::ParseCharacterClass() } while (has_more() && current() != ']') { - char16_t char_class = kNoCharClass; - widechar first = 0; - if (!ParseClassAtom(&char_class, &first)) + char16_t char_class_1 = kNoCharClass; + widechar char_1 = 0; + if (!ParseClassEscape(&char_class_1, &char_1, ranges, lead_ranges, trail_ranges, wide_ranges)) return nullptr; if (current() == '-') { Advance(); @@ -1023,41 +879,49 @@ RegExpParser<CharT>::ParseCharacterClass() // following code report an error. break; } else if (current() == ']') { - if (unicode_) { - AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, - char_class, first, ignore_case_); - } else { - AddCharOrEscape(alloc, ranges, char_class, first); + // if the last item was not a class, add it verbatim. + if (char_class_1 == kNoCharClass) { + if (unicode_) { + CharacterRange::AddCharUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1); + } else { + ranges->append(CharacterRange::Singleton(char_1)); + } } + // Hyphen at the end of a class. Treat the '-' verbatim. ranges->append(CharacterRange::Singleton('-')); break; } char16_t char_class_2 = kNoCharClass; - widechar next = 0; - if (!ParseClassAtom(&char_class_2, &next)) + widechar char_2 = 0; + if (!ParseClassEscape(&char_class_2, &char_2, ranges, lead_ranges, trail_ranges, wide_ranges)) return nullptr; - if (char_class != kNoCharClass || char_class_2 != kNoCharClass) { + if (char_class_1 != kNoCharClass || char_class_2 != kNoCharClass) { if (unicode_) return ReportError(JSMSG_RANGE_WITH_CLASS_ESCAPE); - // Either end is an escaped character class. Treat the '-' verbatim. - AddCharOrEscape(alloc, ranges, char_class, first); + // Either end is an escaped character class. 
Treat the '-' verbatim and add the + // character that isn't a class + if (char_class_1 == kNoCharClass) + ranges->append(CharacterRange::Singleton(char_1)); ranges->append(CharacterRange::Singleton('-')); - AddCharOrEscape(alloc, ranges, char_class_2, next); + if (char_class_1 == kNoCharClass) + ranges->append(CharacterRange::Singleton(char_2)); continue; } - if (first > next) + if (char_1 > char_2) return ReportError(JSMSG_BAD_CLASS_RANGE); if (unicode_) - AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges,wide_ranges, first, next); + CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1, char_2); else - ranges->append(CharacterRange::Range(first, next)); + ranges->append(CharacterRange::Range(char_1, char_2)); } else { - if (unicode_) { - AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, - char_class, first, ignore_case_); - } else { - AddCharOrEscape(alloc, ranges, char_class, first); + // if the last item was not a class, add it verbatim. 
+ if (char_class_1 == kNoCharClass) { + if (unicode_) { + CharacterRange::AddCharUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1); + } else { + ranges->append(CharacterRange::Singleton(char_1)); + } } } } @@ -1070,22 +934,26 @@ RegExpParser<CharT>::ParseCharacterClass() is_negated = !is_negated; } return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated); - } + } else { + if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 && + trail_ranges->length() == 0 && wide_ranges->length() == 0) + { + ranges->append(CharacterRange::Everything()); + return alloc->newInfallible<RegExpCharacterClass>(ranges, true); + } - if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 && - trail_ranges->length() == 0 && wide_ranges->length() == 0) - { - ranges->append(CharacterRange::Everything()); - return alloc->newInfallible<RegExpCharacterClass>(ranges, true); + return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated, + ignore_case_); } - - return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated, - ignore_case_); } template <typename CharT> bool -RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value) +RegExpParser<CharT>::ParseClassEscape(char16_t* char_class, widechar *value, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges) { MOZ_ASSERT(*char_class == kNoCharClass); widechar first = current(); @@ -1094,10 +962,32 @@ RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value) case 'w': case 'W': case 'd': case 'D': case 's': case 'S': { *char_class = Next(); Advance(2); + // add character range to ranges immediately + if (unicode_) { + CharacterRange::AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, + *char_class, 0, ignore_case_); + } else { + CharacterRange::AddCharOrEscape(alloc, ranges, 
*char_class, 0); + } return true; } case kEndMarker: return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP); + case 'p': + case 'P': + if (unicode_) { + *char_class = Next(); + Advance(2); + bool negate = *char_class == 'P'; + std::string name, value; + if (!ParsePropertyClassName(name, value) || + !CharacterRange::AddPropertyClassRange(alloc, name, value, negate, ignore_case_, + ranges, lead_ranges, trail_ranges, wide_ranges)) { + return ReportError(JSMSG_INVALID_CLASS_PROPERTY_NAME); + } + return true; + } + MOZ_FALLTHROUGH default: if (!ParseClassCharacterEscape(value)) return false; @@ -1127,6 +1017,7 @@ template <typename CharT> void RegExpParser<CharT>::ScanForCaptures() { + const CharT* saved_position = position(); // Start with captures started previous to current position int capture_count = captures_started(); // Add count of captures after this position. @@ -1150,12 +1041,32 @@ RegExpParser<CharT>::ScanForCaptures() break; } case '(': - if (current() != '?') capture_count++; + if (current() == '?') { + // At this point we could be in + // * a non-capturing group '(:', + // * a lookbehind assertion '(?<=' '(?<!' + // * or a named capture '(?<'. + // + // Of these, only named captures are capturing groups. + + Advance(); + if (current() != '<') break; + + Advance(); + if (current() == '=' || current() == '!') break; + + // Found a possible named capture. It could turn out to be a syntax + // error (e.g. an unterminated or invalid name), but that distinction + // does not matter for our purposes. 
+ has_named_captures_ = true; + } + capture_count++; break; } } capture_count_ = capture_count; is_scanned_for_captures_ = true; + Reset(saved_position); } inline bool @@ -1213,6 +1124,269 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out) return true; } +static void push_code_unit(CharacterVector* v, uint32_t code_unit) +{ + // based off of unicode::UTF16Encode + if (!unicode::IsSupplementary(code_unit)) { + v->append(char16_t(code_unit)); + } else { + v->append(unicode::LeadSurrogate(code_unit)); + v->append(unicode::TrailSurrogate(code_unit)); + } +} + +bool IsUnicodePropertyValueCharacter(char c) { + // https://tc39.github.io/proposal-regexp-unicode-property-escapes/ + // + // Note that using this to validate each parsed char is quite conservative. + // A possible alternative solution would be to only ensure the parsed + // property name/value candidate string does not contain '\0' characters and + // let ICU lookups trigger the final failure. + if ('a' <= c && c <= 'z') return true; + if ('A' <= c && c <= 'Z') return true; + if ('0' <= c && c <= '9') return true; + return (c == '_'); +} + +template <typename CharT> +bool +RegExpParser<CharT>::ParsePropertyClassName(std::string& name, std::string& value) +{ + MOZ_ASSERT(name.empty()); + MOZ_ASSERT(value.empty()); + // Parse the property class as follows: + // - In \p{name}, 'name' is interpreted + // - either as a general category property value name. + // - or as a binary property name. + // - In \p{name=value}, 'name' is interpreted as an enumerated property name, + // and 'value' is interpreted as one of the available property value names. + // - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used. + // - Loose matching is not applied. 
+ if (current() == '{') { + // Parse \p{[PropertyName=]PropertyNameValue} + for (Advance(); current() != '}' && current() != '='; Advance()) { + if (!IsUnicodePropertyValueCharacter(current())) return false; + if (!has_next()) return false; + name += static_cast<char>(current()); + } + if (current() == '=') { + for (Advance(); current() != '}'; Advance()) { + if (!IsUnicodePropertyValueCharacter(current())) return false; + if (!has_next()) return false; + value += static_cast<char>(current()); + } + } + } else { + return false; + } + Advance(); + + return true; +} + +template <typename CharT> +const CharacterVector* +RegExpParser<CharT>::ParseCaptureGroupName() +{ + CharacterVector* name = alloc->newInfallible<CharacterVector>(*alloc); + + bool at_start = true; + while (true) { + widechar c = current(); + Advance(); + + // Convert unicode escapes. + if (c == '\\' && current() == 'u') { + Advance(); + if (!ParseUnicodeEscape(&c)) { + ReportError(JSMSG_INVALID_UNICODE_ESCAPE); + return nullptr; + } + } + + // The backslash char is misclassified as both ID_Start and ID_Continue. 
+ if (c == '\\') { + ReportError(JSMSG_INVALID_CAPTURE_NAME); + return nullptr; + } + + if (at_start) { + if (!unicode::IsIdentifierStart(c)) { + ReportError(JSMSG_INVALID_CAPTURE_NAME); + return nullptr; + } + push_code_unit(name, c); + at_start = false; + } else { + if (c == '>') { + break; + } else if (unicode::IsIdentifierPart(c)) { + push_code_unit(name, c); + } else { + ReportError(JSMSG_INVALID_CAPTURE_NAME); + return nullptr; + } + } + } + + return name; +} + +template <typename CharT> +bool +RegExpParser<CharT>::CreateNamedCaptureAtIndex(const CharacterVector* name, + int index) +{ + MOZ_ASSERT(0 < index && index <= captures_started_); + MOZ_ASSERT(name != nullptr); + + RegExpCapture* capture = GetCapture(index); + MOZ_ASSERT(capture->name() == nullptr); + + capture->set_name(name); + + if (named_captures_ == nullptr) { + named_captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc); + } else { + // Check for duplicates and bail if we find any. + if (FindNamedCapture(name) != nullptr) { + ReportError(JSMSG_DUPLICATE_CAPTURE_NAME); + return false; + } + } + named_captures_->append(capture); + return true; +} + +template <typename CharT> +RegExpCapture* +RegExpParser<CharT>::FindNamedCapture(const CharacterVector* name) +{ + // Linear search is fine since there are usually very few named groups + for (auto it=named_captures_->begin(); it<named_captures_->end(); it++) { + if (*(*it)->name() == *name) { + return *it; + } + } + return nullptr; +} + +template <typename CharT> +bool +RegExpParser<CharT>::ParseNamedBackReference(RegExpBuilder* builder, + RegExpParserState* state) +{ + // The parser is assumed to be on the '<' in \k<name>. 
+ if (current() != '<') { + ReportError(JSMSG_INVALID_NAMED_REF); + return false; + } + + Advance(); + const CharacterVector* name = ParseCaptureGroupName(); + if (name == nullptr) { + return false; + } + + if (state->IsInsideCaptureGroup(name)) { + builder->AddEmpty(); + } else { + RegExpBackReference* atom = alloc->newInfallible<RegExpBackReference>(nullptr); + atom->set_name(name); + + builder->AddAtom(atom); + + if (named_back_references_ == nullptr) { + named_back_references_ = alloc->newInfallible<RegExpBackReferenceVector>(*alloc); + } + named_back_references_->append(atom); + } + + return true; +} + +template <typename CharT> +void +RegExpParser<CharT>::PatchNamedBackReferences() +{ + if (named_back_references_ == nullptr) return; + + if (named_captures_ == nullptr) { + // Named backrefs but no named groups + ReportError(JSMSG_INVALID_NAMED_CAPTURE_REF); + return; + } + + // Look up and patch the actual capture for each named back reference. + for (size_t i = 0; i < named_back_references_->length(); i++) { + RegExpBackReference* ref = (*named_back_references_)[i]; + + RegExpCapture* capture = FindNamedCapture(ref->name()); + if (capture == nullptr) { + ReportError(JSMSG_INVALID_NAMED_CAPTURE_REF); + return; + } + + ref->set_capture(capture); + } +} + +template <typename CharT> +RegExpCapture* +RegExpParser<CharT>::GetCapture(int index) +{ + // The index for the capture groups are one-based. Its index in the list is + // zero-based. + int known_captures = + is_scanned_for_captures_ ? 
capture_count_ : captures_started_; + MOZ_ASSERT(index <= known_captures); + if (captures_ == NULL) { + captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc); + } + while ((int)captures_->length() < known_captures) { + RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1); + captures_->append(capture); + } + return (*captures_)[index - 1]; +} + +template <typename CharT> +bool +RegExpParser<CharT>::HasNamedCaptures() { + if (has_named_captures_ || is_scanned_for_captures_) { + return has_named_captures_; + } + + ScanForCaptures(); + return has_named_captures_; +} + +template <typename CharT> +bool +RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index) +{ + for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { + if (s->group_type() != CAPTURE) continue; + // Return true if we found the matching capture index. + if (index == s->capture_index()) return true; + // Abort if index is larger than what has been parsed up till this state. 
+ if (index > s->capture_index()) return false; + } + return false; +} + +template <typename CharT> +bool +RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(const CharacterVector* name) +{ + for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) { + if (s->group_type() != CAPTURE) continue; + if (!s->IsNamedCapture()) continue; + if (*s->capture_name() == *name) return true; + } + return false; +} + // QuantifierPrefix :: // { DecimalDigits } // { DecimalDigits , } @@ -1289,6 +1463,7 @@ RegExpTree* RegExpParser<CharT>::ParsePattern() { RegExpTree* result = ParseDisjunction(); + PatchNamedBackReferences(); MOZ_ASSERT_IF(result, !has_more()); return result; } @@ -1419,12 +1594,102 @@ UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool igno CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc); - AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0, - ignore_case); + CharacterRange::AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, + char_class, 0, ignore_case); return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false); } + + +static inline RegExpTree* UnicodePropertyClassAtom(LifoAlloc* alloc, const std::string& name, + const std::string& value, bool negate, bool ignore_case); + +static inline RegExpTree* +UnicodePropertySequenceAtom(LifoAlloc* alloc, const std::string name) +{ + // If |name| is a special sequence name, return a subexpression that matches it. + // All possible sequences are hardcoded here. 
+ const widechar* sequence_list = nullptr; + if (name == "Emoji_Flag_Sequence" || + name == "RGI_Emoji_Flag_Sequence") { + sequence_list = kEmojiFlagSequences; + } else + if (name == "Emoji_Tag_Sequence" || + name == "RGI_Emoji_Tag_Sequence") { + sequence_list = kEmojiTagSequences; + } else + if (name == "Emoji_ZWJ_Sequence" || + name == "RGI_Emoji_ZWJ_Sequence") { + sequence_list = kEmojiZWJSequences; + } + if (sequence_list != nullptr) { + // TODO(yangguo): this creates huge regexp code. Alternative to this is + // to create a new operator that checks for these sequences at runtime. + RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); + while (true) { // Iterate through list of sequences. + while (*sequence_list != 0) { // Iterate through sequence. + builder->AddUnicodeCharacter(*sequence_list, false); + sequence_list++; + } + sequence_list++; + if (*sequence_list == 0) break; + builder->NewAlternative(); + } + return builder->ToRegExp(); + } + + if (name == "Emoji_Keycap_Sequence") { + // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence + // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3} + RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); + CharacterRangeVector* prefix_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + prefix_ranges->append(CharacterRange::Range('0', '9')); + prefix_ranges->append(CharacterRange::Singleton('#')); + prefix_ranges->append(CharacterRange::Singleton('*')); + builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(prefix_ranges, false)); + builder->AddCharacter(0xFE0F); + builder->AddCharacter(0x20E3); + return builder->ToRegExp(); + } else + if (name == "Emoji_Modifier_Sequence" || + name == "RGI_Emoji_Modifier_Sequence") { + // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence + // emoji_modifier_sequence := emoji_modifier_base emoji_modifier + + RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc); + 
builder->AddAtom(UnicodePropertyClassAtom(alloc, "Emoji_Modifier_Base", "", false, false)); + builder->AddAtom(UnicodePropertyClassAtom(alloc, "Emoji_Modifier", "", false, false)); + return builder->ToRegExp(); + } + + return nullptr; +} + +static inline RegExpTree* +UnicodePropertyClassAtom(LifoAlloc* alloc, const std::string& name, const std::string& value, + bool negate, bool ignore_case) +{ + CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc); + + if (CharacterRange::AddPropertyClassRange(alloc, name, value, negate, ignore_case, + ranges, lead_ranges, trail_ranges, wide_ranges)) { + return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false); + } + + if (value.empty() && !negate) { + // We allow Property Sequences in any unicode mode + // They used to be allowed in /u (before /v was introduced) and there is active + // discussion to change it back again. + // The benefits outweigh the noncompliance. + return UnicodePropertySequenceAtom(alloc, name); + } + return nullptr; +} + static inline RegExpTree* UnicodeBackReferenceAtom(LifoAlloc* alloc, RegExpTree* atom) { @@ -1455,24 +1720,24 @@ RegExpTree* RegExpParser<CharT>::ParseDisjunction() { // Used to store current state while parsing subexpressions. - RegExpParserState initial_state(alloc, nullptr, INITIAL, 0); - RegExpParserState* stored_state = &initial_state; + RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0, nullptr); + RegExpParserState* state = &initial_state; // Cache the builder in a local variable for quick access. 
RegExpBuilder* builder = initial_state.builder(); while (true) { switch (current()) { case kEndMarker: - if (stored_state->IsSubexpression()) { + if (state->IsSubexpression()) { // Inside a parenthesized group when hitting end of input. return ReportError(JSMSG_MISSING_PAREN); } - MOZ_ASSERT(INITIAL == stored_state->group_type()); + MOZ_ASSERT(INITIAL == state->group_type()); // Parsing completed successfully. return builder->ToRegExp(); case ')': { - if (!stored_state->IsSubexpression()) + if (!state->IsSubexpression()) return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN); - MOZ_ASSERT(INITIAL != stored_state->group_type()); + MOZ_ASSERT(INITIAL != state->group_type()); Advance(); // End disjunction parsing and convert builder content to new single @@ -1481,29 +1746,35 @@ RegExpParser<CharT>::ParseDisjunction() int end_capture_index = captures_started(); - int capture_index = stored_state->capture_index(); - SubexpressionType group_type = stored_state->group_type(); - - // Restore previous state. - stored_state = stored_state->previous_state(); - builder = stored_state->builder(); + int capture_index = state->capture_index(); + SubexpressionType group_type = state->group_type(); // Build result of subexpression. 
if (group_type == CAPTURE) { - RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index); - (*captures_)[capture_index - 1] = capture; + if (state->IsNamedCapture()) { + if (!CreateNamedCaptureAtIndex(state->capture_name(), capture_index)) { + return nullptr; + } + } + RegExpCapture* capture = GetCapture(capture_index); + capture->set_body(body); body = capture; } else if (group_type != GROUPING) { - MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD || - group_type == NEGATIVE_LOOKAHEAD); - bool is_positive = (group_type == POSITIVE_LOOKAHEAD); - body = alloc->newInfallible<RegExpLookahead>(body, + MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND || + group_type == NEGATIVE_LOOKAROUND); + bool is_positive = (group_type == POSITIVE_LOOKAROUND); + body = alloc->newInfallible<RegExpLookaround>(body, is_positive, end_capture_index - capture_index, - capture_index); + capture_index, + state->lookaround_type()); } + + // Restore previous state. + state = state->previous_state(); + builder = state->builder(); builder->AddAtom(body); - if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD)) + if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND)) continue; // For compatability with JSC and ES3, we allow quantifiers after // lookaheads, and break in all cases. 
@@ -1563,6 +1834,9 @@ RegExpParser<CharT>::ParseDisjunction() } case '(': { SubexpressionType subexpr_type = CAPTURE; + RegExpLookaround::Type lookaround_type = state->lookaround_type(); + bool is_named_capture = false; + const CharacterVector* capture_name = nullptr; Advance(); if (current() == '?') { switch (Next()) { @@ -1570,26 +1844,48 @@ RegExpParser<CharT>::ParseDisjunction() subexpr_type = GROUPING; break; case '=': - subexpr_type = POSITIVE_LOOKAHEAD; + lookaround_type = RegExpLookaround::LOOKAHEAD; + subexpr_type = POSITIVE_LOOKAROUND; break; case '!': - subexpr_type = NEGATIVE_LOOKAHEAD; + lookaround_type = RegExpLookaround::LOOKAHEAD; + subexpr_type = NEGATIVE_LOOKAROUND; + break; + case '<': + Advance(); + lookaround_type = RegExpLookaround::LOOKBEHIND; + if (Next() == '=') { + subexpr_type = POSITIVE_LOOKAROUND; + break; + } else if (Next() == '!') { + subexpr_type = NEGATIVE_LOOKAROUND; + break; + } + // Not a lookbehind, continue parsing as named group + is_named_capture = true; + has_named_captures_ = true; break; default: return ReportError(JSMSG_INVALID_GROUP); } - Advance(2); - } else { - if (captures_ == nullptr) - captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc); - if (captures_started() >= kMaxCaptures) - return ReportError(JSMSG_TOO_MANY_PARENS); - captures_->append((RegExpCapture*) nullptr); + Advance(is_named_capture ? 1 : 2); + } + if (subexpr_type == CAPTURE) { + if (captures_started() >= kMaxCaptures) + return ReportError(JSMSG_TOO_MANY_PARENS); + captures_started_++; + + if (is_named_capture) { + capture_name = ParseCaptureGroupName(); + if (!capture_name) + return nullptr; + } } // Store current state and begin new disjunction parsing. 
- stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type, - captures_started()); - builder = stored_state->builder(); + state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type, + lookaround_type, captures_started_, + capture_name); + builder = state->builder(); continue; } case '[': { @@ -1619,44 +1915,61 @@ RegExpParser<CharT>::ParseDisjunction() // CharacterClassEscape :: one of // d D s S w W case 'D': case 'S': case 'W': - if (unicode_) { - Advance(); - builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(), - ignore_case_)); - Advance(); - break; - } - MOZ_FALLTHROUGH; case 'd': case 's': case 'w': { widechar c = Next(); + bool negated = c <= 'Z'; Advance(2); - CharacterRangeVector* ranges = - alloc->newInfallible<CharacterRangeVector>(*alloc); - if (unicode_) - CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_); - else - CharacterRange::AddClassEscape(alloc, c, ranges); - RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false); - builder->AddAtom(atom); + if (unicode_ && negated) { + // must generate negative lookarounds for lone surrogates, done by AddCharOrEscapeUnicode + builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, c, ignore_case_)); + } else { + // only match positive ranges + CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc); + if (unicode_) + CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_); + else + CharacterRange::AddClassEscape(alloc, c, ranges); + RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false); + builder->AddAtom(atom); + } + break; + } + case 'p': case 'P': { + widechar p = Next(); + Advance(2); + if (unicode_) { + bool negate = p == 'P'; + std::string name, nvalue; + if (ParsePropertyClassName(name, nvalue)) { + RegExpTree* atom = UnicodePropertyClassAtom(alloc, name, nvalue, + negate, ignore_case_); + if (atom != nullptr) { + builder->AddAtom(atom); + 
break; + } + } + return ReportError(JSMSG_INVALID_PROPERTY_NAME); + } else { + builder->AddCharacter(p); + } break; } case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int index = 0; if (ParseBackReferenceIndex(&index)) { - RegExpCapture* capture = nullptr; - if (captures_ != nullptr && index <= (int) captures_->length()) { - capture = (*captures_)[index - 1]; - } - if (capture == nullptr) { - builder->AddEmpty(); - break; + if (state->IsInsideCaptureGroup(index)) { + // The backreference is inside the capture group it refers to. + // Nothing can possibly have been captured yet. + builder->AddEmpty(); + } else { + RegExpCapture* capture = GetCapture(index); + RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture); + if (unicode_) + builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); + else + builder->AddAtom(atom); } - RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture); - if (unicode_) - builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom)); - else - builder->AddAtom(atom); break; } if (unicode_) @@ -1741,45 +2054,28 @@ RegExpParser<CharT>::ParseDisjunction() case 'u': { Advance(2); widechar value; - if (unicode_) { - if (current() == '{') { - if (!ParseBracedHexEscape(&value)) - return nullptr; - if (unicode::IsLeadSurrogate(value)) { - builder->AddAtom(LeadSurrogateAtom(alloc, value)); - } else if (unicode::IsTrailSurrogate(value)) { - builder->AddAtom(TrailSurrogateAtom(alloc, value)); - } else if (value >= unicode::NonBMPMin) { - char16_t lead, trail; - unicode::UTF16Encode(value, &lead, &trail); - builder->AddAtom(SurrogatePairAtom(alloc, lead, trail, - ignore_case_)); - } else { - builder->AddCharacter(value); - } - } else if (ParseHexEscape(4, &value)) { - if (unicode::IsLeadSurrogate(value)) { - widechar trail; - if (ParseTrailSurrogate(&trail)) { - builder->AddAtom(SurrogatePairAtom(alloc, value, trail, - ignore_case_)); - } else { - builder->AddAtom(LeadSurrogateAtom(alloc, 
value)); - } - } else if (unicode::IsTrailSurrogate(value)) { - builder->AddAtom(TrailSurrogateAtom(alloc, value)); - } else { - builder->AddCharacter(value); - } - } else { - return ReportError(JSMSG_INVALID_UNICODE_ESCAPE); - } - break; + if (ParseUnicodeEscape(&value)) { + builder->AddUnicodeCharacter(value, ignore_case_); + } else if (!unicode_) { + builder->AddCharacter('u'); + } else { + return ReportError(JSMSG_INVALID_UNICODE_ESCAPE); } - if (ParseHexEscape(4, &value)) { - builder->AddCharacter(value); + break; + } + case 'k': { + // Either an identity escape or a named back-reference. The two + // interpretations are mutually exclusive: '\k' is interpreted as + // an identity escape for non-Unicode patterns without named + // capture groups, and as the beginning of a named back-reference + // in all other cases. + if (unicode_ || HasNamedCaptures()) { + Advance(2); + if (!ParseNamedBackReference(builder, state)) { + return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE); + } } else { - builder->AddCharacter('u'); + builder->AddCharacter('k'); } break; } @@ -1911,6 +2207,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si data->simple = parser.simple(); data->contains_anchor = parser.contains_anchor(); data->capture_count = parser.captures_started(); + parser.StoreNamedCaptureMap(&data->capture_name_list, &data->capture_index_list); return true; } diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h index 7c6e87e20f..5f3e6c54a2 100644 --- a/js/src/irregexp/RegExpParser.h +++ b/js/src/irregexp/RegExpParser.h @@ -139,6 +139,7 @@ class RegExpBuilder public: explicit RegExpBuilder(LifoAlloc* alloc); void AddCharacter(char16_t character); + void AddUnicodeCharacter(widechar c, bool ignore_case); // "Adds" an empty expression. 
Does nothing except consume a // following quantifier void AddEmpty(); @@ -168,9 +169,6 @@ class RegExpBuilder #endif }; -// Characters parsed by RegExpParser can be either char16_t or kEndMarker. -typedef uint32_t widechar; - template <typename CharT> class RegExpParser { @@ -198,24 +196,44 @@ class RegExpParser bool ParseHexEscape(int length, widechar* value); bool ParseBracedHexEscape(widechar* value); + bool ParseUnicodeEscape(widechar* value); bool ParseTrailSurrogate(widechar* value); bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail); widechar ParseOctalLiteral(); + // Parse the "{name[=value]}" part of a property class escape. + bool ParsePropertyClassName(std::string& name, std::string& value); + + // Parses the name of a capture group (?<name>pattern). The name must adhere + // to IdentifierName in the ECMAScript standard. + const CharacterVector* ParseCaptureGroupName(); + // Tries to parse the input as a back reference. If successful it // stores the result in the output parameter and returns true. If // it fails it will push back the characters read so the same characters // can be reparsed. bool ParseBackReferenceIndex(int* index_out); - bool ParseClassAtom(char16_t* char_class, widechar *value); + // Parse a thing inside a character class. Either add escaped class to the range and return + // the matched range as |char_class|, or return a single character as |value| + // Unicode ranges can be null if not in Unicode mode + bool ParseClassEscape(char16_t* char_class, widechar *value, + CharacterRangeVector* ranges, + CharacterRangeVector* lead_ranges, + CharacterRangeVector* trail_ranges, + WideCharRangeVector* wide_ranges); RegExpTree* ReportError(unsigned errorNumber, const char* param = nullptr); void Advance(); void Advance(int dist) { next_pos_ += dist - 1; Advance(); } + + bool StoreNamedCaptureMap(CharacterVectorVector** names, IntegerVector** indices); + // Returns true iff the pattern contains named captures. 
May call + // ScanForCaptures to look ahead at the remaining pattern. + bool HasNamedCaptures(); void Reset(const CharT* pos) { next_pos_ = pos; @@ -228,7 +246,7 @@ class RegExpParser bool simple() { return simple_; } bool contains_anchor() { return contains_anchor_; } void set_contains_anchor() { contains_anchor_ = true; } - int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); } + int captures_started() { return captures_started_; } const CharT* position() { return next_pos_ - 1; } static const int kMaxCaptures = 1 << 16; @@ -238,8 +256,8 @@ class RegExpParser enum SubexpressionType { INITIAL, CAPTURE, // All positive values represent captures. - POSITIVE_LOOKAHEAD, - NEGATIVE_LOOKAHEAD, + POSITIVE_LOOKAROUND, + NEGATIVE_LOOKAROUND, GROUPING }; @@ -248,11 +266,15 @@ class RegExpParser RegExpParserState(LifoAlloc* alloc, RegExpParserState* previous_state, SubexpressionType group_type, - int disjunction_capture_index) + RegExpLookaround::Type lookaround_type, + int disjunction_capture_index, + const CharacterVector* capture_name) : previous_state_(previous_state), builder_(alloc->newInfallible<RegExpBuilder>(alloc)), group_type_(group_type), - disjunction_capture_index_(disjunction_capture_index) + lookaround_type_(lookaround_type), + disjunction_capture_index_(disjunction_capture_index), + capture_name_(capture_name) {} // Parser state of containing expression, if any. RegExpParserState* previous_state() { return previous_state_; } @@ -261,10 +283,21 @@ class RegExpParser RegExpBuilder* builder() { return builder_; } // Type of regexp being parsed (parenthesized group or entire regexp). SubexpressionType group_type() { return group_type_; } + // Lookahead or Lookbehind. + RegExpLookaround::Type lookaround_type() { return lookaround_type_; } // Index in captures array of first capture in this sub-expression, if any. // Also the capture index of this sub-expression itself, if group_type // is CAPTURE. 
int capture_index() { return disjunction_capture_index_; } + // The name of the current sub-expression, if group_type is CAPTURE. Only + // used for named captures. + const CharacterVector* capture_name() const { return capture_name_; } + bool IsNamedCapture() const { return capture_name_ != nullptr; } + + // Check whether the parser is inside a capture group with the given index. + bool IsInsideCaptureGroup(int index); + // Check whether the parser is inside a capture group with the given name. + bool IsInsideCaptureGroup(const CharacterVector* name); private: // Linked list implementation of stack of states. @@ -273,10 +306,33 @@ class RegExpParser RegExpBuilder* builder_; // Stored disjunction type (capture, look-ahead or grouping), if any. SubexpressionType group_type_; + // Stored read direction. + RegExpLookaround::Type lookaround_type_; // Stored disjunction's capture index (if any). int disjunction_capture_index_; + // Stored capture name (if any). + const CharacterVector* const capture_name_; }; + // Return the 1-indexed RegExpCapture object, allocate if necessary. + RegExpCapture* GetCapture(int index); + + // Creates a new named capture at the specified index. Must be called exactly + // once for each named capture. Fails if a capture with the same name is + // encountered. + bool CreateNamedCaptureAtIndex(const CharacterVector* name, int index); + + // Find a named capture group by name, or return null if not found + RegExpCapture* FindNamedCapture(const CharacterVector* name); + + bool ParseNamedBackReference(RegExpBuilder* builder, + RegExpParserState* state); + + // After the initial parsing pass, patch corresponding RegExpCapture objects + // into all RegExpBackReferences. This is done after initial parsing in order + // to avoid complicating cases in which references comes before the capture. 
+ void PatchNamedBackReferences(); + widechar current() { return current_; } bool has_more() { return has_more_; } bool has_next() { return next_pos_ < end_; } @@ -290,9 +346,13 @@ class RegExpParser frontend::TokenStream& ts; LifoAlloc* alloc; RegExpCaptureVector* captures_; + // contains the subset of captures_ that have names (for duplicate checking) + RegExpCaptureVector* named_captures_; + RegExpBackReferenceVector* named_back_references_; const CharT* next_pos_; const CharT* end_; widechar current_; + int captures_started_; // The capture count is only valid after we have scanned for captures. int capture_count_; bool has_more_; @@ -303,6 +363,7 @@ class RegExpParser bool simple_; bool contains_anchor_; bool is_scanned_for_captures_; + bool has_named_captures_; // Only valid after we have scanned for captures. }; } } // namespace js::irregexp diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp index 66e8e25ddf..3f1b7251a3 100644 --- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -1513,6 +1513,16 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx) return nullptr; } + // If a regexp has named captures, fall back to the OOL stub, which + // will end up calling CreateRegExpMatchResults. + Register shared = temp2; + masm.loadPtr(Address(regexp, NativeObject::getFixedSlotOffset(RegExpObject::PRIVATE_SLOT)), + shared); + masm.branchPtr(Assembler::NotEqual, + Address(shared, RegExpShared::offsetOfGroupsTemplate()), + ImmWord(0), + &oolEntry); + // Construct the result. 
Register object = temp1; Label matchResultFallback, matchResultJoin; @@ -1523,6 +1533,7 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx) masm.loadPtr(Address(object, NativeObject::offsetOfSlots()), temp2); masm.storeValue(templateObject->getSlot(0), Address(temp2, 0)); masm.storeValue(templateObject->getSlot(1), Address(temp2, sizeof(Value))); + masm.storeValue(templateObject->getSlot(2), Address(temp2, 2 * sizeof(Value))); size_t elementsOffset = NativeObject::offsetOfFixedElements(); @@ -1636,6 +1647,7 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx) MOZ_ASSERT(templateObject->numFixedSlots() == 0); MOZ_ASSERT(templateObject->lookupPure(cx->names().index)->slot() == 0); MOZ_ASSERT(templateObject->lookupPure(cx->names().input)->slot() == 1); + MOZ_ASSERT(templateObject->lookupPure(cx->names().groups)->slot() == 2); masm.load32(pairsVectorAddress, temp3); masm.storeValue(JSVAL_TYPE_INT32, temp3, Address(temp2, 0)); diff --git a/js/src/js.msg b/js/src/js.msg index 51854fc398..93d8a557b1 100644 --- a/js/src/js.msg +++ b/js/src/js.msg @@ -513,6 +513,12 @@ MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parenthes MSG_DEF(JSMSG_UNICODE_OVERFLOW, 1, JSEXN_SYNTAXERR, "Unicode codepoint must not be greater than 0x10FFFF in {0}") MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression") MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class") +MSG_DEF(JSMSG_INVALID_PROPERTY_NAME, 0, JSEXN_SYNTAXERR, "invalid property name in regular expression") +MSG_DEF(JSMSG_INVALID_CLASS_PROPERTY_NAME, 0, JSEXN_SYNTAXERR, "invalid class property name in regular expression") +MSG_DEF(JSMSG_INVALID_CAPTURE_NAME, 0, JSEXN_SYNTAXERR, "invalid capture group name in regular expression") +MSG_DEF(JSMSG_DUPLICATE_CAPTURE_NAME, 0, JSEXN_SYNTAXERR, "duplicate capture group name in regular expression") +MSG_DEF(JSMSG_INVALID_NAMED_REF, 0, JSEXN_SYNTAXERR, "invalid named reference in regular expression") 
+MSG_DEF(JSMSG_INVALID_NAMED_CAPTURE_REF, 0, JSEXN_SYNTAXERR, "invalid named capture reference in regular expression") // Self-hosting MSG_DEF(JSMSG_DEFAULT_LOCALE_ERROR, 0, JSEXN_ERR, "internal error getting the default locale") diff --git a/js/src/moz.build b/js/src/moz.build index 642dd7d911..5ac4fcd669 100644 --- a/js/src/moz.build +++ b/js/src/moz.build @@ -153,6 +153,7 @@ UNIFIED_SOURCES += [ 'irregexp/NativeRegExpMacroAssembler.cpp', 'irregexp/RegExpAST.cpp', 'irregexp/RegExpCharacters.cpp', + 'irregexp/RegExpCharRanges.cpp', 'irregexp/RegExpEngine.cpp', 'irregexp/RegExpInterpreter.cpp', 'irregexp/RegExpMacroAssembler.cpp', diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h index 5080e6ab09..57ec80669c 100644 --- a/js/src/vm/CommonPropertyNames.h +++ b/js/src/vm/CommonPropertyNames.h @@ -162,6 +162,7 @@ macro(global, global, "global") \ macro(globalThis, globalThis, "globalThis") \ macro(group, group, "group") \ + macro(groups, groups, "groups") \ macro(Handle, Handle, "Handle") \ macro(has, has, "has") \ macro(hasOwn, hasOwn, "hasOwn") \ diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp index 33b97a1174..e96db29edb 100644 --- a/js/src/vm/RegExpObject.cpp +++ b/js/src/vm/RegExpObject.cpp @@ -15,6 +15,7 @@ #include "builtin/RegExp.h" #include "frontend/TokenStream.h" +#include "irregexp/FeatureFlags.h" #ifdef DEBUG #include "irregexp/RegExpBytecode.h" #endif @@ -109,7 +110,7 @@ ScopedMatchPairs::allocOrExpandArray(size_t pairCount) bool VectorMatchPairs::allocOrExpandArray(size_t pairCount) { - if (!vec_.resizeUninitialized(sizeof(MatchPair) * pairCount)) + if (!vec_.resizeUninitialized(pairCount)) return false; pairs_ = &vec_[0]; @@ -950,7 +951,8 @@ js::StringHasRegExpMetaChars(JSLinearString* str) /* RegExpShared */ RegExpShared::RegExpShared(JSAtom* source, RegExpFlag flags) - : source(source), flags(flags), parenCount(0), canStringMatch(false), marked_(false) + : source(source), flags(flags), parenCount(0), 
canStringMatch(false), marked_(false), + numNamedCaptures_(0), groupsTemplate_(nullptr) {} RegExpShared::~RegExpShared() @@ -1005,6 +1007,56 @@ RegExpShared::compile(JSContext* cx, HandleLinearString input, } bool +RegExpShared::initializeNamedCaptures(JSContext* cx, irregexp::CharacterVectorVector* names, irregexp::IntegerVector* indices) +{ + MOZ_ASSERT(!groupsTemplate_); + MOZ_ASSERT(names); + MOZ_ASSERT(indices); + MOZ_ASSERT(names->length() == indices->length()); + + // The irregexp parser returns named capture information in the form + // of two arrays. We create a template object with a property for each + // capture name, and store the capture index as Integer in the corresponding value. + uint32_t numNamedCaptures = names->length(); + + // Create a plain template object. + RootedPlainObject templateObject(cx, NewObjectWithGivenProto<PlainObject>(cx, nullptr, TenuredObject)); + if (!templateObject) { + return false; + } + + // Create a new group for the template. + Rooted<TaggedProto> proto(cx, templateObject->taggedProto()); + ObjectGroup* group = ObjectGroupCompartment::makeGroup(cx, templateObject->getClass(), proto); + if (!group) { + return false; + } + templateObject->setGroup(group); + + // Initialize the properties of the template. 
+ RootedId id(cx); + for (uint32_t i = 0; i < numNamedCaptures; i++) { + irregexp::CharacterVector* cv = (*names)[i]; + // Need to explicitly create an Atom (not a String) or it won't get added to the atom table + JSAtom* atom = AtomizeChars(cx, cv->begin(), cv->length()); + if (!atom) { + return false; + } + id = NameToId(atom->asPropertyName()); + RootedValue idx(cx, Int32Value((*indices)[i])); + if (!NativeDefineProperty(cx, templateObject, id, idx, + nullptr, nullptr, JSPROP_ENUMERATE)) { + return false; + } + AddTypePropertyId(cx, templateObject, id, TypeSet::Int32Type()); + } + + groupsTemplate_ = templateObject; + numNamedCaptures_ = numNamedCaptures; + return true; +} + +bool RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString input, CompilationMode mode, ForceByteCodeEnum force) { @@ -1026,6 +1078,12 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu } this->parenCount = data.capture_count; + if (data.capture_name_list) { + // convert LifoAlloc'd named capture info to NativeObject + if (!initializeNamedCaptures(cx, data.capture_name_list, data.capture_index_list)) { + return false; + } + } irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input, false /* global() */, @@ -1259,17 +1317,27 @@ RegExpCompartment::createMatchResultTemplateObject(JSContext* cx) return matchResultTemplateObject_; // = nullptr } + /* Set dummy groups property */ + RootedValue groupsVal(cx, UndefinedValue()); + if (!NativeDefineProperty( + cx, templateObject, cx->names().groups, groupsVal, nullptr, nullptr, JSPROP_ENUMERATE)) { + return nullptr; + } + // Make sure that the properties are in the right slots. 
DebugOnly<Shape*> shape = templateObject->lastProperty(); - MOZ_ASSERT(shape->previous()->slot() == 0 && - shape->previous()->propidRef() == NameToId(cx->names().index)); - MOZ_ASSERT(shape->slot() == 1 && - shape->propidRef() == NameToId(cx->names().input)); + MOZ_ASSERT(shape->slot() == 2 && + shape->propidRef() == NameToId(cx->names().groups)); + MOZ_ASSERT(shape->previous()->slot() == 1 && + shape->previous()->propidRef() == NameToId(cx->names().input)); + MOZ_ASSERT(shape->previous()->previous()->slot() == 0 && + shape->previous()->previous()->propidRef() == NameToId(cx->names().index)); // Make sure type information reflects the indexed properties which might // be added. AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::StringType()); AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::UndefinedType()); + AddTypePropertyId(cx, templateObject, NameToId(cx->names().groups), TypeSet::AnyObjectType()); matchResultTemplateObject_.set(templateObject); @@ -1484,6 +1552,13 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16 if (!HandleRegExpFlag(UnicodeFlag, flagsOut)) return false; break; + case 'v': + if (irregexp::kParseFlagUnicodeSetsAsUnicode) { + if (!HandleRegExpFlag(UnicodeFlag, flagsOut)) + return false; + break; + } + MOZ_FALLTHROUGH default: return false; } diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h index ca7a39ec65..17d961eede 100644 --- a/js/src/vm/RegExpObject.h +++ b/js/src/vm/RegExpObject.h @@ -17,6 +17,7 @@ #include "proxy/Proxy.h" #include "vm/ArrayObject.h" #include "vm/Shape.h" +#include "irregexp/InfallibleVector.h" /* * JavaScript Regular Expressions @@ -133,6 +134,9 @@ class RegExpShared bool canStringMatch; bool marked_; + uint32_t numNamedCaptures_; + GCPtr<PlainObject*> groupsTemplate_; + RegExpCompilation compilationArray[4]; static int CompilationIndex(CompilationMode mode, bool latin1) { @@ -187,6 +191,11 @@ class RegExpShared /* Accounts for the "0" (whole match) pair. 
*/ size_t pairCount() const { return getParenCount() + 1; } + // not public due to circular inclusion problems + bool initializeNamedCaptures(JSContext* cx, irregexp::CharacterVectorVector* names, irregexp::IntegerVector* indices); + PlainObject* getGroupsTemplate() { return groupsTemplate_; } + uint32_t numNamedCaptures() const { return numNamedCaptures_; } + JSAtom* getSource() const { return source; } RegExpFlag getFlags() const { return flags; } bool ignoreCase() const { return flags & IgnoreCaseFlag; } @@ -238,6 +247,10 @@ class RegExpShared + offsetof(RegExpCompilation, jitCode); } + static size_t offsetOfGroupsTemplate() { + return offsetof(RegExpShared, groupsTemplate_); + } + size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf); #ifdef DEBUG diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 06490f24df..686b2e9c28 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -1697,39 +1697,29 @@ static bool intrinsic_RegExpGetSubstitution(JSContext* cx, unsigned argc, Value* vp) { CallArgs args = CallArgsFromVp(argc, vp); - MOZ_ASSERT(args.length() == 6); - RootedString matched(cx, args[0].toString()); - RootedString string(cx, args[1].toString()); + RootedArrayObject matchResult(cx, &args[0].toObject().as<ArrayObject>()); + + RootedLinearString string(cx, args[1].toString()->ensureLinear(cx)); + if (!string) + return false; int32_t position = int32_t(args[2].toNumber()); MOZ_ASSERT(position >= 0); - RootedObject captures(cx, &args[3].toObject()); -#ifdef DEBUG - bool isArray = false; - MOZ_ALWAYS_TRUE(IsArray(cx, captures, &isArray)); - MOZ_ASSERT(isArray); -#endif - - RootedString replacement(cx, args[4].toString()); + RootedLinearString replacement(cx, args[3].toString()->ensureLinear(cx)); + if (!replacement) + return false; - int32_t firstDollarIndex = int32_t(args[5].toNumber()); + int32_t firstDollarIndex = int32_t(args[4].toNumber()); MOZ_ASSERT(firstDollarIndex >= 0); - RootedLinearString 
matchedLinear(cx, matched->ensureLinear(cx)); - if (!matchedLinear) - return false; - RootedLinearString stringLinear(cx, string->ensureLinear(cx)); - if (!stringLinear) - return false; - RootedLinearString replacementLinear(cx, replacement->ensureLinear(cx)); - if (!replacementLinear) - return false; + RootedValue namedCaptures(cx, args[5]); + MOZ_ASSERT(namedCaptures.isUndefined() || namedCaptures.isObject()); - return RegExpGetSubstitution(cx, matchedLinear, stringLinear, size_t(position), captures, - replacementLinear, size_t(firstDollarIndex), args.rval()); + return RegExpGetSubstitution(cx, matchResult, string, size_t(position), replacement, + size_t(firstDollarIndex), namedCaptures, args.rval()); } static bool |