summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--js/src/builtin/RegExp.cpp302
-rw-r--r--js/src/builtin/RegExp.h9
-rw-r--r--js/src/builtin/RegExp.js80
-rw-r--r--js/src/builtin/RegExpGlobalReplaceOpt.h.js23
-rw-r--r--js/src/builtin/RegExpLocalReplaceOpt.h.js18
-rw-r--r--js/src/builtin/TestingFunctions.cpp6
-rw-r--r--js/src/frontend/TokenStream.cpp3
-rw-r--r--js/src/irregexp/FeatureFlags.h20
-rw-r--r--js/src/irregexp/InfallibleVector.h103
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.cpp387
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.h15
-rw-r--r--js/src/irregexp/RegExpAST.cpp8
-rw-r--r--js/src/irregexp/RegExpAST.h44
-rw-r--r--js/src/irregexp/RegExpBytecode.h23
-rw-r--r--js/src/irregexp/RegExpCharRanges.cpp2069
-rw-r--r--js/src/irregexp/RegExpCharRanges.h235
-rw-r--r--js/src/irregexp/RegExpEngine.cpp685
-rw-r--r--js/src/irregexp/RegExpEngine.h163
-rw-r--r--js/src/irregexp/RegExpInterpreter.cpp76
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.cpp21
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.h19
-rw-r--r--js/src/irregexp/RegExpParser.cpp1039
-rw-r--r--js/src/irregexp/RegExpParser.h79
-rw-r--r--js/src/jit/CodeGenerator.cpp12
-rw-r--r--js/src/js.msg6
-rw-r--r--js/src/moz.build1
-rw-r--r--js/src/vm/CommonPropertyNames.h1
-rw-r--r--js/src/vm/RegExpObject.cpp87
-rw-r--r--js/src/vm/RegExpObject.h13
-rw-r--r--js/src/vm/SelfHosting.cpp36
30 files changed, 4166 insertions, 1417 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp
index 2456ef065d..9b0e1a7cb6 100644
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -21,6 +21,7 @@
#include "vm/NativeObject-inl.h"
+
using namespace js;
using namespace js::unicode;
@@ -28,12 +29,15 @@ using mozilla::ArrayLength;
using mozilla::CheckedInt;
using mozilla::Maybe;
+using CapturesVector = GCVector<Value, 4>;
+
/*
- * ES 2017 draft rev 6a13789aa9e7c6de4e96b7d3e24d9e6eba6584ad 21.2.5.2.2
- * steps 3, 16-25.
+ * ES 2021 draft 21.2.5.2.2: Steps 16-28
+ * https://tc39.es/ecma262/#sec-regexpbuiltinexec
*/
bool
-js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& matches,
+js::CreateRegExpMatchResult(JSContext* cx, RegExpShared& re,
+ HandleString input, const MatchPairs& matches,
MutableHandleValue rval)
{
MOZ_ASSERT(input);
@@ -46,6 +50,7 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs&
* 1..pairCount-1: paren matches
* input: input string
* index: start index for the match
+ * groups: named capture groups for the match
*/
/* Get the templateObject that defines the shape and type of the output object */
@@ -53,15 +58,16 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs&
if (!templateObject)
return false;
+ // Step 16
size_t numPairs = matches.length();
MOZ_ASSERT(numPairs > 0);
- /* Step 17. */
+ /* Step 18-19. */
RootedArrayObject arr(cx, NewDenseFullyAllocatedArrayWithTemplate(cx, numPairs, templateObject));
if (!arr)
return false;
- /* Steps 22-24.
+ /* Steps 22-23 and 27 a-e
* Store a Value for each pair. */
for (size_t i = 0; i < numPairs; i++) {
const MatchPair& pair = matches[i];
@@ -79,6 +85,40 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs&
}
}
+ // Step 24 (reordered)
+ RootedPlainObject groups(cx);
+ if (re.numNamedCaptures() > 0) {
+ // construct a new object from the template saved on RegExpShared
+ RootedPlainObject groupsTemplate(cx, re.getGroupsTemplate());
+ groups = NewObjectWithGivenProto<PlainObject>(cx, nullptr);
+ groups->setGroup(groupsTemplate->group());
+
+ // Step 27 f.
+ // The groups template object stores the names of the named captures in the
+ // order in which they are defined.
+ // Grab the index into the match vector from the template object and define the
+ // corresponding property on the result
+ AutoIdVector keys(cx);
+ if (!GetPropertyKeys(cx, groupsTemplate, 0, &keys)) {
+ return false;
+ }
+ MOZ_ASSERT(keys.length() == re.numNamedCaptures());
+ RootedId key(cx);
+ RootedValue ival(cx);
+ RootedValue val(cx);
+ for (size_t i = 0; i < keys.length(); i++) {
+ key = keys[i];
+ // fetch the group's match index...
+ if (!NativeGetProperty(cx, groupsTemplate, key, &ival))
+ return false;
+ // ... and set it on groups
+ val = arr->getDenseElement(ival.toInt32());
+ if (!NativeDefineProperty(cx, groups, key, val, nullptr, nullptr, JSPROP_ENUMERATE)) {
+ return false;
+ }
+ }
+ }
+
/* Step 20 (reordered).
* Set the |index| property. (TemplateObject positions it in slot 0) */
arr->setSlot(0, Int32Value(matches[0].start));
@@ -87,6 +127,10 @@ js::CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs&
* Set the |input| property. (TemplateObject positions it in slot 1) */
arr->setSlot(1, StringValue(input));
+ // Steps 25-26 (reordered)
+ // Set the |groups| property.
+ arr->setSlot(2, groups ? ObjectValue(*groups) : UndefinedValue());
+
#ifdef DEBUG
RootedValue test(cx);
RootedId id(cx, NameToId(cx->names().index));
@@ -168,7 +212,7 @@ js::ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res, Handle<RegExpObject*>
return true;
}
- return CreateRegExpMatchResult(cx, input, matches, rval);
+ return CreateRegExpMatchResult(cx, *shared, input, matches, rval);
}
static bool
@@ -1025,7 +1069,11 @@ RegExpMatcherImpl(JSContext* cx, HandleObject regexp, HandleString string,
}
/* Steps 16-25 */
- return CreateRegExpMatchResult(cx, string, matches, rval);
+ Rooted<RegExpObject*> reobj(cx, &regexp->as<RegExpObject>());
+ RegExpGuard shared(cx);
+ if (!RegExpObject::getShared(cx, reobj, &shared))
+ return false;
+ return CreateRegExpMatchResult(cx, *shared, string, matches, rval);
}
/*
@@ -1067,8 +1115,13 @@ js::RegExpMatcherRaw(JSContext* cx, HandleObject regexp, HandleString input,
// The MatchPairs will always be passed in, but RegExp execution was
// successful only if the pairs have actually been filled in.
- if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0)
- return CreateRegExpMatchResult(cx, input, *maybeMatches, output);
+ if (maybeMatches && maybeMatches->pairsRaw()[0] >= 0) {
+ Rooted<RegExpObject*> reobj(cx, &regexp->as<RegExpObject>());
+ RegExpGuard shared(cx);
+ if (!RegExpObject::getShared(cx, reobj, &shared))
+ return false;
+ return CreateRegExpMatchResult(cx, *shared, input, *maybeMatches, output);
+ }
return RegExpMatcherImpl(cx, regexp, input, lastIndex,
UpdateRegExpStatics, output);
}
@@ -1265,10 +1318,10 @@ GetParen(JSLinearString* matched, const JS::Value& capture, JSSubString* out)
template <typename CharT>
static bool
InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position, size_t tailPos,
- MutableHandle<GCVector<Value>> captures, JSLinearString* replacement,
+ Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures, JSLinearString* replacement,
const CharT* replacementBegin, const CharT* currentDollar,
const CharT* replacementEnd,
- JSSubString* out, size_t* skip)
+ JSSubString* out, size_t* skip, uint32_t* currentNamedCapture)
{
MOZ_ASSERT(*currentDollar == '$');
@@ -1310,6 +1363,35 @@ InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position
return true;
}
+ // '$<': Named Captures
+ if (c == '<') {
+ // Step 1.
+ if (namedCaptures.length() == 0) {
+ return false;
+ }
+
+ // Step 2.b
+ const CharT* nameStart = currentDollar + 2;
+ const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
+
+ // Step 2.c
+ if (!nameEnd) {
+ return false;
+ }
+
+ // Step 2.d
+ // We precompute named capture replacements in InitNamedCaptures.
+ // They are stored in the order in which we will need them, so here
+ // we can just take the next one in the list.
+ size_t nameLength = nameEnd - nameStart;
+ *skip = nameLength + 3; // $<...>
+
+ // Steps 2.d.iii-iv
+ GetParen(matched, namedCaptures[*currentNamedCapture], out);
+ *currentNamedCapture += 1;
+ return true;
+ }
+
*skip = 2;
switch (c) {
default:
@@ -1340,8 +1422,9 @@ InterpretDollar(JSLinearString* matched, JSLinearString* string, size_t position
template <typename CharT>
static bool
FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearString string,
- size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures,
- HandleLinearString replacement, size_t firstDollarIndex, size_t* sizep)
+ size_t position, size_t tailPos, Handle<CapturesVector> captures,
+ Handle<CapturesVector> namedCaptures, HandleLinearString replacement,
+ size_t firstDollarIndex, size_t* sizep)
{
CheckedInt<uint32_t> replen = replacement->length();
@@ -1350,11 +1433,13 @@ FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearS
const CharT* replacementBegin = replacement->chars<CharT>(nogc);
const CharT* currentDollar = replacementBegin + firstDollarIndex;
const CharT* replacementEnd = replacementBegin + replacement->length();
+ uint32_t currentNamedCapture = 0;
do {
JSSubString sub;
size_t skip;
- if (InterpretDollar(matched, string, position, tailPos, captures, replacement,
- replacementBegin, currentDollar, replacementEnd, &sub, &skip))
+ if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures,
+ replacement, replacementBegin, currentDollar, replacementEnd,
+ &sub, &skip, &currentNamedCapture))
{
if (sub.length > skip)
replen += sub.length - skip;
@@ -1379,14 +1464,14 @@ FindReplaceLengthString(JSContext* cx, HandleLinearString matched, HandleLinearS
static bool
FindReplaceLength(JSContext* cx, HandleLinearString matched, HandleLinearString string,
- size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures,
+ size_t position, size_t tailPos, Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures,
HandleLinearString replacement, size_t firstDollarIndex, size_t* sizep)
{
return replacement->hasLatin1Chars()
? FindReplaceLengthString<Latin1Char>(cx, matched, string, position, tailPos, captures,
- replacement, firstDollarIndex, sizep)
+ namedCaptures, replacement, firstDollarIndex, sizep)
: FindReplaceLengthString<char16_t>(cx, matched, string, position, tailPos, captures,
- replacement, firstDollarIndex, sizep);
+ namedCaptures, replacement, firstDollarIndex, sizep);
}
/*
@@ -1397,7 +1482,7 @@ FindReplaceLength(JSContext* cx, HandleLinearString matched, HandleLinearString
template <typename CharT>
static void
DoReplace(HandleLinearString matched, HandleLinearString string,
- size_t position, size_t tailPos, MutableHandle<GCVector<Value>> captures,
+ size_t position, size_t tailPos, Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures,
HandleLinearString replacement, size_t firstDollarIndex, StringBuffer &sb)
{
JS::AutoCheckCannotGC nogc;
@@ -1407,6 +1492,7 @@ DoReplace(HandleLinearString matched, HandleLinearString string,
MOZ_ASSERT(firstDollarIndex < replacement->length());
const CharT* currentDollar = replacementBegin + firstDollarIndex;
const CharT* replacementEnd = replacementBegin + replacement->length();
+ uint32_t currentNamedCapture = 0;
do {
/* Move one of the constant portions of the replacement value. */
size_t len = currentDollar - currentChar;
@@ -1415,8 +1501,8 @@ DoReplace(HandleLinearString matched, HandleLinearString string,
JSSubString sub;
size_t skip;
- if (InterpretDollar(matched, string, position, tailPos, captures, replacement,
- replacementBegin, currentDollar, replacementEnd, &sub, &skip))
+ if (InterpretDollar(matched, string, position, tailPos, captures, namedCaptures, replacement,
+ replacementBegin, currentDollar, replacementEnd, &sub, &skip, &currentNamedCapture))
{
sb.infallibleAppendSubstring(sub.base, sub.offset, sub.length);
currentChar += skip;
@@ -1430,9 +1516,117 @@ DoReplace(HandleLinearString matched, HandleLinearString string,
sb.infallibleAppend(currentChar, replacement->length() - (currentChar - replacementBegin));
}
+/*
+ * This function finds the list of named captures of the form
+ * "$<name>" in a replacement string and converts them into jsids, for
+ * use in InitNamedCaptures.
+ */
+template <typename CharT>
+static bool CollectNames(JSContext* cx, HandleLinearString replacement,
+ size_t firstDollarIndex,
+ MutableHandle<GCVector<jsid>> names) {
+ JS::AutoCheckCannotGC nogc;
+ MOZ_ASSERT(firstDollarIndex < replacement->length());
+
+ const CharT* replacementBegin = replacement->chars<CharT>(nogc);
+ const CharT* currentDollar = replacementBegin + firstDollarIndex;
+ const CharT* replacementEnd = replacementBegin + replacement->length();
+
+ // https://tc39.es/ecma262/#table-45, "$<" section
+ while (currentDollar && currentDollar + 1 < replacementEnd) {
+ if (currentDollar[1] == '<') {
+ // Step 2.b
+ const CharT* nameStart = currentDollar + 2;
+ const CharT* nameEnd = js_strchr_limit(nameStart, '>', replacementEnd);
+
+ // Step 2.c
+ if (!nameEnd) {
+ return true;
+ }
+
+ // Step 2.d.i
+ size_t nameLength = nameEnd - nameStart;
+ JSAtom* atom = AtomizeChars(cx, nameStart, nameLength);
+ if (!atom || !names.append(AtomToId(atom))) {
+ return false;
+ }
+ currentDollar = nameEnd + 1;
+ } else {
+ currentDollar += 2;
+ }
+ currentDollar = js_strchr_limit(currentDollar, '$', replacementEnd);
+ }
+ return true;
+}
+
+/*
+ * When replacing named captures, the spec requires us to perform
+ * `Get(match.groups, name)` for each "$<name>". These `Get`s can be
+ * script-visible; for example, RegExp can be extended with an `exec`
+ * method that wraps `groups` in a proxy. To make sure that we do the
+ * right thing, if a regexp has named captures, we find the named
+ * capture replacements before beginning the actual replacement.
+ * This guarantees that we will call GetProperty once and only once for
+ * each "$<name>" in the replacement string, in the correct order.
+ *
+ * This function precomputes the results of step 2 of the '$<' case
+ * here: https://tc39.es/proposal-regexp-named-groups/#table-45, so
+ * that when we need to access the nth named capture in InterpretDollar,
+ * we can just use the nth value stored in namedCaptures.
+ */
+static bool InitNamedCaptures(JSContext* cx, HandleLinearString replacement,
+ HandleObject groups, size_t firstDollarIndex,
+ MutableHandle<CapturesVector> namedCaptures) {
+ Rooted<GCVector<jsid>> names(cx, GCVector<jsid>(cx));
+ if (replacement->hasLatin1Chars()) {
+ if (!CollectNames<Latin1Char>(cx, replacement, firstDollarIndex, &names)) {
+ return false;
+ }
+ } else {
+ if (!CollectNames<char16_t>(cx, replacement, firstDollarIndex, &names)) {
+ return false;
+ }
+ }
+
+ // https://tc39.es/ecma262/#table-45, "$<" section
+ RootedId id(cx);
+ RootedValue capture(cx);
+ for (uint32_t i = 0; i < names.length(); i++) {
+ // Step 2.d.i
+ id = names[i];
+
+ // Step 2.d.ii
+ if (!GetProperty(cx, groups, groups, id, &capture)) {
+ return false;
+ }
+
+ // Step 2.d.iii
+ if (capture.isUndefined()) {
+ if (!namedCaptures.append(capture)) {
+ return false;
+ }
+ } else {
+ // Step 2.d.iv
+ JSString* str = ToString<CanGC>(cx, capture);
+ if (!str) {
+ return false;
+ }
+ JSLinearString* linear = str->ensureLinear(cx);
+ if (!linear) {
+ return false;
+ }
+ if (!namedCaptures.append(StringValue(linear))) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
static bool
-NeedTwoBytes(HandleLinearString string, HandleLinearString replacement,
- HandleLinearString matched, Handle<GCVector<Value>> captures)
+NeedTwoBytes(HandleLinearString string, HandleLinearString replacement, HandleLinearString matched,
+ Handle<CapturesVector> captures, Handle<CapturesVector> namedCaptures)
{
if (string->hasTwoByteChars())
return true;
@@ -1449,19 +1643,38 @@ NeedTwoBytes(HandleLinearString string, HandleLinearString replacement,
return true;
}
+ for (size_t i = 0, len = namedCaptures.length(); i < len; i++) {
+ Value capture = namedCaptures[i];
+ if (capture.isUndefined())
+ continue;
+ if (capture.toString()->hasTwoByteChars())
+ return true;
+ }
+
return false;
}
/* ES 2016 draft Mar 25, 2016 21.1.3.14.1. */
bool
-js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinearString string,
- size_t position, HandleObject capturesObj, HandleLinearString replacement,
- size_t firstDollarIndex, MutableHandleValue rval)
+js::RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult, HandleLinearString string,
+ size_t position, HandleLinearString replacement, size_t firstDollarIndex,
+ HandleValue groups, MutableHandleValue rval)
{
MOZ_ASSERT(firstDollarIndex < replacement->length());
// Step 1 (skipped).
+ // Step 10 (reordered).
+ uint32_t matchResultLength = matchResult->length();
+ MOZ_ASSERT(matchResultLength > 0);
+ MOZ_ASSERT(matchResultLength == matchResult->getDenseInitializedLength());
+
+ const Value& matchedValue = matchResult->getDenseElement(0);
+ RootedLinearString matched(cx, matchedValue.toString()->ensureLinear(cx));
+ if (!matched)
+ return false;
+
+
// Step 2.
size_t matchLength = matched->length();
@@ -1470,33 +1683,36 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea
// Step 6.
MOZ_ASSERT(position <= string->length());
- // Step 10 (reordered).
- uint32_t nCaptures;
- if (!GetLengthProperty(cx, capturesObj, &nCaptures))
- return false;
-
- Rooted<GCVector<Value>> captures(cx, GCVector<Value>(cx));
+ uint32_t nCaptures = matchResultLength - 1;
+ Rooted<CapturesVector> captures(cx, CapturesVector(cx));
if (!captures.reserve(nCaptures))
return false;
// Step 7.
- RootedValue capture(cx);
- for (uint32_t i = 0; i < nCaptures; i++) {
- if (!GetElement(cx, capturesObj, capturesObj, i, &capture))
- return false;
+ for (uint32_t i = 1; i <= nCaptures; i++) {
+ const Value& capture = matchResult->getDenseElement(i);
if (capture.isUndefined()) {
captures.infallibleAppend(capture);
continue;
}
- MOZ_ASSERT(capture.isString());
- RootedLinearString captureLinear(cx, capture.toString()->ensureLinear(cx));
+ JSLinearString* captureLinear = capture.toString()->ensureLinear(cx);
if (!captureLinear)
return false;
captures.infallibleAppend(StringValue(captureLinear));
}
+ Rooted<CapturesVector> namedCaptures(cx, CapturesVector(cx));
+ if (groups.isObject()) {
+ RootedObject groupsObj(cx, &groups.toObject());
+ if (!InitNamedCaptures(cx, replacement, groupsObj, firstDollarIndex, &namedCaptures)) {
+ return false;
+ }
+ } else {
+ MOZ_ASSERT(groups.isUndefined());
+ }
+
// Step 8 (skipped).
// Step 9.
@@ -1511,14 +1727,14 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea
// Step 11.
size_t reserveLength;
- if (!FindReplaceLength(cx, matched, string, position, tailPos, &captures, replacement,
- firstDollarIndex, &reserveLength))
+ if (!FindReplaceLength(cx, matched, string, position, tailPos, captures, namedCaptures,
+ replacement, firstDollarIndex, &reserveLength))
{
return false;
}
StringBuffer result(cx);
- if (NeedTwoBytes(string, replacement, matched, captures)) {
+ if (NeedTwoBytes(string, replacement, matched, captures, namedCaptures)) {
if (!result.ensureTwoByteChars())
return false;
}
@@ -1527,10 +1743,10 @@ js::RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinea
return false;
if (replacement->hasLatin1Chars()) {
- DoReplace<Latin1Char>(matched, string, position, tailPos, &captures,
+ DoReplace<Latin1Char>(matched, string, position, tailPos, captures, namedCaptures,
replacement, firstDollarIndex, result);
} else {
- DoReplace<char16_t>(matched, string, position, tailPos, &captures,
+ DoReplace<char16_t>(matched, string, position, tailPos, captures, namedCaptures,
replacement, firstDollarIndex, result);
}
diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h
index 275efd7ce3..f66c9b1b81 100644
--- a/js/src/builtin/RegExp.h
+++ b/js/src/builtin/RegExp.h
@@ -36,7 +36,8 @@ ExecuteRegExpLegacy(JSContext* cx, RegExpStatics* res, Handle<RegExpObject*> reo
/* Translation from MatchPairs to a JS array in regexp_exec()'s output format. */
MOZ_MUST_USE bool
-CreateRegExpMatchResult(JSContext* cx, HandleString input, const MatchPairs& matches,
+CreateRegExpMatchResult(JSContext* cx, RegExpShared& re,
+ HandleString input, const MatchPairs& matches,
MutableHandleValue rval);
extern MOZ_MUST_USE bool
@@ -121,9 +122,9 @@ extern MOZ_MUST_USE bool
RegExpInstanceOptimizableRaw(JSContext* cx, JSObject* obj, JSObject* proto);
extern MOZ_MUST_USE bool
-RegExpGetSubstitution(JSContext* cx, HandleLinearString matched, HandleLinearString string,
- size_t position, HandleObject capturesObj, HandleLinearString replacement,
- size_t firstDollarIndex, MutableHandleValue rval);
+RegExpGetSubstitution(JSContext* cx, HandleArrayObject matchResult, HandleLinearString string,
+ size_t position, HandleLinearString replacement, size_t firstDollarIndex,
+ HandleValue namedCaptures, MutableHandleValue rval);
extern MOZ_MUST_USE bool
GetFirstDollarIndex(JSContext* cx, unsigned argc, Value* vp);
diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js
index 879375b988..ab4d76f4ca 100644
--- a/js/src/builtin/RegExp.js
+++ b/js/src/builtin/RegExp.js
@@ -395,9 +395,8 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue,
var n, capN, replacement;
if (functionalReplace || firstDollarIndex !== -1) {
- // Steps 14.g-j.
+ // Steps 14.g-k.
replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
nCaptures, replaceValue,
functionalReplace, firstDollarIndex);
} else {
@@ -411,16 +410,21 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue,
if (capN !== undefined)
ToString(capN);
}
+ // Step 14.j, 14.l., GetSubstitution Step 11.
+ // We don't need namedCaptures, but ToObject is visible to script.
+ var namedCaptures = result.groups;
+ if (namedCaptures !== undefined)
+ ToObject(namedCaptures);
replacement = replaceValue;
}
- // Step 14.l.
+ // Step 14.m.
if (position >= nextSourcePosition) {
- // Step 14.l.ii.
+ // Step 14.m.ii.
accumulatedResult += Substring(S, nextSourcePosition,
position - nextSourcePosition) + replacement;
- // Step 14.l.iii.
+ // Step 14.m.iii.
nextSourcePosition = position + matchLength;
}
}
@@ -433,15 +437,14 @@ function RegExpReplaceSlowPath(rx, S, lengthS, replaceValue,
return accumulatedResult + Substring(S, nextSourcePosition, lengthS - nextSourcePosition);
}
-// ES 2017 draft rev 03bfda119d060aca4099d2b77cf43f6d4f11cfa2 21.2.5.8
-// steps 14.g-k.
+// ES 2021 draft 21.2.5.10
+// https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
+// steps 14.g-l.
// Calculates functional/substitution replaceement from match result.
// Used in the following functions:
// * RegExpGlobalReplaceOptFunc
// * RegExpGlobalReplaceOptElemBase
-// * RegExpGlobalReplaceOptSubst
// * RegExpLocalReplaceOptFunc
-// * RegExpLocalReplaceOptSubst
// * RegExpReplaceSlowPath
function RegExpGetComplexReplacement(result, matched, S, position,
nCaptures, replaceValue,
@@ -451,13 +454,8 @@ function RegExpGetComplexReplacement(result, matched, S, position,
var captures = [];
var capturesLength = 0;
- // Step 14.j.i (reordered).
- // For `nCaptures` <= 4 case, call `replaceValue` directly, otherwise
- // use `std_Function_apply` with all arguments stored in `captures`.
- // In latter case, store `matched` as the first element here, to
- // avoid unshift later.
- if (functionalReplace && nCaptures > 4)
- _DefineDataProperty(captures, capturesLength++, matched);
+ // Step 14.k.i (reordered).
+ _DefineDataProperty(captures, capturesLength++, matched);
// Step 14.g, 14.i, 14.i.iv.
for (var n = 1; n <= nCaptures; n++) {
@@ -473,29 +471,41 @@ function RegExpGetComplexReplacement(result, matched, S, position,
}
// Step 14.j.
+ var namedCaptures = result.groups;
+
+ // Step 14.k.
if (functionalReplace) {
- switch (nCaptures) {
- case 0:
- return ToString(replaceValue(matched, position, S));
- case 1:
- return ToString(replaceValue(matched, SPREAD(captures, 1), position, S));
- case 2:
- return ToString(replaceValue(matched, SPREAD(captures, 2), position, S));
- case 3:
- return ToString(replaceValue(matched, SPREAD(captures, 3), position, S));
- case 4:
- return ToString(replaceValue(matched, SPREAD(captures, 4), position, S));
- default:
- // Steps 14.j.ii-v.
- _DefineDataProperty(captures, capturesLength++, position);
- _DefineDataProperty(captures, capturesLength++, S);
- return ToString(callFunction(std_Function_apply, replaceValue, null, captures));
+ // For `nCaptures` <= 4 case, call `replaceValue` directly, otherwise
+ // use `std_Function_apply` with all arguments stored in `captures`.
+ if (namedCaptures === undefined) {
+ switch (nCaptures) {
+ case 0:
+ return ToString(replaceValue(SPREAD(captures, 1), position, S));
+ case 1:
+ return ToString(replaceValue(SPREAD(captures, 2), position, S));
+ case 2:
+ return ToString(replaceValue(SPREAD(captures, 3), position, S));
+ case 3:
+ return ToString(replaceValue(SPREAD(captures, 4), position, S));
+ case 4:
+ return ToString(replaceValue(SPREAD(captures, 5), position, S));
+ }
+ }
+ // Steps 14.k.ii-v.
+ _DefineDataProperty(captures, capturesLength++, position);
+ _DefineDataProperty(captures, capturesLength++, S);
+ if (namedCaptures !== undefined) {
+ _DefineDataProperty(captures, capturesLength++, namedCaptures);
}
+ return ToString(callFunction(std_Function_apply, replaceValue, undefined, captures));
}
- // Steps 14.k.i.
- return RegExpGetSubstitution(matched, S, position, captures, replaceValue,
- firstDollarIndex);
+ // Step 14.l.
+ if (namedCaptures !== undefined) {
+ namedCaptures = ToObject(namedCaptures);
+ }
+ return RegExpGetSubstitution(captures, S, position, replaceValue, firstDollarIndex,
+ namedCaptures);
}
// ES 2017 draft rev 03bfda119d060aca4099d2b77cf43f6d4f11cfa2 21.2.5.8
diff --git a/js/src/builtin/RegExpGlobalReplaceOpt.h.js b/js/src/builtin/RegExpGlobalReplaceOpt.h.js
index fbe50a3f9c..8b82fc31d4 100644
--- a/js/src/builtin/RegExpGlobalReplaceOpt.h.js
+++ b/js/src/builtin/RegExpGlobalReplaceOpt.h.js
@@ -53,7 +53,7 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode
break;
var nCaptures;
-#if defined(FUNCTIONAL) || defined(SUBSTITUTION)
+#if defined(FUNCTIONAL)
// Steps 14.a-b.
nCaptures = std_Math_max(result.length - 1, 0);
#endif
@@ -68,18 +68,19 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode
var position = result.index;
lastIndex = position + matchLength;
- // Steps g-j.
+ // Steps g-l.
var replacement;
#if defined(FUNCTIONAL)
replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
nCaptures, replaceValue,
true, -1);
-#elif defined(SUBSTITUTION)
- replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
- nCaptures, replaceValue,
- false, firstDollarIndex);
+#elif defined(SUBSTITUTION) // Step l.i
+ var namedCaptures = result.groups;
+ if (namedCaptures !== undefined) {
+ namedCaptures = ToObject(namedCaptures);
+ }
+ // Step l.ii
+ replacement = RegExpGetSubstitution(result, S, position, replaceValue, firstDollarIndex, namedCaptures);
#elif defined(ELEMBASE)
if (IsObject(elemBase)) {
var prop = GetStringDataProperty(elemBase, matched);
@@ -96,7 +97,6 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode
nCaptures = std_Math_max(result.length - 1, 0);
replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
nCaptures, replaceValue,
true, -1);
}
@@ -104,11 +104,11 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode
replacement = replaceValue;
#endif
- // Step 14.l.ii.
+ // Step 14.m.ii.
accumulatedResult += Substring(S, nextSourcePosition,
position - nextSourcePosition) + replacement;
- // Step 14.l.iii.
+ // Step 14.m.iii.
nextSourcePosition = lastIndex;
// Step 11.c.iii.2.
@@ -116,6 +116,7 @@ function FUNC_NAME(rx, S, lengthS, replaceValue, fullUnicode
lastIndex = fullUnicode ? AdvanceStringIndex(S, lastIndex) : lastIndex + 1;
if (lastIndex > lengthS)
break;
+ lastIndex |= 0;
}
}
diff --git a/js/src/builtin/RegExpLocalReplaceOpt.h.js b/js/src/builtin/RegExpLocalReplaceOpt.h.js
index 1acd6a73a4..ac74d17ada 100644
--- a/js/src/builtin/RegExpLocalReplaceOpt.h.js
+++ b/js/src/builtin/RegExpLocalReplaceOpt.h.js
@@ -60,9 +60,9 @@ function FUNC_NAME(rx, S, lengthS, replaceValue
return S;
}
- // Steps 11.c, 12-13, 14.a-b (skipped).
+ // Steps 11.c, 12-13 (skipped).
-#if defined(FUNCTIONAL) || defined(SUBSTITUTION)
+#if defined(FUNCTIONAL)
// Steps 14.a-b.
var nCaptures = std_Math_max(result.length - 1, 0);
#endif
@@ -88,19 +88,21 @@ function FUNC_NAME(rx, S, lengthS, replaceValue
// Steps g-j.
#if defined(FUNCTIONAL)
replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
nCaptures, replaceValue,
true, -1);
#elif defined(SUBSTITUTION)
- replacement = RegExpGetComplexReplacement(result, matched, S, position,
-
- nCaptures, replaceValue,
- false, firstDollarIndex);
+ // Step l.i
+ var namedCaptures = result.groups;
+ if (namedCaptures !== undefined) {
+ namedCaptures = ToObject(namedCaptures);
+ }
+ // Step l.ii
+ replacement = RegExpGetSubstitution(result, S, position, replaceValue, firstDollarIndex, namedCaptures);
#else
replacement = replaceValue;
#endif
- // Step 14.l.ii.
+ // Step 14.m.ii.
var accumulatedResult = Substring(S, 0, position) + replacement;
// Step 15.
diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp
index 8bcae4d826..cb691893f2 100644
--- a/js/src/builtin/TestingFunctions.cpp
+++ b/js/src/builtin/TestingFunctions.cpp
@@ -3827,10 +3827,10 @@ ConvertRegExpTreeToObject(JSContext* cx, irregexp::RegExpTree* tree)
return nullptr;
return obj;
}
- if (tree->IsLookahead()) {
- if (!StringProp(cx, obj, "type", "Lookahead"))
+ if (tree->IsLookaround()) {
+ if (!StringProp(cx, obj, "type", "Lookaround"))
return nullptr;
- irregexp::RegExpLookahead* t = tree->AsLookahead();
+ irregexp::RegExpLookaround* t = tree->AsLookaround();
if (!BooleanProp(cx, obj, "is_positive", t->is_positive()))
return nullptr;
if (!TreeProp(cx, obj, "body", t->body()))
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index b464b23048..2539249ad9 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -24,6 +24,7 @@
#include "frontend/BytecodeCompiler.h"
#include "frontend/ReservedWords.h"
+#include "irregexp/FeatureFlags.h"
#include "js/CharacterEncoding.h"
#include "js/UniquePtr.h"
#include "vm/HelperThreads.h"
@@ -1942,6 +1943,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
reflags = RegExpFlag(reflags | UnicodeFlag);
else if (c == 's' && !(reflags & DotAllFlag))
reflags = RegExpFlag(reflags | DotAllFlag);
+ else if (c == 'v' && irregexp::kParseFlagUnicodeSetsAsUnicode && !(reflags & UnicodeFlag))
+ reflags = RegExpFlag(reflags | UnicodeFlag);
else
break;
getChar();
diff --git a/js/src/irregexp/FeatureFlags.h b/js/src/irregexp/FeatureFlags.h
new file mode 100644
index 0000000000..1e0178b926
--- /dev/null
+++ b/js/src/irregexp/FeatureFlags.h
@@ -0,0 +1,20 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef irregexp_FeatureFlags_h
+#define irregexp_FeatureFlags_h
+
+namespace js {
+
+namespace irregexp {
+
+// Feature flag to treat /../v as /../u (https://v8.dev/features/regexp-v-flag)
+// We don't support Set Notation or the changed Case Insensitive handling
+// but we have Property Sequences and want them in unit test runs.
+static const bool kParseFlagUnicodeSetsAsUnicode = false;
+
+} } // namespace js::irregexp
+
+#endif // irregexp_FeatureFlags_h
diff --git a/js/src/irregexp/InfallibleVector.h b/js/src/irregexp/InfallibleVector.h
new file mode 100644
index 0000000000..7363ecb1e1
--- /dev/null
+++ b/js/src/irregexp/InfallibleVector.h
@@ -0,0 +1,103 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_INFALLIBLEVECTOR_H_
+#define V8_INFALLIBLEVECTOR_H_
+
+namespace js {
+namespace irregexp {
+
+// InfallibleVector is like Vector, but all its methods are infallible (they
+// crash on OOM). We use this class instead of Vector to avoid a ton of
+// MOZ_MUST_USE warnings in irregexp code (imported from V8).
+template<typename T, size_t N>
+class InfallibleVector
+{
+ Vector<T, N, LifoAllocPolicy<Infallible>> vector_;
+
+ InfallibleVector(const InfallibleVector&) = delete;
+ void operator=(const InfallibleVector&) = delete;
+
+ public:
+ explicit InfallibleVector(const LifoAllocPolicy<Infallible>& alloc) : vector_(alloc) {}
+
+ void append(const T& t) { MOZ_ALWAYS_TRUE(vector_.append(t)); }
+ void append(const T* begin, size_t length) { MOZ_ALWAYS_TRUE(vector_.append(begin, length)); }
+
+ // Move a number of elements in this vector to another position
+ // in the same vector. Handles overlapping source and target areas.
+ void moveReplace(int from, int to, int count)
+ {
+ T* array = begin();
+ if (from < to) {
+ for (int i = count - 1; i >= 0; i--)
+ array[to + i] = array[from + i];
+ } else {
+ for (int i = 0; i < count; i++)
+ array[to + i] = array[from + i];
+ }
+ }
+
+ void clear() { vector_.clear(); }
+ void popBack() { vector_.popBack(); }
+ void reserve(size_t n) { MOZ_ALWAYS_TRUE(vector_.reserve(n)); }
+
+
+ size_t length() const { return vector_.length(); }
+ T popCopy() { return vector_.popCopy(); }
+
+ T* begin() { return vector_.begin(); }
+ const T* begin() const { return vector_.begin(); }
+
+ T* end() { return vector_.end(); }
+ const T* end() const { return vector_.end(); }
+
+ T& operator[](size_t index) { return vector_[index]; }
+ const T& operator[](size_t index) const { return vector_[index]; }
+
+ InfallibleVector& operator=(InfallibleVector&& rhs) { vector_ = Move(rhs.vector_); return *this; }
+
+ bool equals(const InfallibleVector& other) const {
+ if (length() != other.length()) {
+ return false;
+ }
+ return 0 == memcmp(begin(), other.begin(), length() * sizeof(T));
+ }
+ inline bool operator==(const InfallibleVector& rhs) const {
+ return equals(rhs);
+ }
+};
+
+typedef InfallibleVector<char16_t, 10> CharacterVector;
+typedef InfallibleVector<CharacterVector*, 1> CharacterVectorVector;
+typedef InfallibleVector<int32_t, 10> IntegerVector;
+
+} } // namespace js::irregexp
+
+#endif // V8_INFALLIBLEVECTOR_H_ \ No newline at end of file
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
index a3756f5fff..41c1951bc2 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
@@ -71,13 +71,13 @@ NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(LifoAlloc* alloc, RegExpS
// Find physical registers for each compiler register.
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ temp0 = regs.takeAny();
+ temp1 = regs.takeAny();
+ temp2 = regs.takeAny();
input_end_pointer = regs.takeAny();
current_character = regs.takeAny();
current_position = regs.takeAny();
backtrack_stack_pointer = regs.takeAny();
- temp0 = regs.takeAny();
- temp1 = regs.takeAny();
- temp2 = regs.takeAny();
JitSpew(JitSpew_Codegen,
"Starting RegExp (input_end_pointer %s) (current_character %s)"
@@ -548,39 +548,28 @@ NativeRegExpMacroAssembler::Bind(Label* label)
}
void
-NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start)
-{
- JitSpew(SPEW_PREFIX "CheckAtStart");
-
- Label not_at_start;
-
- // Did we start the match at the start of the string at all?
- Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex));
- masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), &not_at_start);
-
- // If we did, are we still at the start of the input?
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0);
+NativeRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
+ Assembler::Condition cond) {
+ masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0);
Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
- masm.branchPtr(Assembler::Equal, inputStart, temp0, BranchOrBacktrack(on_at_start));
+ masm.branchPtr(cond, inputStart, temp0, BranchOrBacktrack(on_cond));
+}
+
+void
+NativeRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start)
+{
+ JitSpew(SPEW_PREFIX "CheckAtStart");
- masm.bind(&not_at_start);
+ CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal);
}
void
-NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start)
+NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start)
{
JitSpew(SPEW_PREFIX "CheckNotAtStart");
- // Did we start the match at the start of the string at all?
- Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex));
- masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), BranchOrBacktrack(on_not_at_start));
-
- // If we did, are we still at the start of the input?
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0);
-
- Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
- masm.branchPtr(Assembler::NotEqual, inputStart, temp0, BranchOrBacktrack(on_not_at_start));
+ CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual);
}
void
@@ -659,211 +648,204 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
}
void
-NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match)
+NativeRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, bool read_backward,
+ Label* on_no_match,
+ bool unicode, bool ignore_case)
{
- JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
-
Label fallthrough;
- Label success;
- Label fail;
-
- // Find length of back-referenced capture.
- masm.loadPtr(register_location(start_reg), current_character);
- masm.loadPtr(register_location(start_reg + 1), temp0);
- masm.subPtr(current_character, temp0); // Length to check.
- // Fail on partial or illegal capture (start of capture after end of capture).
- masm.branchPtr(Assembler::LessThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match));
+ // Captures are stored as a sequential pair of registers.
+ // Find the length of the back-referenced capture and load the
+ // capture's start index into current_character.
+ masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
+ masm.loadPtr(register_location(start_reg + 1), temp0); // Index of end of capture
+ masm.subPtr(current_character, temp0); // Length of capture.
- // Succeed on empty capture (including no capture).
+ // If length is zero, either the capture is empty or it is completely
+ // uncaptured. In either case succeed immediately.
masm.branchPtr(Assembler::Equal, temp0, ImmWord(0), &fallthrough);
// Check that there are sufficient characters left in the input.
- masm.movePtr(current_position, temp1);
- masm.addPtr(temp0, temp1);
- masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match));
-
- // Save register to make it available below.
- masm.push(backtrack_stack_pointer);
-
- // Compute pointers to match string and capture string
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp1); // Start of match.
- masm.addPtr(input_end_pointer, current_character); // Start of capture.
- masm.computeEffectiveAddress(BaseIndex(temp0, temp1, TimesOne), backtrack_stack_pointer); // End of match.
-
- Label loop;
- masm.bind(&loop);
- if (mode_ == ASCII) {
- masm.load8ZeroExtend(Address(current_character, 0), temp0);
- masm.load8ZeroExtend(Address(temp1, 0), temp2);
+ if (read_backward) {
+ // If start + len > current, there isn't enough room for a
+ // lookbehind backreference.
+ Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
+ masm.loadPtr(inputStart, temp1);
+ masm.subPtr(input_end_pointer, temp1);
+ masm.addPtr(temp0, temp1);
+ masm.branchPtr(Assembler::GreaterThan, temp1, current_position,
+ BranchOrBacktrack(on_no_match));
} else {
- MOZ_ASSERT(mode_ == CHAR16);
- masm.load16ZeroExtend(Address(current_character, 0), temp0);
- masm.load16ZeroExtend(Address(temp1, 0), temp2);
+ // current_position is the negative offset from the end.
+ // If current + len > 0, there isn't enough room for a backreference.
+ masm.movePtr(current_position, temp1);
+ masm.addPtr(temp0, temp1);
+ masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0),
+ BranchOrBacktrack(on_no_match));
}
- masm.branch32(Assembler::NotEqual, temp0, temp2, &fail);
- // Increment pointers into capture and match string.
- masm.addPtr(Imm32(char_size()), current_character);
- masm.addPtr(Imm32(char_size()), temp1);
-
- // Check if we have reached end of match area.
- masm.branchPtr(Assembler::Below, temp1, backtrack_stack_pointer, &loop);
- masm.jump(&success);
-
- masm.bind(&fail);
-
- // Restore backtrack stack pointer.
- masm.pop(backtrack_stack_pointer);
- JumpOrBacktrack(on_no_match);
-
- masm.bind(&success);
-
- // Move current character position to position after match.
- masm.movePtr(backtrack_stack_pointer, current_position);
- masm.subPtr(input_end_pointer, current_position);
-
- // Restore backtrack stack pointer.
- masm.pop(backtrack_stack_pointer);
-
- masm.bind(&fallthrough);
-}
-
-void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
- bool unicode)
-{
- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
-
- Label fallthrough;
+ if (mode_ == CHAR16 && ignore_case) {
+ // We call a helper function for case-insensitive non-latin1 strings.
+ // Save volatile regs. temp1, temp2, and current_character
+ // don't need to be saved. current_position needs to be saved
+ // even if it's non-volatile, because we modify it to use as an argument.
+ LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
+ volatileRegs.addUnchecked(current_position);
+ volatileRegs.takeUnchecked(temp1);
+ volatileRegs.takeUnchecked(temp2);
+ volatileRegs.takeUnchecked(current_character);
+ masm.PushRegsInMask(volatileRegs);
- masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
- masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture
- masm.subPtr(current_character, temp1); // Length of capture.
+ // Parameters are
+ // Address byte_offset1 - Address captured substring's start.
+ // Address byte_offset2 - Address of current character position.
+ // size_t byte_length - length of capture in bytes(!)
- // The length of a capture should not be negative. This can only happen
- // if the end of the capture is unrecorded, or at a point earlier than
- // the start of the capture.
- masm.branchPtr(Assembler::LessThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match));
+ // Set byte_offset1.
+ // Start of capture, where current_character already holds string-end negative offset.
+ masm.addPtr(input_end_pointer, current_character);
- // If length is zero, either the capture is empty or it is completely
- // uncaptured. In either case succeed immediately.
- masm.branchPtr(Assembler::Equal, temp1, ImmWord(0), &fallthrough);
+ // Set byte_offset2.
+ // Found by adding negative string-end offset of current position
+ // to end of string.
+ masm.addPtr(input_end_pointer, current_position);
+ if (read_backward) {
+ // Offset by the capture length, which is held in temp0 (temp1 is only bounds-check scratch).
+ masm.subPtr(temp0, current_position);
+ }
- // Check that there are sufficient characters left in the input.
- masm.movePtr(current_position, temp0);
- masm.addPtr(temp1, temp0);
- masm.branchPtr(Assembler::GreaterThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match));
+ masm.setupUnalignedABICall(temp1);
+ masm.passABIArg(current_character);
+ masm.passABIArg(current_position);
+ masm.passABIArg(temp0);
+ if (unicode) {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ } else {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ }
+ masm.storeCallInt32Result(temp1);
+ masm.PopRegsInMask(volatileRegs);
+ // Check if function returned non-zero for success or zero for failure.
+ masm.branchTest32(Assembler::Zero, temp1, temp1, BranchOrBacktrack(on_no_match));
- if (mode_ == ASCII) {
- Label success, fail;
+ // On success, advance position by length of capture
+ if (read_backward) {
+ masm.subPtr(temp0, current_position);
+ } else {
+ masm.addPtr(temp0, current_position);
+ }
+ } else {
+ MOZ_ASSERT(mode_ == ASCII || !ignore_case);
// Save register contents to make the registers available below. After
- // this, the temp0, temp2, and current_position registers are available.
+ // this, the temp1, temp2, and current_position registers are available.
masm.push(current_position);
+ // Make offset values into pointers
masm.addPtr(input_end_pointer, current_character); // Start of capture.
masm.addPtr(input_end_pointer, current_position); // Start of text to match against capture.
- masm.addPtr(current_position, temp1); // End of text to match against capture.
-
- Label loop, loop_increment;
- masm.bind(&loop);
- masm.load8ZeroExtend(Address(current_position, 0), temp0);
- masm.load8ZeroExtend(Address(current_character, 0), temp2);
- masm.branch32(Assembler::Equal, temp0, temp2, &loop_increment);
-
- // Mismatch, try case-insensitive match (converting letters to lower-case).
- masm.or32(Imm32(0x20), temp0); // Convert match character to lower-case.
-
- // Is temp0 a lowercase letter?
- Label convert_capture;
- masm.computeEffectiveAddress(Address(temp0, -'a'), temp2);
- masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')),
- &convert_capture);
- // Latin-1: Check for values in range [224,254] but not 247.
- masm.sub32(Imm32(224 - 'a'), temp2);
- masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail);
-
- // Check for 247.
- masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail);
+ if (read_backward) {
+ // Offset by length when matching backwards.
+ masm.subPtr(temp0, current_position);
+ }
- masm.bind(&convert_capture);
+ // End of text to match against capture (temp0 is pointer now)
+ masm.addPtr(current_position, temp0);
- // Also convert capture character.
- masm.load8ZeroExtend(Address(current_character, 0), temp2);
- masm.or32(Imm32(0x20), temp2);
+ Label success, fail, loop;
+ masm.bind(&loop);
- masm.branch32(Assembler::NotEqual, temp0, temp2, &fail);
+ // Load next character from each string.
+ if (mode_ == ASCII) {
+ masm.load8ZeroExtend(Address(current_character, 0), temp1);
+ masm.load8ZeroExtend(Address(current_position, 0), temp2);
+ } else {
+ masm.load16ZeroExtend(Address(current_character, 0), temp1);
+ masm.load16ZeroExtend(Address(current_position, 0), temp2);
+ }
- masm.bind(&loop_increment);
+ if (ignore_case) {
+ MOZ_ASSERT(mode_ == ASCII);
+ Label loop_increment, convert_match;
+
+ // Try exact match.
+ masm.branch32(Assembler::Equal, temp1, temp2, &loop_increment);
+
+ // Mismatch, try case-insensitive match (converting letters to lower-case).
+ masm.or32(Imm32(0x20), temp1); // Convert capture character to lower-case.
+
+ // Is temp1 a lowercase letter [a,z]?
+ masm.computeEffectiveAddress(Address(temp1, -'a'), temp2);
+ masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')),
+ &convert_match);
+ // Latin-1: Check for values in range [224,254] but not 247 (U+00F7 DIVISION SIGN).
+ masm.sub32(Imm32(224 - 'a'), temp2);
+ masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail);
+ // Check for 247.
+ masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail);
+
+ // Capture character is lower case. Convert match character to lower case and compare
+ masm.bind(&convert_match);
+ // Reload latin1 character since temp2 was clobbered above
+ masm.load8ZeroExtend(Address(current_position, 0), temp2);
+ masm.or32(Imm32(0x20), temp2);
+ masm.branch32(Assembler::NotEqual, temp1, temp2, &fail);
+
+ masm.bind(&loop_increment);
+ } else {
+ // Fail if characters do not match.
+ masm.branch32(Assembler::NotEqual, temp1, temp2, &fail);
+ }
// Increment pointers into match and capture strings.
- masm.addPtr(Imm32(1), current_character);
- masm.addPtr(Imm32(1), current_position);
+ masm.addPtr(Imm32(char_size()), current_character);
+ masm.addPtr(Imm32(char_size()), current_position);
- // Compare to end of match, and loop if not done.
- masm.branchPtr(Assembler::Below, current_position, temp1, &loop);
+ // Loop if we have not reached the end of the match string.
+ masm.branchPtr(Assembler::Below, current_position, temp0, &loop);
masm.jump(&success);
- masm.bind(&fail);
-
// Restore original values before failing.
+ masm.bind(&fail);
masm.pop(current_position);
JumpOrBacktrack(on_no_match);
masm.bind(&success);
-
// Drop original character position value.
- masm.addToStackPtr(Imm32(sizeof(uintptr_t)));
+ masm.pop(temp0);
- // Compute new value of character position after the matched part.
+ // current_position is a pointer (now at the end of the consumed characters). Convert it back to an offset.
masm.subPtr(input_end_pointer, current_position);
- } else {
- MOZ_ASSERT(mode_ == CHAR16);
-
- // Note: temp1 needs to be saved/restored if it is volatile, as it is used after the call.
- LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
- volatileRegs.takeUnchecked(temp0);
- volatileRegs.takeUnchecked(temp2);
- masm.PushRegsInMask(volatileRegs);
-
- // Set byte_offset1.
- // Start of capture, where current_character already holds string-end negative offset.
- masm.addPtr(input_end_pointer, current_character);
- // Set byte_offset2.
- // Found by adding negative string-end offset of current position
- // to end of string.
- masm.addPtr(input_end_pointer, current_position);
-
- // Parameters are
- // Address byte_offset1 - Address captured substring's start.
- // Address byte_offset2 - Address of current character position.
- // size_t byte_length - length of capture in bytes(!)
- masm.setupUnalignedABICall(temp0);
- masm.passABIArg(current_character);
- masm.passABIArg(current_position);
- masm.passABIArg(temp1);
- if (!unicode) {
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
- } else {
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ if (read_backward) {
+ // Subtract match length if we matched backward
+ masm.addPtr(register_location(start_reg), current_position);
+ masm.subPtr(register_location(start_reg + 1), current_position);
}
- masm.storeCallInt32Result(temp0);
+ }
- masm.PopRegsInMask(volatileRegs);
+ // Fallthrough if capture length was zero
+ masm.bind(&fallthrough);
+}
- // Check if function returned non-zero for success or zero for failure.
- masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match));
+void
+NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
+{
+ JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
- // On success, increment position by length of capture.
- masm.addPtr(temp1, current_position);
- }
+ CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, /*unicode = */ false, /*ignore_case = */ false);
+}
- masm.bind(&fallthrough);
+void
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ Label* on_no_match, bool unicode)
+{
+ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
+
+ CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, unicode, /*ignore_case = */ true);
}
void
@@ -961,10 +943,13 @@ NativeRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, Label* on_end_of
{
JitSpew(SPEW_PREFIX "LoadCurrentCharacter(%d, %d)", cp_offset, characters);
- MOZ_ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
MOZ_ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds)
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -972,9 +957,8 @@ void
NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int characters)
{
JitSpew(SPEW_PREFIX "LoadCurrentCharacterUnchecked(%d, %d)", cp_offset, characters);
-
+ BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * char_size());
if (mode_ == ASCII) {
- BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset);
if (characters == 4) {
masm.load32(address, current_character);
} else if (characters == 2) {
@@ -986,7 +970,6 @@ NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int cha
} else {
MOZ_ASSERT(mode_ == CHAR16);
MOZ_ASSERT(characters <= 2);
- BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * sizeof(char16_t));
if (characters == 2)
masm.load32(address, current_character);
else
@@ -1096,10 +1079,11 @@ NativeRegExpMacroAssembler::CheckBacktrackStackLimit()
masm.moveStackPtrTo(temp2);
masm.call(&stack_overflow_label_);
- masm.bind(&no_stack_overflow);
// Exit with an exception if the call failed.
masm.branchTest32(Assembler::Zero, temp0, temp0, &exit_with_exception_label_);
+
+ masm.bind(&no_stack_overflow);
}
void
@@ -1213,8 +1197,21 @@ void
NativeRegExpMacroAssembler::CheckPosition(int cp_offset, Label* on_outside_input)
{
JitSpew(SPEW_PREFIX "CheckPosition(%d)", cp_offset);
- masm.branchPtr(Assembler::GreaterThanOrEqual, current_position,
- ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input));
+ if (cp_offset >= 0) {
+ // end + current + offset >= end
+ // <=> current + offset >= 0
+ // <=> current >= -offset
+ masm.branchPtr(Assembler::GreaterThanOrEqual, current_position,
+ ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input));
+ } else {
+ // negative cp_offset means we're reading backwards, check against start of string
+ // Compute offset address
+ masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0);
+
+ // Compare to start of input.
+ Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
+ masm.branchPtr(Assembler::GreaterThan, inputStart, temp0, BranchOrBacktrack(on_outside_input));
+ }
}
Label*
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h
index 6bb14ab662..857900cabf 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.h
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.h
@@ -98,15 +98,16 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
void AdvanceRegister(int reg, int by);
void Backtrack();
void Bind(jit::Label* label);
- void CheckAtStart(jit::Label* on_at_start);
+ void CheckAtStart(int cp_offset, jit::Label* on_at_start);
void CheckCharacter(unsigned c, jit::Label* on_equal);
void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal);
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
@@ -202,13 +203,17 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
Vector<LabelPatch, 4, SystemAllocPolicy> labelPatches;
- // See RegExpMacroAssembler.cpp for the meaning of these registers.
+ // See NativeRegExpMacroAssembler.cpp for the meaning of these registers.
jit::Register input_end_pointer;
jit::Register current_character;
jit::Register current_position;
jit::Register backtrack_stack_pointer;
jit::Register temp0, temp1, temp2;
+ void CheckAtStartImpl(int cp_offset, jit::Label* on_cond, jit::Assembler::Condition cond);
+ void CheckNotBackReferenceImpl(int start_reg, bool read_backward, jit::Label* on_no_match,
+ bool unicode, bool ignore_case);
+
// The frame_pointer-relative location of a regexp register.
jit::Address register_location(int register_index) {
checkRegister(register_index);
diff --git a/js/src/irregexp/RegExpAST.cpp b/js/src/irregexp/RegExpAST.cpp
index 14dfe8cea5..dc8d3b4c2c 100644
--- a/js/src/irregexp/RegExpAST.cpp
+++ b/js/src/irregexp/RegExpAST.cpp
@@ -249,16 +249,16 @@ RegExpCapture::CaptureRegisters()
}
// ----------------------------------------------------------------------------
-// RegExpLookahead
+// RegExpLookaround
Interval
-RegExpLookahead::CaptureRegisters()
+RegExpLookaround::CaptureRegisters()
{
return body()->CaptureRegisters();
}
bool
-RegExpLookahead::IsAnchoredAtStart()
+RegExpLookaround::IsAnchoredAtStart()
{
- return is_positive() && body()->IsAnchoredAtStart();
+ return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
}
diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h
index bff4ee81dd..9e023d537f 100644
--- a/js/src/irregexp/RegExpAST.h
+++ b/js/src/irregexp/RegExpAST.h
@@ -234,8 +234,6 @@ class RegExpCharacterClass : public RegExpTree
bool is_negated_;
};
-typedef InfallibleVector<char16_t, 10> CharacterVector;
-
class RegExpAtom : public RegExpTree
{
public:
@@ -341,7 +339,7 @@ class RegExpCapture : public RegExpTree
{
public:
explicit RegExpCapture(RegExpTree* body, int index)
- : body_(body), index_(index)
+ : body_(body), index_(index), name_(nullptr)
{}
virtual void* Accept(RegExpVisitor* visitor, void* data);
@@ -359,34 +357,42 @@ class RegExpCapture : public RegExpTree
virtual int min_match() { return body_->min_match(); }
virtual int max_match() { return body_->max_match(); }
RegExpTree* body() { return body_; }
+ void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; }
+ const CharacterVector* name() const { return name_; }
+ void set_name(const CharacterVector* name) { name_ = name; }
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
private:
RegExpTree* body_;
int index_;
+ const CharacterVector* name_;
};
-class RegExpLookahead : public RegExpTree
+class RegExpLookaround : public RegExpTree
{
public:
- RegExpLookahead(RegExpTree* body,
- bool is_positive,
- int capture_count,
- int capture_from)
+ enum Type { LOOKAHEAD, LOOKBEHIND };
+
+ RegExpLookaround(RegExpTree* body,
+ bool is_positive,
+ int capture_count,
+ int capture_from,
+ Type type)
: body_(body),
is_positive_(is_positive),
capture_count_(capture_count),
- capture_from_(capture_from)
+ capture_from_(capture_from),
+ type_(type)
{}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
- virtual RegExpLookahead* AsLookahead();
+ virtual RegExpLookaround* AsLookaround();
virtual Interval CaptureRegisters();
- virtual bool IsLookahead();
+ virtual bool IsLookaround();
virtual bool IsAnchoredAtStart();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
@@ -394,12 +400,14 @@ class RegExpLookahead : public RegExpTree
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
+ Type type() { return type_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
+ Type type_;
};
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
@@ -408,7 +416,7 @@ class RegExpBackReference : public RegExpTree
{
public:
explicit RegExpBackReference(RegExpCapture* capture)
- : capture_(capture)
+ : capture_(capture), name_(nullptr)
{}
virtual void* Accept(RegExpVisitor* visitor, void* data);
@@ -416,14 +424,22 @@ class RegExpBackReference : public RegExpTree
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
- virtual int min_match() { return 0; }
- virtual int max_match() { return capture_->max_match(); }
+ virtual int min_match() override { return 0; }
+ // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
+ // recursion, we give up. Ignorance is bliss.
+ int max_match() override { return kInfinity; }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
+ void set_capture(RegExpCapture* capture) { capture_ = capture; }
+ const CharacterVector* name() const { return name_; }
+ void set_name(const CharacterVector* name) { name_ = name; }
private:
RegExpCapture* capture_;
+ const CharacterVector* name_;
};
+typedef InfallibleVector<RegExpBackReference*, 1> RegExpBackReferenceVector;
+
class RegExpEmpty : public RegExpTree
{
public:
diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h
index 7454f88f73..42326b3d47 100644
--- a/js/src/irregexp/RegExpBytecode.h
+++ b/js/src/irregexp/RegExpBytecode.h
@@ -81,16 +81,19 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
-V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
-V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */
+V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
+V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
+V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
diff --git a/js/src/irregexp/RegExpCharRanges.cpp b/js/src/irregexp/RegExpCharRanges.cpp
new file mode 100644
index 0000000000..87a4f94aa1
--- /dev/null
+++ b/js/src/irregexp/RegExpCharRanges.cpp
@@ -0,0 +1,2069 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "irregexp/RegExpCharRanges.h"
+
+#include "unicode/uniset.h"
+
+// Generated table
+#include "irregexp/RegExpCharacters-inl.h"
+
+using namespace js::irregexp;
+
+using mozilla::ArrayLength;
+
+// Extends |ranges| with the case-insensitive equivalents of every code unit
+// in this range. Each equivalent reported by GetCaseIndependentLetters() that
+// differs from the source code unit is folded into an existing entry of
+// |ranges| when that entry already contains it or is directly adjacent to it;
+// otherwise it is appended as a new singleton range.
+//
+// When |is_ascii| is set and RangeContainsLatin1Equivalents() reports false
+// for this range, the scan is clipped to code units up to
+// kMaxOneByteCharCode (and skipped entirely if the range starts above it).
+void
+CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges)
+{
+    char16_t bottom = from();
+    char16_t top = to();
+
+    if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) {
+        if (bottom > kMaxOneByteCharCode)
+            return;
+        if (top > kMaxOneByteCharCode)
+            top = kMaxOneByteCharCode;
+    }
+
+    // The loop is left through the |c == top| test at its end instead of a
+    // |c <= top| condition, so it also terminates when |top| is the largest
+    // char16_t value.
+    for (char16_t c = bottom;; c++) {
+        char16_t chars[kEcma262UnCanonicalizeMaxWidth];
+        size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars);
+
+        for (size_t i = 0; i < length; i++) {
+            char16_t other = chars[i];
+            if (other == c)
+                continue;
+
+            // Try to combine with an existing range. The index is named |j|
+            // so that it does not shadow the enclosing loop's |i|.
+            bool found = false;
+            for (size_t j = 0; j < ranges->length(); j++) {
+                CharacterRange& range = (*ranges)[j];
+                if (range.Contains(other)) {
+                    found = true;
+                    break;
+                } else if (other == range.from() - 1) {
+                    // Directly below the range: grow it downwards.
+                    range.set_from(other);
+                    found = true;
+                    break;
+                } else if (other == range.to() + 1) {
+                    // Directly above the range: grow it upwards.
+                    range.set_to(other);
+                    found = true;
+                    break;
+                }
+            }
+
+            if (!found)
+                ranges->append(CharacterRange::Singleton(other));
+        }
+
+        if (c == top)
+            break;
+    }
+}
+
+// Appends to |ranges| every range described by the table |elmv|.
+//
+// |elmv| holds |elmc| entries: consecutive (inclusive start, exclusive end)
+// pairs followed by a terminating 0x10000 marker.
+/* static */
+void
+CharacterRange::AddClass(const int* elmv, int elmc, CharacterRangeVector* ranges)
+{
+    // The trailing marker is not part of any pair.
+    const int boundary_count = elmc - 1;
+    MOZ_ASSERT(elmv[boundary_count] == 0x10000);
+
+    for (int idx = 0; idx < boundary_count; idx += 2) {
+        const int start = elmv[idx];
+        const int limit = elmv[idx + 1];
+        MOZ_ASSERT(start < limit);
+        // The table stores an exclusive end; CharacterRange is inclusive.
+        ranges->append(CharacterRange(start, limit - 1));
+    }
+}
+
+// Appends to |ranges| the complement, within [0, kMaxUtf16CodeUnit], of the
+// ranges described by the table |elmv|.
+//
+// |elmv| uses the same layout as for AddClass(): (inclusive start, exclusive
+// end) pairs followed by a 0x10000 marker. The table must neither start at 0
+// nor end at kMaxUtf16CodeUnit (both are asserted).
+/* static */ void
+CharacterRange::AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges)
+{
+    const int boundary_count = elmc - 1;
+    MOZ_ASSERT(elmv[boundary_count] == 0x10000);
+    MOZ_ASSERT(elmv[0] != 0x0000);
+    MOZ_ASSERT(elmv[boundary_count - 1] != kMaxUtf16CodeUnit);
+
+    // |gap_start| is the first code unit of the gap preceding the next table
+    // range.
+    char16_t gap_start = 0x0000;
+    for (int idx = 0; idx < boundary_count; idx += 2) {
+        MOZ_ASSERT(gap_start <= elmv[idx] - 1);
+        MOZ_ASSERT(elmv[idx] < elmv[idx + 1]);
+        ranges->append(CharacterRange(gap_start, elmv[idx] - 1));
+        gap_start = elmv[idx + 1];
+    }
+
+    // Final gap, from the end of the last table range to the top.
+    ranges->append(CharacterRange(gap_start, kMaxUtf16CodeUnit));
+}
+
+// Appends to |ranges| the character class selected by the escape letter
+// |type|. An unrecognized |type| crashes. |alloc| is not used by this
+// function.
+/* static */ void
+CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type,
+                               CharacterRangeVector* ranges)
+{
+    switch (type) {
+      // Classes added exactly as listed in their table.
+      case 's':
+        AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
+        return;
+      case 'w':
+        AddClass(kWordRanges, kWordRangeCount, ranges);
+        return;
+      case 'd':
+        AddClass(kDigitRanges, kDigitRangeCount, ranges);
+        return;
+      // This is the set of characters matched by the $ and ^ symbols
+      // in multiline mode.
+      case 'n':
+        AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
+        return;
+
+      // Classes added as the complement of their table.
+      case 'S':
+        AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
+        return;
+      case 'W':
+        AddClassNegated(kWordRanges, kWordRangeCount, ranges);
+        return;
+      case 'D':
+        AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
+        return;
+      case '.':
+        AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
+        return;
+
+      // This is not a character range as defined by the spec but a
+      // convenient shorthand for a character class that matches any
+      // character.
+      case '*':
+        ranges->append(CharacterRange::Everything());
+        return;
+
+      default:
+        MOZ_CRASH("Bad character class escape");
+    }
+}
+
+// Add class escape, excluding surrogate pair range.
+//
+// 's' and 'd' are forwarded unchanged to AddClassEscape(). The negated
+// classes 'S', 'W' and 'D' use the *AndSurrogate tables. With |ignore_case|
+// set, 'w' and 'W' use the dedicated ignore-case tables instead; note that
+// the ignore-case 'W' table is already negated and is therefore added with
+// AddClass(). Any other |type| crashes.
+/* static */ void
+CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
+                                      CharacterRangeVector* ranges, bool ignore_case)
+{
+    switch (type) {
+      case 's':
+      case 'd':
+        AddClassEscape(alloc, type, ranges);
+        break;
+      case 'S':
+        AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges);
+        break;
+      case 'w':
+        if (ignore_case)
+            AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges);
+        else
+            AddClassEscape(alloc, type, ranges);
+        break;
+      case 'W':
+        if (ignore_case) {
+            AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges,
+                     kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges);
+        } else {
+            AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
+        }
+        break;
+      case 'D':
+        AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges);
+        break;
+      default:
+        MOZ_CRASH("Bad type!");
+    }
+}
+
+// Appends either the single character |c| or, when |char_class| names a
+// class escape (anything other than kNoCharClass), that whole class to
+// |ranges|.
+/* static */ void
+CharacterRange::AddCharOrEscape(LifoAlloc* alloc, CharacterRangeVector* ranges,
+                                char16_t char_class, widechar c)
+{
+    if (char_class == kNoCharClass) {
+        // Plain character.
+        ranges->append(CharacterRange::Singleton(c));
+        return;
+    }
+
+    AddClassEscape(alloc, char_class, ranges);
+}
+
+// Unicode-mode variant of AddCharOrEscape().
+//
+// With a class escape in |char_class|, the class is added to |ranges| via
+// AddClassEscapeUnicode(); for the negated classes 'S', 'W' and 'D' the full
+// lead-surrogate, trail-surrogate and non-BMP ranges are added to
+// |lead_ranges|, |trail_ranges| and |wide_ranges| as well.
+//
+// Without a class escape, the single code point |c| is routed to the
+// matching vector by AddCharUnicode().
+/* static */ void
+CharacterRange::AddCharOrEscapeUnicode(LifoAlloc* alloc,
+                                       CharacterRangeVector* ranges,
+                                       CharacterRangeVector* lead_ranges,
+                                       CharacterRangeVector* trail_ranges,
+                                       WideCharRangeVector* wide_ranges,
+                                       char16_t char_class,
+                                       widechar c,
+                                       bool ignore_case)
+{
+    if (char_class == kNoCharClass) {
+        // Share the surrogate / non-BMP dispatch with AddCharUnicode()
+        // instead of repeating it here.
+        AddCharUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, c);
+        return;
+    }
+
+    AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case);
+    switch (char_class) {
+      case 'S':
+      case 'W':
+      case 'D':
+        lead_ranges->append(CharacterRange::LeadSurrogate());
+        trail_ranges->append(CharacterRange::TrailSurrogate());
+        wide_ranges->append(WideCharRange::NonBMP());
+        break;
+      case '.':
+        MOZ_CRASH("Bad char_class!");
+    }
+}
+
+// Appends the single code point |c| to exactly one of the four vectors:
+// |lead_ranges| for a lead surrogate, |trail_ranges| for a trail surrogate,
+// |wide_ranges| for a code point at or above NonBMPMin, and |ranges| for
+// everything else. |alloc| is not used by this function.
+/* static */ void
+CharacterRange::AddCharUnicode(LifoAlloc* alloc,
+                               CharacterRangeVector* ranges,
+                               CharacterRangeVector* lead_ranges,
+                               CharacterRangeVector* trail_ranges,
+                               WideCharRangeVector* wide_ranges,
+                               widechar c)
+{
+    if (unicode::IsLeadSurrogate(c)) {
+        lead_ranges->append(CharacterRange::Singleton(c));
+        return;
+    }
+
+    if (unicode::IsTrailSurrogate(c)) {
+        trail_ranges->append(CharacterRange::Singleton(c));
+        return;
+    }
+
+    if (c >= unicode::NonBMPMin) {
+        wide_ranges->append(WideCharRange::Singleton(c));
+        return;
+    }
+
+    ranges->append(CharacterRange::Singleton(c));
+}
+
+// Splits the inclusive code point range [first, next] at the surrogate and
+// BMP boundaries and appends each piece to the matching vector: pieces
+// outside the surrogate blocks and up to UTF16Max go to |ranges|, lead
+// surrogates to |lead_ranges|, trail surrogates to |trail_ranges|, and
+// everything from NonBMPMin upward to |wide_ranges|. |alloc| is not used by
+// this function.
+/* static */ void
+CharacterRange::AddUnicodeRange(LifoAlloc* alloc,
+                                CharacterRangeVector* ranges,
+                                CharacterRangeVector* lead_ranges,
+                                CharacterRangeVector* trail_ranges,
+                                WideCharRangeVector* wide_ranges,
+                                widechar first,
+                                widechar next)
+{
+    MOZ_ASSERT(first <= next);
+
+    // |cursor| is the lowest code point that has not been emitted yet.
+    widechar cursor = first;
+
+    // Piece 1: below the lead surrogates.
+    if (cursor < unicode::LeadSurrogateMin) {
+        if (next < unicode::LeadSurrogateMin) {
+            ranges->append(CharacterRange::Range(cursor, next));
+            return;
+        }
+        ranges->append(CharacterRange::Range(cursor, unicode::LeadSurrogateMin - 1));
+        cursor = unicode::LeadSurrogateMin;
+    }
+
+    // Piece 2: lead surrogates.
+    if (cursor <= unicode::LeadSurrogateMax) {
+        if (next <= unicode::LeadSurrogateMax) {
+            lead_ranges->append(CharacterRange::Range(cursor, next));
+            return;
+        }
+        lead_ranges->append(CharacterRange::Range(cursor, unicode::LeadSurrogateMax));
+        cursor = unicode::LeadSurrogateMax + 1;
+    }
+
+    // Piece 3: trail surrogates, which start right after the lead block.
+    MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin);
+    if (cursor <= unicode::TrailSurrogateMax) {
+        if (next <= unicode::TrailSurrogateMax) {
+            trail_ranges->append(CharacterRange::Range(cursor, next));
+            return;
+        }
+        trail_ranges->append(CharacterRange::Range(cursor, unicode::TrailSurrogateMax));
+        cursor = unicode::TrailSurrogateMax + 1;
+    }
+
+    // Piece 4: the rest of the BMP above the surrogates.
+    if (cursor <= unicode::UTF16Max) {
+        if (next <= unicode::UTF16Max) {
+            ranges->append(CharacterRange::Range(cursor, next));
+            return;
+        }
+        ranges->append(CharacterRange::Range(cursor, unicode::UTF16Max));
+        cursor = unicode::NonBMPMin;
+    }
+
+    // Piece 5: whatever remains lies outside the BMP.
+    MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin);
+    wide_ranges->append(WideCharRange::Range(cursor, next));
+}
+
+// Returns true as soon as RangeContainsLatin1Equivalents() holds for any
+// entry of |ranges|, and false if it holds for none of them.
+/* static */ bool
+CharacterRange::RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
+{
+    const size_t range_count = ranges.length();
+    size_t idx = 0;
+    while (idx < range_count) {
+        // TODO(dcarney): this could be a lot more efficient.
+        if (RangeContainsLatin1Equivalents(ranges[idx], unicode))
+            return true;
+        idx++;
+    }
+    return false;
+}
+
+// Returns true when |ranges| is exactly the list of ranges described by the
+// table |special_class|.
+//
+// |special_class| holds |length| entries: (inclusive start, exclusive end)
+// pairs followed by a terminating 0x10000 marker.
+/* static */ bool
+CharacterRange::CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
+{
+    // Ignore the final 0x10000 marker.
+    const size_t boundary_count = length - 1;
+    MOZ_ASSERT(special_class[boundary_count] == 0x10000);
+
+    // Every range consumes two table entries.
+    if (ranges.length() * 2 != boundary_count)
+        return false;
+
+    for (size_t r = 0; r < ranges.length(); r++) {
+        const size_t base = r * 2;
+        CharacterRange range = ranges[r];
+        if (range.from() != special_class[base] || range.to() != special_class[base + 1] - 1)
+            return false;
+    }
+    return true;
+}
+
+// Returns true when |ranges| is exactly the complement, within [0, 0xffff],
+// of the ranges described by the table |special_class|.
+//
+// |special_class| holds |length| entries: (inclusive start, exclusive end)
+// pairs followed by a terminating 0x10000 marker. The table must be
+// non-empty and must not start at 0 (both are asserted).
+/* static */ bool
+CharacterRange::CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
+{
+    // Ignore the final 0x10000 marker.
+    const size_t boundary_count = length - 1;
+    MOZ_ASSERT(special_class[boundary_count] == 0x10000);
+    MOZ_ASSERT(ranges.length() != 0);
+    MOZ_ASSERT(boundary_count != 0);
+    MOZ_ASSERT(special_class[0] != 0);
+
+    // N table ranges leave N + 1 gaps.
+    if (ranges.length() != (boundary_count / 2) + 1)
+        return false;
+
+    CharacterRange gap = ranges[0];
+    if (gap.from() != 0)
+        return false;
+
+    for (size_t idx = 0; idx < boundary_count; idx += 2) {
+        // The current gap must end right before the table range starts ...
+        if (special_class[idx] != (gap.to() + 1))
+            return false;
+        // ... and the following gap must begin at its exclusive end.
+        gap = ranges[(idx / 2) + 1];
+        if (special_class[idx + 1] != gap.from())
+            return false;
+    }
+
+    // The last gap has to reach the top of the code unit space.
+    return gap.to() == 0xffff;
+}
+
+// Replaces |*ranges| with a newly allocated vector holding |full_range|
+// minus every range currently in |*ranges|.
+//
+// The subtraction is done one input range at a time using two vectors that
+// alternate roles: |minuend| holds what is left so far and |difference|
+// receives |minuend| minus the next input range; afterwards the two are
+// exchanged. Because of that final exchange, the result is in |minuend| when
+// the loop ends.
+template <typename RangeType>
+/* static */ void
+CharacterRange::NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
+                                    RangeType full_range)
+{
+    typedef InfallibleVector<RangeType, 1> RangeVector;
+    RangeVector* minuend = alloc->newInfallible<RangeVector>(*alloc);
+    minuend->append(full_range);
+    RangeVector* difference = alloc->newInfallible<RangeVector>(*alloc);
+
+    for (size_t i = 0; i < (*ranges)->length(); i++) {
+        difference->clear();
+
+        const RangeType& removed = (**ranges)[i];
+        for (size_t j = 0; j < minuend->length(); j++) {
+            const RangeType& kept = (*minuend)[j];
+            auto kept_from = kept.from();
+            auto kept_to = kept.to();
+            auto removed_from = removed.from();
+            auto removed_to = removed.to();
+
+            if (kept_from < removed_from) {
+                if (kept_to < removed_from) {
+                    // |kept| lies entirely below |removed|.
+                    difference->append(kept);
+                } else {
+                    // The lower part of |kept| survives ...
+                    difference->append(RangeType::Range(kept_from, removed_from - 1));
+                    // ... and so does the upper part if |kept| extends
+                    // beyond |removed|.
+                    if (kept_to > removed_to)
+                        difference->append(RangeType::Range(removed_to + 1, kept_to));
+                }
+            } else if (kept_from > removed_to) {
+                // |kept| lies entirely above |removed|.
+                difference->append(kept);
+            } else if (kept_to > removed_to) {
+                // Only the part of |kept| above |removed| survives.
+                difference->append(RangeType::Range(removed_to + 1, kept_to));
+            }
+        }
+
+        // Exchange the roles of the two vectors for the next round.
+        RangeVector* previous = minuend;
+        minuend = difference;
+        difference = previous;
+    }
+
+    // After the loop the result is pointed at by |minuend|, not by
+    // |difference|.
+    *ranges = minuend;
+}
+
+// Explicit instantiations of NegateUnicodeRanges for both range types.
+template void CharacterRange::NegateUnicodeRanges<CharacterRange>(LifoAlloc* alloc, InfallibleVector<CharacterRange, 1>** ranges, CharacterRange full_range);
+template void CharacterRange::NegateUnicodeRanges<WideCharRange>(LifoAlloc* alloc, InfallibleVector<WideCharRange, 1>** ranges, WideCharRange full_range);
+
+
+namespace {
+// private namespace to not pollute js::irregexp
+
+// Returns true when |property_name| is, character for character, one of the
+// names ICU reports for |property|: its short name or any of its long names.
+bool IsExactPropertyAlias(const std::string& property_name, UProperty property) {
+    const char* abbreviated = u_getPropertyName(property, U_SHORT_PROPERTY_NAME);
+    if (abbreviated != nullptr && property_name == abbreviated)
+        return true;
+
+    // Walk the long names until ICU runs out of them (signalled by nullptr).
+    int choice = U_LONG_PROPERTY_NAME;
+    while (const char* alias = u_getPropertyName(property, static_cast<UPropertyNameChoice>(choice))) {
+        if (property_name == alias)
+            return true;
+        choice++;
+    }
+    return false;
+}
+
+// Returns true when |property_value_name| is, character for character, one
+// of the names ICU reports for the value |property_value| of |property|: its
+// short name or any of its long names.
+bool IsExactPropertyValueAlias(const std::string& property_value_name, UProperty property, int32_t property_value) {
+    const char* abbreviated = u_getPropertyValueName(property, property_value, U_SHORT_PROPERTY_NAME);
+    if (abbreviated != nullptr && property_value_name == abbreviated)
+        return true;
+
+    // Walk the long names until ICU runs out of them (signalled by nullptr).
+    int choice = U_LONG_PROPERTY_NAME;
+    while (const char* alias = u_getPropertyValueName(property, property_value,
+                                                      static_cast<UPropertyNameChoice>(choice))) {
+        if (property_value_name == alias)
+            return true;
+        choice++;
+    }
+    return false;
+}
+
+// Resolves |property_value_name| as a value of |property| and, on success,
+// appends the matching code points (complemented when |negate| is set) to
+// the four output vectors via CharacterRange::AddUnicodeRange().
+//
+// Returns false when the name is unknown to ICU, is not an exact alias of
+// the value, or when building the set fails or yields an empty set.
+bool LookupPropertyValueName(LifoAlloc* alloc,
+                             UProperty property,
+                             const std::string& property_value_name, bool negate,
+                             CharacterRangeVector* ranges,
+                             CharacterRangeVector* lead_ranges,
+                             CharacterRangeVector* trail_ranges,
+                             WideCharRangeVector* wide_ranges) {
+    // For the property Script_Extensions, we have to do the property value
+    // name lookup as if the property is Script.
+    const UProperty property_for_lookup =
+        (property == UCHAR_SCRIPT_EXTENSIONS) ? UCHAR_SCRIPT : property;
+
+    const int32_t property_value =
+        u_getPropertyValueEnum(property_for_lookup, property_value_name.c_str());
+    if (property_value == UCHAR_INVALID_CODE)
+        return false;
+
+    // We require the property name to match exactly to one of the property
+    // value aliases. However, u_getPropertyValueEnum uses loose matching.
+    if (!IsExactPropertyValueAlias(property_value_name, property_for_lookup, property_value))
+        return false;
+
+    // The set itself is built with the original |property|, not the one
+    // substituted for the name lookup.
+    UErrorCode ec = U_ZERO_ERROR;
+    icu::UnicodeSet set;
+    set.applyIntPropertyValue(property, property_value, ec);
+    if (ec != U_ZERO_ERROR || set.isEmpty())
+        return false;
+
+    set.removeAllStrings();
+    if (negate)
+        set.complement();
+
+    const int32_t range_count = set.getRangeCount();
+    for (int32_t idx = 0; idx < range_count; idx++) {
+        CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
+                                        set.getRangeStart(idx), set.getRangeEnd(idx));
+    }
+    return true;
+}
+
+// Handles the three names "Any", "ASCII" and "Assigned", appending the
+// corresponding code points (complemented when |negate| is set) to the
+// output vectors. Returns false for every other |name|.
+bool LookupSpecialPropertyValueName(LifoAlloc* alloc,
+                                    const std::string& name, bool negate,
+                                    CharacterRangeVector* ranges,
+                                    CharacterRangeVector* lead_ranges,
+                                    CharacterRangeVector* trail_ranges,
+                                    WideCharRangeVector* wide_ranges) {
+    if (name == "Any") {
+        // The negation of 'Any' is the empty set, so in that case the list
+        // of character ranges is left untouched.
+        if (!negate) {
+            CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
+                                            0, unicode::NonBMPMax);
+        }
+        return true;
+    }
+
+    if (name == "ASCII") {
+        if (negate) {
+            // Negative ASCII contains all planes.
+            CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
+                                            0x80, unicode::NonBMPMax);
+        } else {
+            // Positive ASCII is just the low code points.
+            ranges->append(CharacterRange::Range(0x00, 0x7F));
+        }
+        return true;
+    }
+
+    if (name == "Assigned") {
+        // 'Assigned' is looked up as the inverse of General_Category
+        // 'Unassigned', hence the flipped |negate|.
+        return LookupPropertyValueName(alloc, UCHAR_GENERAL_CATEGORY, "Unassigned", !negate,
+                                       ranges, lead_ranges, trail_ranges, wide_ranges);
+    }
+
+    return false;
+}
+
+// Returns true when |property| is one of the binary properties accepted
+// here, and false for anything else.
+bool IsSupportedBinaryProperty(UProperty property) {
+    // Explicitly allowlist supported binary properties. The spec forbids
+    // supporting properties outside of this set to ensure interoperability.
+    switch (property) {
+      case UCHAR_ALPHABETIC:
+      // 'Any' is not supported by ICU. See LookupSpecialPropertyValueName.
+      // 'ASCII' is not supported by ICU. See LookupSpecialPropertyValueName.
+      case UCHAR_ASCII_HEX_DIGIT:
+      // 'Assigned' is not supported by ICU. See LookupSpecialPropertyValueName.
+      case UCHAR_BIDI_CONTROL:
+      case UCHAR_BIDI_MIRRORED:
+      case UCHAR_CASE_IGNORABLE:
+      case UCHAR_CASED:
+      case UCHAR_CHANGES_WHEN_CASEFOLDED:
+      case UCHAR_CHANGES_WHEN_CASEMAPPED:
+      case UCHAR_CHANGES_WHEN_LOWERCASED:
+      case UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED:
+      case UCHAR_CHANGES_WHEN_TITLECASED:
+      case UCHAR_CHANGES_WHEN_UPPERCASED:
+      case UCHAR_DASH:
+      case UCHAR_DEFAULT_IGNORABLE_CODE_POINT:
+      case UCHAR_DEPRECATED:
+      case UCHAR_DIACRITIC:
+      case UCHAR_EMOJI:
+      case UCHAR_EMOJI_COMPONENT:
+      case UCHAR_EMOJI_MODIFIER_BASE:
+      case UCHAR_EMOJI_MODIFIER:
+      case UCHAR_EMOJI_PRESENTATION:
+      // NOTE(review): Extended_Pictographic is deliberately left disabled
+      // here; the reason is not visible in this file -- confirm.
+      // case UCHAR_EXTENDED_PICTOGRAPHIC:
+      case UCHAR_EXTENDER:
+      case UCHAR_GRAPHEME_BASE:
+      case UCHAR_GRAPHEME_EXTEND:
+      case UCHAR_HEX_DIGIT:
+      case UCHAR_ID_CONTINUE:
+      case UCHAR_ID_START:
+      case UCHAR_IDEOGRAPHIC:
+      case UCHAR_IDS_BINARY_OPERATOR:
+      case UCHAR_IDS_TRINARY_OPERATOR:
+      case UCHAR_JOIN_CONTROL:
+      case UCHAR_LOGICAL_ORDER_EXCEPTION:
+      case UCHAR_LOWERCASE:
+      case UCHAR_MATH:
+      case UCHAR_NONCHARACTER_CODE_POINT:
+      case UCHAR_PATTERN_SYNTAX:
+      case UCHAR_PATTERN_WHITE_SPACE:
+      case UCHAR_QUOTATION_MARK:
+      case UCHAR_RADICAL:
+      case UCHAR_REGIONAL_INDICATOR:
+      case UCHAR_S_TERM:
+      case UCHAR_SOFT_DOTTED:
+      case UCHAR_TERMINAL_PUNCTUATION:
+      case UCHAR_UNIFIED_IDEOGRAPH:
+      case UCHAR_UPPERCASE:
+      case UCHAR_VARIATION_SELECTOR:
+      case UCHAR_WHITE_SPACE:
+      case UCHAR_XID_CONTINUE:
+      case UCHAR_XID_START:
+        return true;
+      default:
+        return false;
+    }
+}
+
+} // namespace
+
+// Resolves a Unicode property escape and appends the matching code points
+// (complemented when |negate| is set) to the four output vectors. Returns
+// false when the escape cannot be resolved.
+//
+// With an empty |value|, |name| is tried in turn as a General_Category
+// value, as one of the special names handled by
+// LookupSpecialPropertyValueName(), and as a supported binary property.
+// With a non-empty |value|, |name| must be an exact alias of
+// General_Category, Script or Script_Extensions.
+/* static */ bool
+CharacterRange::AddPropertyClassRange(LifoAlloc* alloc,
+                                      const std::string& name, const std::string& value,
+                                      bool negate, bool ignore_case,
+                                      CharacterRangeVector* ranges,
+                                      CharacterRangeVector* lead_ranges,
+                                      CharacterRangeVector* trail_ranges,
+                                      WideCharRangeVector* wide_ranges)
+{
+    MOZ_UNUSED(ignore_case); // Full support of unicodeSets flag will use it
+
+    if (!value.empty()) {
+        // Both property name and value name are specified. Attempt to
+        // interpret the property name as enumerated property.
+        UProperty property = u_getPropertyEnum(name.c_str());
+        if (!IsExactPropertyAlias(name, property))
+            return false;
+
+        if (property == UCHAR_GENERAL_CATEGORY) {
+            // We want to allow aggregate value names such as "Letter".
+            property = UCHAR_GENERAL_CATEGORY_MASK;
+        } else if (!(property == UCHAR_SCRIPT || property == UCHAR_SCRIPT_EXTENSIONS)) {
+            // Besides General_Category, only Script and Script_Extensions
+            // may be combined with a value.
+            return false;
+        }
+        return LookupPropertyValueName(alloc, property, value, negate,
+                                       ranges, lead_ranges, trail_ranges, wide_ranges);
+    }
+
+    // Only one name given. Check what it refers to.
+    // First attempt to interpret as general category property value name.
+    if (LookupPropertyValueName(alloc, UCHAR_GENERAL_CATEGORY_MASK, name, negate,
+                                ranges, lead_ranges, trail_ranges, wide_ranges)) {
+        return true;
+    }
+
+    // Interpret hard-coded aliases defined by ES but not present in Unicode.
+    if (LookupSpecialPropertyValueName(alloc, name, negate, ranges, lead_ranges,
+                                       trail_ranges, wide_ranges)) {
+        return true;
+    }
+
+    // Then attempt to interpret as binary property name with value name 'Y'
+    // (or 'N' when negated, so the lookup itself is never asked to negate).
+    const UProperty binary_property = u_getPropertyEnum(name.c_str());
+    if (!IsSupportedBinaryProperty(binary_property) || !IsExactPropertyAlias(name, binary_property))
+        return false;
+    return LookupPropertyValueName(alloc, binary_property, negate ? "N" : "Y", false,
+                                   ranges, lead_ranges, trail_ranges, wide_ranges);
+}
+
+// Returns true when |ranges| is in canonical form: each range starts more
+// than one position above the end of the range before it, i.e. the list is
+// increasing with no overlapping or adjacent entries. Lists with at most one
+// entry are always canonical.
+/* static */ bool
+CharacterRange::IsCanonical(const CharacterRangeVector& ranges)
+{
+    const int count = ranges.length();
+    if (count <= 1)
+        return true;
+
+    int highest = ranges[0].to();
+    for (int idx = 1; idx < count; idx++) {
+        CharacterRange candidate = ranges[idx];
+        const bool touches_previous = candidate.from() <= highest + 1;
+        if (touches_previous)
+            return false;
+        highest = candidate.to();
+    }
+    return true;
+}
+
+// Rewrites |character_ranges| in place into canonical form (increasing,
+// non-overlapping, non-adjacent), shrinking the vector if ranges were
+// merged.
+/* static */ void
+CharacterRange::Canonicalize(CharacterRangeVector& character_ranges)
+{
+    if (character_ranges.length() <= 1)
+        return;
+
+    // Find the longest prefix that is already canonical.
+    const int total = character_ranges.length();
+    int prefix = 1;
+    for (int highest = character_ranges[0].to(); prefix < total; prefix++) {
+        CharacterRange current = character_ranges[prefix];
+        if (current.from() <= highest + 1)
+            break;
+        highest = current.to();
+    }
+
+    // Canonical until the prefix'th range. If that's all of them, we are
+    // done.
+    if (prefix == total)
+        return;
+
+    // The ranges at index |prefix| and forward are not canonicalized. Make
+    // them so by doing the equivalent of insertion sort (inserting each into
+    // the previous list, in order).
+    // Notice that inserting a range can reduce the number of ranges in the
+    // result due to combining of adjacent and overlapping ranges.
+    size_t num_canonical = prefix; // Length of canonicalized part of list.
+    for (int read = prefix; read < total; read++) {
+        num_canonical = InsertRangeInCanonicalList(character_ranges,
+                                                   num_canonical,
+                                                   character_ranges[read]);
+    }
+
+    // Drop the now unused tail.
+    while (character_ranges.length() > num_canonical)
+        character_ranges.popBack();
+
+    MOZ_ASSERT(IsCanonical(character_ranges));
+}
+
+// Inserts |insert| into list[0..count[, which must be sorted by from value,
+// non-overlapping and non-adjacent, using at most list[0..count] for the
+// result. Returns the number of resulting canonicalized ranges. Inserting a
+// range may collapse existing ranges into fewer ranges, so the return value
+// can be anything in the range 1..count+1.
+/* static */ int
+CharacterRange::InsertRangeInCanonicalList(CharacterRangeVector& list,
+                                           int count,
+                                           CharacterRange insert)
+{
+    char16_t insert_from = insert.from();
+    char16_t insert_to = insert.to();
+
+    // Scan from the back for the window [merge_begin..merge_end[ of ranges
+    // that overlap, or are adjacent to, the inserted range. Ranges before
+    // merge_begin or at or after merge_end are not affected by the
+    // insertion.
+    int merge_begin = 0;
+    int merge_end = count;
+    for (int idx = count - 1; idx >= 0; idx--) {
+        CharacterRange current = list[idx];
+        if (current.from() > insert_to + 1) {
+            // Strictly above the inserted range: not part of the window.
+            merge_end = idx;
+        } else if (current.to() + 1 < insert_from) {
+            // Strictly below: everything further down is below as well.
+            merge_begin = idx + 1;
+            break;
+        }
+    }
+
+    const int merged = merge_end - merge_begin;
+
+    if (merged == 0) {
+        // Nothing to merge with: insert between existing ranges at position
+        // merge_begin, shifting the tail up by one slot first.
+        if (merge_begin < count)
+            list.moveReplace(merge_begin, merge_begin + 1, count - merge_begin);
+        list[merge_begin] = insert;
+        return count + 1;
+    }
+
+    if (merged == 1) {
+        // Replace single existing range at position merge_begin.
+        CharacterRange to_replace = list[merge_begin];
+        int new_from = Min(to_replace.from(), insert_from);
+        int new_to = Max(to_replace.to(), insert_to);
+        list[merge_begin] = CharacterRange(new_from, new_to);
+        return count;
+    }
+
+    // Replace a number of existing ranges from merge_begin to merge_end - 1.
+    // Move the remaining ranges down.
+    int new_from = Min(list[merge_begin].from(), insert_from);
+    int new_to = Max(list[merge_end - 1].to(), insert_to);
+    if (merge_end < count)
+        list.moveReplace(merge_end, merge_begin + 1, count - merge_end);
+    list[merge_begin] = CharacterRange(new_from, new_to);
+    return count - merged + 1;
+}
+
+// Copies the entries of |choices| (|choices_length| of them) into |letters|,
+// dropping duplicates and -- when |ascii_subject| is set and |unicode| is
+// not -- every entry above kMaxOneByteCharCode. Returns the number of
+// entries written. |character| is not used by this overload.
+int
+irregexp::GetCaseIndependentLetters(char16_t character,
+                                    bool ascii_subject,
+                                    bool unicode,
+                                    const char16_t* choices,
+                                    size_t choices_length,
+                                    char16_t* letters)
+{
+    size_t stored = 0;
+    for (size_t idx = 0; idx < choices_length; idx++) {
+        const char16_t candidate = choices[idx];
+
+        // Skip characters that can't appear in one byte strings.
+        if (candidate > kMaxOneByteCharCode && ascii_subject && !unicode)
+            continue;
+
+        // Watch for duplicates: stop at the first stored entry equal to the
+        // candidate, if there is one.
+        size_t seen = 0;
+        while (seen < stored && letters[seen] != candidate)
+            seen++;
+        if (seen < stored)
+            continue;
+
+        letters[stored++] = candidate;
+    }
+
+    return stored;
+}
+
+// Fills |letters| with the code units treated as case-insensitively equal to
+// |character| and returns how many were written.
+//
+// In |unicode| mode the candidates are the character, its case folding and
+// its three reverse foldings. Otherwise they are the character, its upper
+// case and the three other code points sharing that upper case.
+// De-duplication and one-byte filtering are done by the six-argument
+// overload.
+int
+irregexp::GetCaseIndependentLetters(char16_t character,
+                                    bool ascii_subject,
+                                    bool unicode,
+                                    char16_t* letters)
+{
+    if (unicode) {
+        const char16_t folded[] = {
+            character,
+            unicode::FoldCase(character),
+            unicode::ReverseFoldCase1(character),
+            unicode::ReverseFoldCase2(character),
+            unicode::ReverseFoldCase3(character),
+        };
+        return GetCaseIndependentLetters(character, ascii_subject, unicode,
+                                         folded, ArrayLength(folded), letters);
+    }
+
+    char16_t upper = unicode::ToUpperCase(character);
+    unicode::CodepointsWithSameUpperCase same_upper(character);
+    char16_t siblings[] = {
+        same_upper.other1(),
+        same_upper.other2(),
+        same_upper.other3()
+    };
+
+    // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2
+    // step 3.g.
+    // The standard requires that non-ASCII characters cannot have ASCII
+    // character codes in their equivalence class, even though this
+    // situation occurs multiple times in the Unicode tables.
+    static const unsigned kMaxAsciiCharCode = 127;
+    if (upper <= kMaxAsciiCharCode) {
+        if (character > kMaxAsciiCharCode) {
+            // If Canonicalize(character) == character, all other characters
+            // should be ignored.
+            return GetCaseIndependentLetters(character, ascii_subject, unicode,
+                                             &character, 1, letters);
+        }
+
+        // An ASCII character must not pick up non-ASCII siblings: replace
+        // each such sibling with the character itself.
+        for (size_t idx = 0; idx < ArrayLength(siblings); idx++) {
+            if (siblings[idx] > kMaxAsciiCharCode)
+                siblings[idx] = character;
+        }
+    }
+
+    const char16_t choices[] = {
+        character,
+        upper,
+        siblings[0],
+        siblings[1],
+        siblings[2]
+    };
+    return GetCaseIndependentLetters(character, ascii_subject, unicode,
+                                     choices, ArrayLength(choices), letters);
+}
+
+/*
+Generated from the following Node.js source:
+
+package.json
+
+```
+{
+ "private": true,
+ "dependencies": {
+ "unicode-12.0.0": "^0.7.9"
+ }
+}
+```
+
+generate-unicode-sequence-property-data.js
+
+```
+const toHex = (symbol) => {
+ return '0x' + symbol.codePointAt(0).toString(16)
+ .toUpperCase().padStart(6, '0');
+};
+
+const generateData = (property) => {
+ const sequences =
+ require(`unicode-12.0.0/Sequence_Property/${ property }/index.js`);
+ const id = property.replace(/_/g, '') + 's';
+ const buffer = [];
+ for (const sequence of sequences) {
+ const symbols = [...sequence];
+ const codePoints = symbols.map(symbol => toHex(symbol));
+ buffer.push(' ' + codePoints.join(', ') + ', 0,');
+ }
+ const output =
+ `const uc32 UnicodePropertySequences::k${ id }[] = {\n` +
+ `${ buffer.join('\n') }\n 0 // null-terminating the list\n};\n`;
+ return output;
+};
+
+const properties = [
+ 'Emoji_Flag_Sequence',
+ 'Emoji_Tag_Sequence',
+ 'Emoji_ZWJ_Sequence',
+];
+
+for (const property of properties) {
+ console.log(generateData(property));
+}
+```
+*/
+
+const widechar js::irregexp::kEmojiFlagSequences[] = {
+ 0x01F1E6, 0x01F1E8, 0,
+ 0x01F1FF, 0x01F1FC, 0,
+ 0x01F1E6, 0x01F1EA, 0,
+ 0x01F1E6, 0x01F1EB, 0,
+ 0x01F1E6, 0x01F1EC, 0,
+ 0x01F1E6, 0x01F1EE, 0,
+ 0x01F1E6, 0x01F1F1, 0,
+ 0x01F1E6, 0x01F1F2, 0,
+ 0x01F1E6, 0x01F1F4, 0,
+ 0x01F1E6, 0x01F1F6, 0,
+ 0x01F1E6, 0x01F1F7, 0,
+ 0x01F1E6, 0x01F1F8, 0,
+ 0x01F1E6, 0x01F1F9, 0,
+ 0x01F1E6, 0x01F1FA, 0,
+ 0x01F1E6, 0x01F1FC, 0,
+ 0x01F1E6, 0x01F1FD, 0,
+ 0x01F1E6, 0x01F1FF, 0,
+ 0x01F1E7, 0x01F1E6, 0,
+ 0x01F1E7, 0x01F1E7, 0,
+ 0x01F1E7, 0x01F1E9, 0,
+ 0x01F1E7, 0x01F1EA, 0,
+ 0x01F1E7, 0x01F1EB, 0,
+ 0x01F1E7, 0x01F1EC, 0,
+ 0x01F1E7, 0x01F1ED, 0,
+ 0x01F1E7, 0x01F1EE, 0,
+ 0x01F1E7, 0x01F1EF, 0,
+ 0x01F1E7, 0x01F1F1, 0,
+ 0x01F1E7, 0x01F1F2, 0,
+ 0x01F1E7, 0x01F1F3, 0,
+ 0x01F1E7, 0x01F1F4, 0,
+ 0x01F1E7, 0x01F1F6, 0,
+ 0x01F1E7, 0x01F1F7, 0,
+ 0x01F1E7, 0x01F1F8, 0,
+ 0x01F1E7, 0x01F1F9, 0,
+ 0x01F1E7, 0x01F1FB, 0,
+ 0x01F1E7, 0x01F1FC, 0,
+ 0x01F1E7, 0x01F1FE, 0,
+ 0x01F1E7, 0x01F1FF, 0,
+ 0x01F1E8, 0x01F1E6, 0,
+ 0x01F1E8, 0x01F1E8, 0,
+ 0x01F1E8, 0x01F1E9, 0,
+ 0x01F1E8, 0x01F1EB, 0,
+ 0x01F1E8, 0x01F1EC, 0,
+ 0x01F1E8, 0x01F1ED, 0,
+ 0x01F1E8, 0x01F1EE, 0,
+ 0x01F1E8, 0x01F1F0, 0,
+ 0x01F1E8, 0x01F1F1, 0,
+ 0x01F1E8, 0x01F1F2, 0,
+ 0x01F1E8, 0x01F1F3, 0,
+ 0x01F1E8, 0x01F1F4, 0,
+ 0x01F1E8, 0x01F1F5, 0,
+ 0x01F1E8, 0x01F1F7, 0,
+ 0x01F1E8, 0x01F1FA, 0,
+ 0x01F1E8, 0x01F1FB, 0,
+ 0x01F1E8, 0x01F1FC, 0,
+ 0x01F1E8, 0x01F1FD, 0,
+ 0x01F1E8, 0x01F1FE, 0,
+ 0x01F1E8, 0x01F1FF, 0,
+ 0x01F1E9, 0x01F1EA, 0,
+ 0x01F1E9, 0x01F1EC, 0,
+ 0x01F1E9, 0x01F1EF, 0,
+ 0x01F1E9, 0x01F1F0, 0,
+ 0x01F1E9, 0x01F1F2, 0,
+ 0x01F1E9, 0x01F1F4, 0,
+ 0x01F1E9, 0x01F1FF, 0,
+ 0x01F1EA, 0x01F1E6, 0,
+ 0x01F1EA, 0x01F1E8, 0,
+ 0x01F1EA, 0x01F1EA, 0,
+ 0x01F1EA, 0x01F1EC, 0,
+ 0x01F1EA, 0x01F1ED, 0,
+ 0x01F1EA, 0x01F1F7, 0,
+ 0x01F1EA, 0x01F1F8, 0,
+ 0x01F1EA, 0x01F1F9, 0,
+ 0x01F1EA, 0x01F1FA, 0,
+ 0x01F1EB, 0x01F1EE, 0,
+ 0x01F1EB, 0x01F1EF, 0,
+ 0x01F1EB, 0x01F1F0, 0,
+ 0x01F1EB, 0x01F1F2, 0,
+ 0x01F1EB, 0x01F1F4, 0,
+ 0x01F1EB, 0x01F1F7, 0,
+ 0x01F1EC, 0x01F1E6, 0,
+ 0x01F1EC, 0x01F1E7, 0,
+ 0x01F1EC, 0x01F1E9, 0,
+ 0x01F1EC, 0x01F1EA, 0,
+ 0x01F1EC, 0x01F1EB, 0,
+ 0x01F1EC, 0x01F1EC, 0,
+ 0x01F1EC, 0x01F1ED, 0,
+ 0x01F1EC, 0x01F1EE, 0,
+ 0x01F1EC, 0x01F1F1, 0,
+ 0x01F1EC, 0x01F1F2, 0,
+ 0x01F1EC, 0x01F1F3, 0,
+ 0x01F1EC, 0x01F1F5, 0,
+ 0x01F1EC, 0x01F1F6, 0,
+ 0x01F1EC, 0x01F1F7, 0,
+ 0x01F1EC, 0x01F1F8, 0,
+ 0x01F1EC, 0x01F1F9, 0,
+ 0x01F1EC, 0x01F1FA, 0,
+ 0x01F1EC, 0x01F1FC, 0,
+ 0x01F1EC, 0x01F1FE, 0,
+ 0x01F1ED, 0x01F1F0, 0,
+ 0x01F1ED, 0x01F1F2, 0,
+ 0x01F1ED, 0x01F1F3, 0,
+ 0x01F1ED, 0x01F1F7, 0,
+ 0x01F1ED, 0x01F1F9, 0,
+ 0x01F1ED, 0x01F1FA, 0,
+ 0x01F1EE, 0x01F1E8, 0,
+ 0x01F1EE, 0x01F1E9, 0,
+ 0x01F1EE, 0x01F1EA, 0,
+ 0x01F1EE, 0x01F1F1, 0,
+ 0x01F1EE, 0x01F1F2, 0,
+ 0x01F1EE, 0x01F1F3, 0,
+ 0x01F1EE, 0x01F1F4, 0,
+ 0x01F1EE, 0x01F1F6, 0,
+ 0x01F1EE, 0x01F1F7, 0,
+ 0x01F1EE, 0x01F1F8, 0,
+ 0x01F1EE, 0x01F1F9, 0,
+ 0x01F1EF, 0x01F1EA, 0,
+ 0x01F1EF, 0x01F1F2, 0,
+ 0x01F1EF, 0x01F1F4, 0,
+ 0x01F1EF, 0x01F1F5, 0,
+ 0x01F1F0, 0x01F1EA, 0,
+ 0x01F1F0, 0x01F1EC, 0,
+ 0x01F1F0, 0x01F1ED, 0,
+ 0x01F1F0, 0x01F1EE, 0,
+ 0x01F1F0, 0x01F1F2, 0,
+ 0x01F1F0, 0x01F1F3, 0,
+ 0x01F1F0, 0x01F1F5, 0,
+ 0x01F1F0, 0x01F1F7, 0,
+ 0x01F1F0, 0x01F1FC, 0,
+ 0x01F1E6, 0x01F1E9, 0,
+ 0x01F1F0, 0x01F1FF, 0,
+ 0x01F1F1, 0x01F1E6, 0,
+ 0x01F1F1, 0x01F1E7, 0,
+ 0x01F1F1, 0x01F1E8, 0,
+ 0x01F1F1, 0x01F1EE, 0,
+ 0x01F1F1, 0x01F1F0, 0,
+ 0x01F1F1, 0x01F1F7, 0,
+ 0x01F1F1, 0x01F1F8, 0,
+ 0x01F1F1, 0x01F1F9, 0,
+ 0x01F1F1, 0x01F1FA, 0,
+ 0x01F1F1, 0x01F1FB, 0,
+ 0x01F1F1, 0x01F1FE, 0,
+ 0x01F1F2, 0x01F1E6, 0,
+ 0x01F1F2, 0x01F1E8, 0,
+ 0x01F1F2, 0x01F1E9, 0,
+ 0x01F1F2, 0x01F1EA, 0,
+ 0x01F1F2, 0x01F1EB, 0,
+ 0x01F1F2, 0x01F1EC, 0,
+ 0x01F1F2, 0x01F1ED, 0,
+ 0x01F1F2, 0x01F1F0, 0,
+ 0x01F1F2, 0x01F1F1, 0,
+ 0x01F1F2, 0x01F1F2, 0,
+ 0x01F1F2, 0x01F1F3, 0,
+ 0x01F1F2, 0x01F1F4, 0,
+ 0x01F1F2, 0x01F1F5, 0,
+ 0x01F1F2, 0x01F1F6, 0,
+ 0x01F1F2, 0x01F1F7, 0,
+ 0x01F1F2, 0x01F1F8, 0,
+ 0x01F1F2, 0x01F1F9, 0,
+ 0x01F1F2, 0x01F1FA, 0,
+ 0x01F1F2, 0x01F1FB, 0,
+ 0x01F1F2, 0x01F1FC, 0,
+ 0x01F1F2, 0x01F1FD, 0,
+ 0x01F1F2, 0x01F1FE, 0,
+ 0x01F1F2, 0x01F1FF, 0,
+ 0x01F1F3, 0x01F1E6, 0,
+ 0x01F1F3, 0x01F1E8, 0,
+ 0x01F1F3, 0x01F1EA, 0,
+ 0x01F1F3, 0x01F1EB, 0,
+ 0x01F1F3, 0x01F1EC, 0,
+ 0x01F1F3, 0x01F1EE, 0,
+ 0x01F1F3, 0x01F1F1, 0,
+ 0x01F1F3, 0x01F1F4, 0,
+ 0x01F1F3, 0x01F1F5, 0,
+ 0x01F1F3, 0x01F1F7, 0,
+ 0x01F1F3, 0x01F1FA, 0,
+ 0x01F1F3, 0x01F1FF, 0,
+ 0x01F1F4, 0x01F1F2, 0,
+ 0x01F1F5, 0x01F1E6, 0,
+ 0x01F1F5, 0x01F1EA, 0,
+ 0x01F1F5, 0x01F1EB, 0,
+ 0x01F1F5, 0x01F1EC, 0,
+ 0x01F1F5, 0x01F1ED, 0,
+ 0x01F1F5, 0x01F1F0, 0,
+ 0x01F1F5, 0x01F1F1, 0,
+ 0x01F1F5, 0x01F1F2, 0,
+ 0x01F1F5, 0x01F1F3, 0,
+ 0x01F1F5, 0x01F1F7, 0,
+ 0x01F1F5, 0x01F1F8, 0,
+ 0x01F1F5, 0x01F1F9, 0,
+ 0x01F1F5, 0x01F1FC, 0,
+ 0x01F1F5, 0x01F1FE, 0,
+ 0x01F1F6, 0x01F1E6, 0,
+ 0x01F1F7, 0x01F1EA, 0,
+ 0x01F1F7, 0x01F1F4, 0,
+ 0x01F1F7, 0x01F1F8, 0,
+ 0x01F1F7, 0x01F1FA, 0,
+ 0x01F1F7, 0x01F1FC, 0,
+ 0x01F1F8, 0x01F1E6, 0,
+ 0x01F1F8, 0x01F1E7, 0,
+ 0x01F1F8, 0x01F1E8, 0,
+ 0x01F1F8, 0x01F1E9, 0,
+ 0x01F1F8, 0x01F1EA, 0,
+ 0x01F1F8, 0x01F1EC, 0,
+ 0x01F1F8, 0x01F1ED, 0,
+ 0x01F1F8, 0x01F1EE, 0,
+ 0x01F1F8, 0x01F1EF, 0,
+ 0x01F1F8, 0x01F1F0, 0,
+ 0x01F1F8, 0x01F1F1, 0,
+ 0x01F1F8, 0x01F1F2, 0,
+ 0x01F1F8, 0x01F1F3, 0,
+ 0x01F1F8, 0x01F1F4, 0,
+ 0x01F1F8, 0x01F1F7, 0,
+ 0x01F1F8, 0x01F1F8, 0,
+ 0x01F1F8, 0x01F1F9, 0,
+ 0x01F1F8, 0x01F1FB, 0,
+ 0x01F1F8, 0x01F1FD, 0,
+ 0x01F1F8, 0x01F1FE, 0,
+ 0x01F1F8, 0x01F1FF, 0,
+ 0x01F1F9, 0x01F1E6, 0,
+ 0x01F1F9, 0x01F1E8, 0,
+ 0x01F1F9, 0x01F1E9, 0,
+ 0x01F1F9, 0x01F1EB, 0,
+ 0x01F1F9, 0x01F1EC, 0,
+ 0x01F1F9, 0x01F1ED, 0,
+ 0x01F1F9, 0x01F1EF, 0,
+ 0x01F1F9, 0x01F1F0, 0,
+ 0x01F1F9, 0x01F1F1, 0,
+ 0x01F1F9, 0x01F1F2, 0,
+ 0x01F1F9, 0x01F1F3, 0,
+ 0x01F1F9, 0x01F1F4, 0,
+ 0x01F1F9, 0x01F1F7, 0,
+ 0x01F1F9, 0x01F1F9, 0,
+ 0x01F1F9, 0x01F1FB, 0,
+ 0x01F1F9, 0x01F1FC, 0,
+ 0x01F1F9, 0x01F1FF, 0,
+ 0x01F1FA, 0x01F1E6, 0,
+ 0x01F1FA, 0x01F1EC, 0,
+ 0x01F1FA, 0x01F1F2, 0,
+ 0x01F1FA, 0x01F1F3, 0,
+ 0x01F1FA, 0x01F1F8, 0,
+ 0x01F1FA, 0x01F1FE, 0,
+ 0x01F1FA, 0x01F1FF, 0,
+ 0x01F1FB, 0x01F1E6, 0,
+ 0x01F1FB, 0x01F1E8, 0,
+ 0x01F1FB, 0x01F1EA, 0,
+ 0x01F1FB, 0x01F1EC, 0,
+ 0x01F1FB, 0x01F1EE, 0,
+ 0x01F1FB, 0x01F1F3, 0,
+ 0x01F1FB, 0x01F1FA, 0,
+ 0x01F1FC, 0x01F1EB, 0,
+ 0x01F1FC, 0x01F1F8, 0,
+ 0x01F1FD, 0x01F1F0, 0,
+ 0x01F1FE, 0x01F1EA, 0,
+ 0x01F1FE, 0x01F1F9, 0,
+ 0x01F1FF, 0x01F1E6, 0,
+ 0x01F1FF, 0x01F1F2, 0,
+ 0x01F1F0, 0x01F1FE, 0,
+ 0 // null-terminating the list
+};
+
+const widechar js::irregexp::kEmojiTagSequences[] = {  // Emoji tag sequences as code points; every row ends with a 0 entry.
+ 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0065, 0x0E006E, 0x0E0067, 0x0E007F, 0,  // U+1F3F4, tag letters "gbeng", U+E007F (cancel tag): flag of England per Unicode emoji-sequences.txt
+ 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0073, 0x0E0063, 0x0E0074, 0x0E007F, 0,  // U+1F3F4, tag letters "gbsct", U+E007F (cancel tag): flag of Scotland per Unicode emoji-sequences.txt
+ 0x01F3F4, 0x0E0067, 0x0E0062, 0x0E0077, 0x0E006C, 0x0E0073, 0x0E007F, 0,  // U+1F3F4, tag letters "gbwls", U+E007F (cancel tag): flag of Wales per Unicode emoji-sequences.txt
+ 0 // null-terminating the list: a lone 0 after the last row's own 0 marks the end of the table
+};
+
+const widechar js::irregexp::kEmojiZWJSequences[] = {
+ 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0,
+ 0x01F441, 0x00FE0F, 0x00200D, 0x01F5E8, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F468, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
+ 0x01F468, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
+ 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F468, 0,
+ 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F469, 0,
+ 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
+ 0x01F468, 0,
+ 0x01F469, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
+ 0x01F469, 0,
+ 0x01F469, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F467, 0,
+ 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
+ 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F466, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0,
+ 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F466, 0,
+ 0x01F469, 0x00200D, 0x01F469, 0x00200D, 0x01F467, 0x00200D, 0x01F467, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FF, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F468, 0x01F3FE, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FB, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FC, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FD, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F469, 0x01F3FE, 0,
+ 0x01F9D1, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0,
+ 0x01F9D1, 0x01F3FB, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
+ 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
+ 0x01F9D1, 0x01F3FC, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
+ 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
+ 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
+ 0x01F9D1, 0x01F3FD, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
+ 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
+ 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
+ 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
+ 0x01F9D1, 0x01F3FE, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0,
+ 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FB, 0,
+ 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FC, 0,
+ 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FD, 0,
+ 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FE, 0,
+ 0x01F9D1, 0x01F3FF, 0x00200D, 0x01F91D, 0x00200D, 0x01F9D1, 0x01F3FF, 0,
+ 0x01F468, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x00200D, 0x01F9BD, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9BD, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9BD, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9BD, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9BD, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F33E, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F373, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F393, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A4, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F3A8, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F3EB, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F3ED, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BB, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F4BC, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F527, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F52C, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F680, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F692, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9AF, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BC, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9BD, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x002695, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x002696, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x002708, 0x00FE0F, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F33E, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F373, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F393, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A4, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F3A8, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F3EB, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F3ED, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BB, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F4BC, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F527, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F52C, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F680, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F692, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9AF, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BC, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9BD, 0,
+ 0x0026F9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x0026F9, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x0026F9, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3C4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CB, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CB, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F3CC, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F3CC, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F46F, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F46F, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F471, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F473, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F477, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F481, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F482, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F482, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x002764, 0x00FE0F, 0x00200D, 0x01F48B, 0x00200D,
+ 0x01F468, 0,
+ 0x01F482, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F482, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F486, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F487, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F575, 0x00FE0F, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F575, 0x00FE0F, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F645, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F646, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F647, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64B, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F64E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6A3, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B4, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B5, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F6B6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F926, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F937, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F938, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F939, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93C, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93C, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93D, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F93E, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9B9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CE, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9CF, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D6, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D7, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D8, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9D9, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DA, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DB, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DC, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FB, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FC, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FC, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FD, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FD, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DD, 0x01F3FF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DE, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DE, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F9DF, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0x01F9DF, 0x00200D, 0x002642, 0x00FE0F, 0,
+ 0x01F468, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x00200D, 0x01F9B3, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x01F3FB, 0x00200D, 0x01F9B3, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x01F3FC, 0x00200D, 0x01F9B3, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x01F3FD, 0x00200D, 0x01F9B3, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x01F3FE, 0x00200D, 0x01F9B3, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B0, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B1, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B2, 0,
+ 0x01F468, 0x01F3FF, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x01F3FB, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x01F3FC, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x01F3FD, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x01F3FE, 0x00200D, 0x01F9B3, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B0, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B1, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B2, 0,
+ 0x01F469, 0x01F3FF, 0x00200D, 0x01F9B3, 0,
+ 0x01F3F3, 0x00FE0F, 0x00200D, 0x01F308, 0,
+ 0x01F3F4, 0x00200D, 0x002620, 0x00FE0F, 0,
+ 0x01F415, 0x00200D, 0x01F9BA, 0,
+ 0x01F482, 0x01F3FB, 0x00200D, 0x002640, 0x00FE0F, 0,
+ 0 // null-terminating the list
+};
+
+
diff --git a/js/src/irregexp/RegExpCharRanges.h b/js/src/irregexp/RegExpCharRanges.h
new file mode 100644
index 0000000000..16a1c00b06
--- /dev/null
+++ b/js/src/irregexp/RegExpCharRanges.h
@@ -0,0 +1,235 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_JSREGEXPCHARRANGES_H_
+#define V8_JSREGEXPCHARRANGES_H_
+
+#include <string>
+
+#include "irregexp/RegExpCharacters.h"
+#include "irregexp/InfallibleVector.h"
+
+namespace js {
+
+namespace irregexp {
+
+// A character as parsed by RegExpParser: either a char16_t or kEndMarker.
+using widechar = uint32_t;
+
+static const int kMaxOneByteCharCode = 0xff; // largest value of a one-byte character
+static const int kMaxUtf16CodeUnit = 0xffff; // largest value of a 16-bit code unit
+static const size_t kEcma262UnCanonicalizeMaxWidth = 4; // NOTE(review): presumably the max size of a case-equivalence class - confirm
+static const char16_t kNoCharClass = 0; // char_class value meaning "not a class escape"
+
+extern const widechar kEmojiFlagSequences[]; // the three emoji tables are only declared here
+extern const widechar kEmojiTagSequences[]; // NOTE(review): layout presumably 0-terminated sequences plus a final 0 - confirm
+extern const widechar kEmojiZWJSequences[];
+
+// Upper bound on a code unit value: kMaxOneByteCharCode if |ascii|, else kMaxUtf16CodeUnit.
+static inline char16_t MaximumCharacter(bool ascii)
+{
+ return static_cast<char16_t>(ascii ? kMaxOneByteCharCode : kMaxUtf16CodeUnit);
+}
+
+
+// Returns the number of characters in the equivalence class, omitting those
+// that cannot occur in the source string if it is a one byte string.
+int
+GetCaseIndependentLetters(char16_t character, // character whose equivalence class is requested
+ bool ascii_subject, // NOTE(review): presumably true when the source string is a one byte string - confirm
+ bool unicode, // NOTE(review): presumably the regexp's unicode flag - confirm
+ const char16_t* choices, // array of choices_length candidates (semantics: see definition)
+ size_t choices_length,
+ char16_t* letters); // NOTE(review): presumably an output buffer of kEcma262UnCanonicalizeMaxWidth entries - confirm
+
+int
+GetCaseIndependentLetters(char16_t character,
+ bool ascii_subject,
+ bool unicode,
+ char16_t* letters); // overload without an explicit choices array
+
+class CharacterRange; // range of char16_t code units
+class WideCharRange; // range of widechar values
+using CharacterRangeVector = InfallibleVector<CharacterRange, 1>;
+using WideCharRangeVector = InfallibleVector<WideCharRange, 1>;
+
+// Represents the 16-bit code units in the range from from_ to to_; both ends
+// are inclusive. All read-only queries are const-qualified.
+class CharacterRange
+{
+ public:
+ // static methods for dealing with CharacterRangeVectors
+ // NOTE(review): elmv looks like [from, to) pairs closed by a 0x10000 marker, as in the old RegExpEngine.cpp helpers - confirm.
+ static void AddClass(const int* elmv, int elmc, CharacterRangeVector* ranges);
+ static void AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges);
+ static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges);
+ static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
+ CharacterRangeVector* ranges, bool ignoreCase);
+
+ // Adds a character or pre-defined character class to character ranges.
+ // If char_class is not kNoCharClass, it's interpreted as a class
+ // escape (e.g., 's' means whitespace, from '\s').
+ static void AddCharOrEscape(LifoAlloc* alloc, CharacterRangeVector* ranges,
+ char16_t char_class, widechar c);
+ static void AddCharOrEscapeUnicode(LifoAlloc* alloc,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges, // NOTE(review): presumably lead-surrogate ranges - confirm
+ CharacterRangeVector* trail_ranges, // NOTE(review): presumably trail-surrogate ranges - confirm
+ WideCharRangeVector* wide_ranges, // NOTE(review): presumably non-BMP ranges - confirm
+ char16_t char_class,
+ widechar c,
+ bool ignore_case);
+ // Simplified version of AddUnicodeRange for single characters
+ static void AddCharUnicode(LifoAlloc* alloc,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges,
+ CharacterRangeVector* trail_ranges,
+ WideCharRangeVector* wide_ranges,
+ widechar c);
+ static void AddUnicodeRange(LifoAlloc* alloc,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges,
+ CharacterRangeVector* trail_ranges,
+ WideCharRangeVector* wide_ranges,
+ widechar first,
+ widechar next);
+
+ static bool RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode);
+ static bool CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length);
+ static bool CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length);
+
+ // Negate a vector of ranges by subtracting its ranges from a range
+ // encompassing the full range of possible values.
+ template <typename RangeType>
+ static void NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
+ RangeType full_range);
+
+ // static methods for Unicode Property Escapes
+ static bool AddPropertyClassRange(LifoAlloc* alloc,
+ const std::string& name, const std::string& value,
+ bool negate, bool ignore_case,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges,
+ CharacterRangeVector* trail_ranges,
+ WideCharRangeVector* wide_ranges);
+
+ // static methods for dealing with canonical CharacterRangeVectors
+
+ // Whether a range list is in canonical form: Ranges ordered by from value,
+ // and ranges non-overlapping and non-adjacent.
+ static bool IsCanonical(const CharacterRangeVector& ranges);
+
+ // Convert range list to canonical form. The characters covered by the ranges
+ // will still be the same, but no character is in more than one range, and
+ // adjacent ranges are merged. The resulting list may be shorter than the
+ // original, but cannot be longer.
+ static void Canonicalize(CharacterRangeVector& ranges);
+
+ static int InsertRangeInCanonicalList(CharacterRangeVector& list, int count, CharacterRange insert);
+
+ // Negate the contents of a character range in canonical form.
+ static void Negate(const LifoAlloc* alloc,
+ CharacterRangeVector src, // NOTE(review): taken by value - confirm the copy is intended
+ CharacterRangeVector* dst);
+ // constructors, named factories and per-range accessors (still public)
+ CharacterRange()
+ : from_(0), to_(0)
+ {}
+
+ CharacterRange(char16_t from, char16_t to) // no ordering check here; see Range() and is_valid()
+ : from_(from), to_(to)
+ {}
+
+ static inline CharacterRange Singleton(char16_t value) {
+ return CharacterRange(value, value);
+ }
+ static inline CharacterRange Range(char16_t from, char16_t to) {
+ MOZ_ASSERT(from <= to);
+ return CharacterRange(from, to);
+ }
+ static inline CharacterRange Everything() {
+ return CharacterRange(0, kMaxUtf16CodeUnit);
+ }
+ static inline CharacterRange LeadSurrogate() {
+ return CharacterRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax);
+ }
+ static inline CharacterRange TrailSurrogate() {
+ return CharacterRange(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax);
+ }
+ bool Contains(char16_t i) const { return from_ <= i && i <= to_; } // inclusive at both ends
+ char16_t from() const { return from_; }
+ void set_from(char16_t value) { from_ = value; }
+ char16_t to() const { return to_; }
+ void set_to(char16_t value) { to_ = value; }
+ bool is_valid() const { return from_ <= to_; }
+ bool IsEverything(char16_t max) const { return from_ == 0 && to_ >= max; } // covers [0, max]
+ bool IsSingleton() const { return (from_ == to_); }
+
+ void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges);
+ private:
+ char16_t from_;
+ char16_t to_;
+};
+
+// Range of widechar values from from_ to to_, both ends inclusive; unlike
+// CharacterRange, the bounds are widechar rather than char16_t.
+class WideCharRange
+{
+ public:
+ // The default range is [0, 0].
+ WideCharRange() : from_(0), to_(0) {}
+
+ // Stores the bounds as given; only Range() asserts that from <= to.
+ WideCharRange(widechar from, widechar to) : from_(from), to_(to) {}
+
+ // Named constructors.
+ static WideCharRange Singleton(widechar value) {
+ return WideCharRange(value, value);
+ }
+ static WideCharRange Range(widechar from, widechar to) {
+ MOZ_ASSERT(from <= to);
+ return WideCharRange(from, to);
+ }
+ static WideCharRange NonBMP() {
+ return WideCharRange(unicode::NonBMPMin, unicode::NonBMPMax);
+ }
+
+ bool Contains(widechar i) const { return i >= from_ && i <= to_; }
+ widechar from() const { return from_; }
+ widechar to() const { return to_; }
+
+ private:
+ widechar from_;
+ widechar to_;
+};
+
+
+} } // namespace js::irregexp
+
+#endif // V8_JSREGEXPCHARRANGES_H_
diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp
index 07679e21b8..f3db7c1847 100644
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -30,18 +30,14 @@
#include "irregexp/RegExpEngine.h"
#include "irregexp/NativeRegExpMacroAssembler.h"
-#include "irregexp/RegExpCharacters.h"
+#include "irregexp/RegExpCharacters.h"
#include "irregexp/RegExpMacroAssembler.h"
#include "jit/ExecutableAllocator.h"
#include "jit/JitCommon.h"
-// Generated table
-#include "irregexp/RegExpCharacters-inl.h"
-
using namespace js;
using namespace js::irregexp;
-using mozilla::ArrayLength;
using mozilla::DebugOnly;
using mozilla::Maybe;
@@ -64,317 +60,6 @@ RegExpNode::RegExpNode(LifoAlloc* alloc)
bm_info_[0] = bm_info_[1] = nullptr;
}
-static const int kMaxOneByteCharCode = 0xff;
-static const int kMaxUtf16CodeUnit = 0xffff;
-
-static char16_t
-MaximumCharacter(bool ascii)
-{
- return ascii ? kMaxOneByteCharCode : kMaxUtf16CodeUnit;
-}
-
-static void
-AddClass(const int* elmv, int elmc,
- CharacterRangeVector* ranges)
-{
- elmc--;
- MOZ_ASSERT(elmv[elmc] == 0x10000);
- for (int i = 0; i < elmc; i += 2) {
- MOZ_ASSERT(elmv[i] < elmv[i + 1]);
- ranges->append(CharacterRange(elmv[i], elmv[i + 1] - 1));
- }
-}
-
-static void
-AddClassNegated(const int* elmv,
- int elmc,
- CharacterRangeVector* ranges)
-{
- elmc--;
- MOZ_ASSERT(elmv[elmc] == 0x10000);
- MOZ_ASSERT(elmv[0] != 0x0000);
- MOZ_ASSERT(elmv[elmc-1] != kMaxUtf16CodeUnit);
- char16_t last = 0x0000;
- for (int i = 0; i < elmc; i += 2) {
- MOZ_ASSERT(last <= elmv[i] - 1);
- MOZ_ASSERT(elmv[i] < elmv[i + 1]);
- ranges->append(CharacterRange(last, elmv[i] - 1));
- last = elmv[i + 1];
- }
- ranges->append(CharacterRange(last, kMaxUtf16CodeUnit));
-}
-
-void
-CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type,
- CharacterRangeVector* ranges)
-{
- switch (type) {
- case 's':
- AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
- break;
- case 'S':
- AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
- break;
- case 'w':
- AddClass(kWordRanges, kWordRangeCount, ranges);
- break;
- case 'W':
- AddClassNegated(kWordRanges, kWordRangeCount, ranges);
- break;
- case 'd':
- AddClass(kDigitRanges, kDigitRangeCount, ranges);
- break;
- case 'D':
- AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
- break;
- case '.':
- AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
- break;
- // This is not a character range as defined by the spec but a
- // convenient shorthand for a character class that matches any
- // character.
- case '*':
- ranges->append(CharacterRange::Everything());
- break;
- // This is the set of characters matched by the $ and ^ symbols
- // in multiline mode.
- case 'n':
- AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
- break;
- default:
- MOZ_CRASH("Bad character class escape");
- }
-}
-
-// Add class escape, excluding surrogate pair range.
-void
-CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
- CharacterRangeVector* ranges, bool ignore_case)
-{
- switch (type) {
- case 's':
- case 'd':
- return AddClassEscape(alloc, type, ranges);
- break;
- case 'S':
- AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges);
- break;
- case 'w':
- if (ignore_case)
- AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges);
- else
- AddClassEscape(alloc, type, ranges);
- break;
- case 'W':
- if (ignore_case) {
- AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges,
- kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges);
- } else {
- AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
- }
- break;
- case 'D':
- AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges);
- break;
- default:
- MOZ_CRASH("Bad type!");
- }
-}
-
-static bool
-RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
-{
- for (size_t i = 0; i < ranges.length(); i++) {
- // TODO(dcarney): this could be a lot more efficient.
- if (RangeContainsLatin1Equivalents(ranges[i], unicode))
- return true;
- }
- return false;
-}
-
-static const size_t kEcma262UnCanonicalizeMaxWidth = 4;
-
-// Returns the number of characters in the equivalence class, omitting those
-// that cannot occur in the source string if it is a one byte string.
-static int
-GetCaseIndependentLetters(char16_t character,
- bool ascii_subject,
- bool unicode,
- const char16_t* choices,
- size_t choices_length,
- char16_t* letters)
-{
- size_t count = 0;
- for (size_t i = 0; i < choices_length; i++) {
- char16_t c = choices[i];
-
- // Skip characters that can't appear in one byte strings.
- if (!unicode && ascii_subject && c > kMaxOneByteCharCode)
- continue;
-
- // Watch for duplicates.
- bool found = false;
- for (size_t j = 0; j < count; j++) {
- if (letters[j] == c) {
- found = true;
- break;
- }
- }
- if (found)
- continue;
-
- letters[count++] = c;
- }
-
- return count;
-}
-
-static int
-GetCaseIndependentLetters(char16_t character,
- bool ascii_subject,
- bool unicode,
- char16_t* letters)
-{
- if (unicode) {
- const char16_t choices[] = {
- character,
- unicode::FoldCase(character),
- unicode::ReverseFoldCase1(character),
- unicode::ReverseFoldCase2(character),
- unicode::ReverseFoldCase3(character),
- };
- return GetCaseIndependentLetters(character, ascii_subject, unicode,
- choices, ArrayLength(choices), letters);
- }
-
- char16_t upper = unicode::ToUpperCase(character);
- unicode::CodepointsWithSameUpperCase others(character);
- char16_t other1 = others.other1();
- char16_t other2 = others.other2();
- char16_t other3 = others.other3();
-
- // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2
- // step 3.g.
- // The standard requires that non-ASCII characters cannot have ASCII
- // character codes in their equivalence class, even though this
- // situation occurs multiple times in the Unicode tables.
- static const unsigned kMaxAsciiCharCode = 127;
- if (upper <= kMaxAsciiCharCode) {
- if (character > kMaxAsciiCharCode) {
- // If Canonicalize(character) == character, all other characters
- // should be ignored.
- return GetCaseIndependentLetters(character, ascii_subject, unicode,
- &character, 1, letters);
- }
-
- if (other1 > kMaxAsciiCharCode)
- other1 = character;
- if (other2 > kMaxAsciiCharCode)
- other2 = character;
- if (other3 > kMaxAsciiCharCode)
- other3 = character;
- }
-
- const char16_t choices[] = {
- character,
- upper,
- other1,
- other2,
- other3
- };
- return GetCaseIndependentLetters(character, ascii_subject, unicode,
- choices, ArrayLength(choices), letters);
-}
-
-void
-CharacterRange::AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges)
-{
- char16_t bottom = from();
- char16_t top = to();
-
- if (is_ascii && !RangeContainsLatin1Equivalents(*this, unicode)) {
- if (bottom > kMaxOneByteCharCode)
- return;
- if (top > kMaxOneByteCharCode)
- top = kMaxOneByteCharCode;
- }
-
- for (char16_t c = bottom;; c++) {
- char16_t chars[kEcma262UnCanonicalizeMaxWidth];
- size_t length = GetCaseIndependentLetters(c, is_ascii, unicode, chars);
-
- for (size_t i = 0; i < length; i++) {
- char16_t other = chars[i];
- if (other == c)
- continue;
-
- // Try to combine with an existing range.
- bool found = false;
- for (size_t i = 0; i < ranges->length(); i++) {
- CharacterRange& range = (*ranges)[i];
- if (range.Contains(other)) {
- found = true;
- break;
- } else if (other == range.from() - 1) {
- range.set_from(other);
- found = true;
- break;
- } else if (other == range.to() + 1) {
- range.set_to(other);
- found = true;
- break;
- }
- }
-
- if (!found)
- ranges->append(CharacterRange::Singleton(other));
- }
-
- if (c == top)
- break;
- }
-}
-
-static bool
-CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
-{
- length--; // Remove final 0x10000.
- MOZ_ASSERT(special_class[length] == 0x10000);
- MOZ_ASSERT(ranges.length() != 0);
- MOZ_ASSERT(length != 0);
- MOZ_ASSERT(special_class[0] != 0);
- if (ranges.length() != (length >> 1) + 1)
- return false;
- CharacterRange range = ranges[0];
- if (range.from() != 0)
- return false;
- for (size_t i = 0; i < length; i += 2) {
- if (special_class[i] != (range.to() + 1))
- return false;
- range = ranges[(i >> 1) + 1];
- if (special_class[i+1] != range.from())
- return false;
- }
- if (range.to() != 0xffff)
- return false;
- return true;
-}
-
-static bool
-CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
-{
- length--; // Remove final 0x10000.
- MOZ_ASSERT(special_class[length] == 0x10000);
- if (ranges.length() * 2 != length)
- return false;
- for (size_t i = 0; i < length; i += 2) {
- CharacterRange range = ranges[i >> 1];
- if (range.from() != special_class[i] || range.to() != special_class[i + 1] - 1)
- return false;
- }
- return true;
-}
-
bool
RegExpCharacterClass::is_standard(LifoAlloc* alloc)
{
@@ -384,168 +69,37 @@ RegExpCharacterClass::is_standard(LifoAlloc* alloc)
return false;
if (set_.is_standard())
return true;
- if (CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
+ if (CharacterRange::CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
set_.set_standard_set_type('s');
return true;
}
- if (CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
+ if (CharacterRange::CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
set_.set_standard_set_type('S');
return true;
}
- if (CompareInverseRanges(set_.ranges(alloc),
+ if (CharacterRange::CompareInverseRanges(set_.ranges(alloc),
kLineTerminatorRanges,
kLineTerminatorRangeCount)) {
set_.set_standard_set_type('.');
return true;
}
- if (CompareRanges(set_.ranges(alloc),
+ if (CharacterRange::CompareRanges(set_.ranges(alloc),
kLineTerminatorRanges,
kLineTerminatorRangeCount)) {
set_.set_standard_set_type('n');
return true;
}
- if (CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
+ if (CharacterRange::CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
set_.set_standard_set_type('w');
return true;
}
- if (CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
+ if (CharacterRange::CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
set_.set_standard_set_type('W');
return true;
}
return false;
}
-bool
-CharacterRange::IsCanonical(const CharacterRangeVector& ranges)
-{
- int n = ranges.length();
- if (n <= 1)
- return true;
-
- int max = ranges[0].to();
- for (int i = 1; i < n; i++) {
- CharacterRange next_range = ranges[i];
- if (next_range.from() <= max + 1)
- return false;
- max = next_range.to();
- }
- return true;
-}
-
-// Move a number of elements in a zonelist to another position
-// in the same list. Handles overlapping source and target areas.
-static
-void MoveRanges(CharacterRangeVector& list, int from, int to, int count)
-{
- // Ranges are potentially overlapping.
- if (from < to) {
- for (int i = count - 1; i >= 0; i--)
- list[to + i] = list[from + i];
- } else {
- for (int i = 0; i < count; i++)
- list[to + i] = list[from + i];
- }
-}
-
-static int
-InsertRangeInCanonicalList(CharacterRangeVector& list,
- int count,
- CharacterRange insert)
-{
- // Inserts a range into list[0..count[, which must be sorted
- // by from value and non-overlapping and non-adjacent, using at most
- // list[0..count] for the result. Returns the number of resulting
- // canonicalized ranges. Inserting a range may collapse existing ranges into
- // fewer ranges, so the return value can be anything in the range 1..count+1.
- char16_t from = insert.from();
- char16_t to = insert.to();
- int start_pos = 0;
- int end_pos = count;
- for (int i = count - 1; i >= 0; i--) {
- CharacterRange current = list[i];
- if (current.from() > to + 1) {
- end_pos = i;
- } else if (current.to() + 1 < from) {
- start_pos = i + 1;
- break;
- }
- }
-
- // Inserted range overlaps, or is adjacent to, ranges at positions
- // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
- // not affected by the insertion.
- // If start_pos == end_pos, the range must be inserted before start_pos.
- // if start_pos < end_pos, the entire range from start_pos to end_pos
- // must be merged with the insert range.
-
- if (start_pos == end_pos) {
- // Insert between existing ranges at position start_pos.
- if (start_pos < count) {
- MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
- }
- list[start_pos] = insert;
- return count + 1;
- }
- if (start_pos + 1 == end_pos) {
- // Replace single existing range at position start_pos.
- CharacterRange to_replace = list[start_pos];
- int new_from = Min(to_replace.from(), from);
- int new_to = Max(to_replace.to(), to);
- list[start_pos] = CharacterRange(new_from, new_to);
- return count;
- }
- // Replace a number of existing ranges from start_pos to end_pos - 1.
- // Move the remaining ranges down.
-
- int new_from = Min(list[start_pos].from(), from);
- int new_to = Max(list[end_pos - 1].to(), to);
- if (end_pos < count) {
- MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
- }
- list[start_pos] = CharacterRange(new_from, new_to);
- return count - (end_pos - start_pos) + 1;
-}
-
-void
-CharacterRange::Canonicalize(CharacterRangeVector& character_ranges)
-{
- if (character_ranges.length() <= 1) return;
- // Check whether ranges are already canonical (increasing, non-overlapping,
- // non-adjacent).
- int n = character_ranges.length();
- int max = character_ranges[0].to();
- int i = 1;
- while (i < n) {
- CharacterRange current = character_ranges[i];
- if (current.from() <= max + 1) {
- break;
- }
- max = current.to();
- i++;
- }
- // Canonical until the i'th range. If that's all of them, we are done.
- if (i == n) return;
-
- // The ranges at index i and forward are not canonicalized. Make them so by
- // doing the equivalent of insertion sort (inserting each into the previous
- // list, in order).
- // Notice that inserting a range can reduce the number of ranges in the
- // result due to combining of adjacent and overlapping ranges.
- int read = i; // Range to insert.
- size_t num_canonical = i; // Length of canonicalized part of list.
- do {
- num_canonical = InsertRangeInCanonicalList(character_ranges,
- num_canonical,
- character_ranges[read]);
- read++;
- } while (read < n);
-
- while (character_ranges.length() > num_canonical)
- character_ranges.popBack();
-
- MOZ_ASSERT(CharacterRange::IsCanonical(character_ranges));
-}
-
// -------------------------------------------------------------------
// SeqRegExpNode
@@ -720,6 +274,8 @@ ActionNode::EmptyMatchCheck(int start_register,
int
TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
+ if (read_backward())
+ return 0;
int answer = Length();
if (answer >= still_to_find)
return answer;
@@ -735,8 +291,7 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
int
TextNode::GreedyLoopTextLength()
{
- TextElement elm = elements()[elements().length() - 1];
- return elm.cp_offset() + elm.length();
+ return Length(); // The text length used for greedy loops is simply this node's Length().
}
RegExpNode*
@@ -789,7 +344,7 @@ TextNode::FilterASCII(int depth, bool ignore_case, bool unicode)
ranges[0].to() >= kMaxOneByteCharCode)
{
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
+ if (ignore_case && CharacterRange::RangesContainLatin1Equivalents(ranges, unicode))
continue;
return set_replacement(nullptr);
}
@@ -798,7 +353,7 @@ TextNode::FilterASCII(int depth, bool ignore_case, bool unicode)
ranges[0].from() > kMaxOneByteCharCode)
{
// This will be handled in a later filter.
- if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
+ if (ignore_case && CharacterRange::RangesContainLatin1Equivalents(ranges, unicode))
continue;
return set_replacement(nullptr);
}
@@ -886,6 +441,8 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo
int
BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
+ if (read_backward())
+ return 0;
if (budget <= 0)
return 0;
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
@@ -1577,6 +1134,9 @@ class irregexp::RegExpCompiler
current_expansion_factor_ = value;
}
+ bool read_backward() { return read_backward_; }
+ void set_read_backward(bool value) { read_backward_ = value; }
+
JSContext* cx() const { return cx_; }
LifoAlloc* alloc() const { return alloc_; }
@@ -1594,6 +1154,7 @@ class irregexp::RegExpCompiler
bool unicode_;
bool reg_exp_too_big_;
int current_expansion_factor_;
+ bool read_backward_;
FrequencyCollator frequency_collator_;
JSContext* cx_;
LifoAlloc* alloc_;
@@ -1623,6 +1184,7 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun
unicode_(unicode),
reg_exp_too_big_(false),
current_expansion_factor_(1),
+ read_backward_(false),
frequency_collator_(),
cx_(cx),
alloc_(alloc)
@@ -1746,7 +1308,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
// at the start of input.
ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
RegExpNode* char_class =
- alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
+ alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), false, loop_node);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(char_class));
node = first_step_node;
@@ -1849,19 +1411,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
TextElementVector* elms =
compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
elms->append(TextElement::Atom(this));
- return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(elms, compiler->read_backward(), on_success);
}
RegExpNode*
RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(&elements_, compiler->read_backward(), on_success); // Builds a TextNode over elements_ that continues to on_success; the compiler's current read_backward() flag is handed to the node.
}
RegExpNode*
RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(this, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(this, compiler->read_backward(), on_success); // Wraps this character class in a TextNode that continues to on_success; the compiler's current read_backward() flag is handed to the node.
}
RegExpNode*
@@ -2002,7 +1564,9 @@ RegExpQuantifier::ToNode(int min,
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start) alternation->set_not_at_start();
+ if (not_at_start && !compiler->read_backward()) {
+ alternation->set_not_at_start();
+ }
}
return answer;
}
@@ -2014,8 +1578,9 @@ RegExpQuantifier::ToNode(int min,
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
- if (not_at_start)
+ LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0,
+ compiler->read_backward());
+ if (not_at_start && !compiler->read_backward())
center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
@@ -2091,7 +1656,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
TextNode* newline_matcher =
- alloc->newInfallible<TextNode>(newline_atom,
+ alloc->newInfallible<TextNode>(newline_atom, false,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
0, // No captures inside.
@@ -2123,6 +1688,7 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
RegExpCapture::EndRegister(index()),
+ compiler->read_backward(),
on_success);
}
@@ -2133,7 +1699,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
}
RegExpNode*
-RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
+RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
@@ -2144,6 +1710,10 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
+ RegExpNode* result;
+ bool was_reading_backward = compiler->read_backward();
+ compiler->set_read_backward(type() == LOOKBEHIND);
+
if (is_positive()) {
RegExpNode* bodyNode =
body()->ToNode(compiler,
@@ -2152,37 +1722,39 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
register_count,
register_start,
on_success));
- return ActionNode::BeginSubmatch(stack_pointer_register,
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
+ position_register,
+ bodyNode);
+ } else {
+ // We use a ChoiceNode for a negative lookaround because it has most of
+ // the characteristics we need. It has the body of the lookaround as its
+ // first alternative and the expression after the lookaround as its second
+ // alternative. If the first alternative succeeds then the
+ // NegativeSubmatchSuccess will unwind the stack including everything the
+ // choice node set up and backtrack. If the first alternative fails then
+ // the second alternative is tried, which is exactly the desired result
+ // for a negative lookaround. The NegativeLookaheadChoiceNode is a special
+ // ChoiceNode that knows to ignore the first exit when calculating quick
+ // checks.
+ LifoAlloc* alloc = compiler->alloc();
+
+ RegExpNode* success =
+ alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
+ stack_pointer_register,
+ position_register,
+ register_count,
+ register_start);
+ GuardedAlternative body_alt(body()->ToNode(compiler, success));
+
+ ChoiceNode* choice_node =
+ alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
+
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
position_register,
- bodyNode);
- }
-
- // We use a ChoiceNode for a negative lookahead because it has most of
- // the characteristics we need. It has the body of the lookahead as its
- // first alternative and the expression after the lookahead of the second
- // alternative. If the first alternative succeeds then the
- // NegativeSubmatchSuccess will unwind the stack including everything the
- // choice node set up and backtrack. If the first alternative fails then
- // the second alternative is tried, which is exactly the desired result
- // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
- // ChoiceNode that knows to ignore the first exit when calculating quick
- // checks.
- LifoAlloc* alloc = compiler->alloc();
-
- RegExpNode* success =
- alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
- stack_pointer_register,
- position_register,
- register_count,
- register_start);
- GuardedAlternative body_alt(body()->ToNode(compiler, success));
-
- ChoiceNode* choice_node =
- alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
-
- return ActionNode::BeginSubmatch(stack_pointer_register,
- position_register,
- choice_node);
+ choice_node);
+ }
+ compiler->set_read_backward(was_reading_backward);
+ return result;
}
RegExpNode*
@@ -2197,8 +1769,12 @@ RegExpCapture::ToNode(RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success)
{
+ MOZ_ASSERT(body);
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
+ if (compiler->read_backward()) {
+ std::swap(start_reg, end_reg);
+ }
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, true, body_node);
@@ -2209,8 +1785,15 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
const RegExpTreeVector& children = nodes();
RegExpNode* current = on_success;
- for (int i = children.length() - 1; i >= 0; i--)
- current = children[i]->ToNode(compiler, current);
+ if (compiler->read_backward()) {
+ for (int i = 0; i < children.length(); i++) {
+ current = children[i]->ToNode(compiler, current);
+ }
+ } else {
+ for (int i = children.length() - 1; i >= 0; i--) {
+ current = children[i]->ToNode(compiler, current);
+ }
+ }
return current;
}
@@ -2574,6 +2157,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
{
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
+ static const int kNoStore = INT32_MIN;
// Count pushes performed to force a stack limit check occasionally.
int pushes = 0;
@@ -2590,7 +2174,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
int value = 0;
bool absolute = false;
bool clear = false;
- int store_position = -1;
+ int store_position = kNoStore;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
for (DeferredAction* action = actions_;
@@ -2611,7 +2195,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = DEFER_RESTORE;
- MOZ_ASSERT(store_position == -1);
+ MOZ_ASSERT(store_position == kNoStore);
MOZ_ASSERT(!clear);
break;
}
@@ -2619,14 +2203,14 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
if (!absolute) {
value++;
}
- MOZ_ASSERT(store_position == -1);
+ MOZ_ASSERT(store_position == kNoStore);
MOZ_ASSERT(!clear);
undo_action = DEFER_RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
static_cast<Trace::DeferredCapture*>(action);
- if (!clear && store_position == -1) {
+ if (!clear && store_position == kNoStore) {
store_position = pc->cp_offset();
}
@@ -2650,7 +2234,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
- if (store_position == -1) {
+ if (store_position == kNoStore) {
clear = true;
}
undo_action = DEFER_RESTORE;
@@ -2680,7 +2264,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
}
// Perform the chronologically last action (or accumulated increment)
// for the register.
- if (store_position != -1) {
+ if (store_position != kNoStore) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
assembler->ClearRegisters(reg, reg);
@@ -2763,7 +2347,6 @@ Trace::InvalidateCurrentCharacter()
void
Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
{
- MOZ_ASSERT(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
// we preloaded any characters into it.
@@ -2881,16 +2464,23 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
+ // A positive (> 0) cp_offset means we've already successfully matched a
+ // non-empty-width part of the pattern, and thus cannot be at or before the
+ // start of the subject string. We can thus skip both at-start and
+ // bounds-checks when loading the one-character lookbehind.
+ const bool may_be_at_or_before_subject_string_start = new_trace.cp_offset() <= 0;
+
jit::Label ok;
- if (new_trace.cp_offset() == 0) {
- // The start of input counts as a newline in this context, so skip to
- // ok if we are at the start.
- assembler->CheckAtStart(&ok);
+ if (may_be_at_or_before_subject_string_start) {
+ // The start of input counts as a newline in this context, so skip to ok if
+ // we are at the start.
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
}
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
- assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false);
+ // If we've already checked that we are not at the start of input, it's okay
+ // to load the previous character without bounds checks.
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), can_skip_bounds_check);
if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
@@ -2915,11 +2505,10 @@ EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trac
new_trace.InvalidateCurrentCharacter();
jit::Label ok;
- if (new_trace.cp_offset() == 0)
- assembler->CheckAtStart(&ok);
+ if (new_trace.cp_offset() <= 0) {
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
+ }
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
new_trace.backtrack());
@@ -2943,8 +2532,9 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace*
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
- if (new_trace.cp_offset() == 0)
- assembler->CheckAtStart(&ok);
+ if (new_trace.cp_offset() <= 0) {
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
+ }
// First check if next character is a trail surrogate.
assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false);
@@ -3062,10 +2652,10 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through;
jit::Label* word = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack();
- if (new_trace.cp_offset() == 0) {
+ if (new_trace.cp_offset() <= 0) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
- assembler->CheckAtStart(non_word);
+ assembler->CheckAtStart(new_trace.cp_offset(), non_word);
}
// We already checked that we are not at the start of input so it must be
// OK to load the previous character.
@@ -3108,9 +2698,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
return;
}
if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->backtrack());
+ assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
Trace at_start_trace = *trace;
- at_start_trace.set_at_start(true);
+ at_start_trace.set_at_start(Trace::TRUE_VALUE);
on_success()->Emit(compiler, &at_start_trace);
return;
}
@@ -3813,9 +3403,10 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
jit::Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
int element_count = elements().length();
+ int backward_offset = read_backward() ? -Length() : 0;
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
TextElement elm = elements()[i];
- int cp_offset = trace->cp_offset() + elm.cp_offset();
+ int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
const CharacterVector& quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
@@ -3843,11 +3434,12 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != nullptr) {
+ bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
bool bound_checked = emit_function(compiler,
quarks[j],
backtrack,
cp_offset + j,
- *checked_up_to < cp_offset + j,
+ bounds_check,
preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
@@ -3858,13 +3450,14 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
+ bool bounds_check = *checked_up_to < cp_offset || read_backward();
EmitCharClass(alloc(),
assembler,
cc,
ascii,
backtrack,
cp_offset,
- *checked_up_to < cp_offset,
+ bounds_check,
preloaded);
UpdateBoundsCheck(cp_offset, checked_up_to);
}
@@ -3944,8 +3537,11 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
}
Trace successor_trace(*trace);
- successor_trace.set_at_start(false);
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
+ // If we advance backward, we may end up at the start.
+ successor_trace.AdvanceCurrentPositionInTrace(
+ read_backward() ? -Length() : Length(), compiler);
+ successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
+ : Trace::FALSE_VALUE);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
}
@@ -4117,6 +3713,8 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea
RegExpNode*
TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
{
+ if (read_backward()) return nullptr;
+
if (elements().length() != 1)
return nullptr;
@@ -4164,7 +3762,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
node = seq_node->on_success();
}
- return length;
+ return read_backward() ? -length : length;
}
// Creates a list of AlternativeGenerations. If the list has a reasonable
@@ -4239,7 +3837,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
jit::Label greedy_loop_label;
Trace counter_backtrack_trace;
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
- if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+ if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
@@ -4255,7 +3853,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
current_trace = &counter_backtrack_trace;
jit::Label greedy_match_failed;
Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
+ if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
greedy_match_trace.set_backtrack(&greedy_match_failed);
jit::Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -4325,6 +3923,8 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
for (size_t i = first_normal_choice; i < choice_count; i++) {
+ bool is_last = i == choice_count - 1;
+ bool fall_through_on_failure = !is_last;
GuardedAlternative alternative = alternatives()[i];
AlternativeGeneration* alt_gen = alt_gens.at(i);
alt_gen->quick_check_details.set_characters(preload_characters);
@@ -4340,20 +3940,20 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
alt_gen->expects_preload = preload_is_current;
bool generate_full_check_inline = false;
- if (try_to_emit_quick_check_for_alternative(i) &&
+ if (try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
&new_trace,
preload_has_checked_bounds,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
- i < choice_count - 1)) {
+ fall_through_on_failure)) {
// Quick check was generated for this choice.
preload_is_current = true;
preload_has_checked_bounds = true;
// On the last choice in the ChoiceNode we generated the quick
// check to fall through on possible success. So now we need to
// generate the full check inline.
- if (i == choice_count - 1) {
+ if (!fall_through_on_failure) {
macro_assembler->Bind(&alt_gen->possible_success);
new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
new_trace.set_characters_preloaded(preload_characters);
@@ -4361,7 +3961,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
generate_full_check_inline = true;
}
} else if (alt_gen->quick_check_details.cannot_match()) {
- if (i == choice_count - 1 && !greedy_loop) {
+ if (!fall_through_on_failure && !greedy_loop) {
macro_assembler->JumpOrBacktrack(trace->backtrack());
}
continue;
@@ -4375,7 +3975,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
alt_gen->expects_preload = false;
new_trace.InvalidateCurrentCharacter();
}
- if (i < choice_count - 1) {
+ if (!is_last) {
new_trace.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
@@ -4413,12 +4013,14 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
if (new_trace.actions() != nullptr) {
new_trace.set_flush_budget(new_flush_budget);
}
+ bool next_expects_preload =
+ i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
EmitOutOfLineContinuation(compiler,
&new_trace,
alternatives()[i],
alt_gen,
preload_characters,
- alt_gens.at(i + 1)->expects_preload);
+ next_expects_preload);
}
}
@@ -4604,11 +4206,14 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
MOZ_ASSERT(start_reg_ + 1 == end_reg_);
if (compiler->ignore_case()) {
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ read_backward(),
trace->backtrack(),
compiler->unicode());
} else {
- assembler->CheckNotBackReference(start_reg_, trace->backtrack());
+ assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack());
}
+ // We are going to advance backward, so we may end up at the start.
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
on_success()->Emit(compiler, trace);
}
@@ -4820,6 +4425,9 @@ TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
int characters_filled_in,
bool not_at_start)
{
+ // Do not collect any quick check details if the text node reads backward,
+ // since it reads in the opposite direction from the one we use for quick checks.
+ if (read_backward()) return;
MOZ_ASSERT(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask = MaximumCharacter(compiler->ascii());
@@ -4976,8 +4584,7 @@ QuickCheckDetails::Clear()
void
QuickCheckDetails::Advance(int by, bool ascii)
{
- MOZ_ASSERT(by >= 0);
- if (by >= characters_) {
+ if (by >= characters_ || by < 0) {
Clear();
return;
}
diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h
index e2cabaa026..22e9d944a4 100644
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -34,6 +34,9 @@
#include "ds/SplayTree.h"
#include "jit/Label.h"
+
+#include "irregexp/InfallibleVector.h"
+#include "irregexp/RegExpCharRanges.h"
#include "vm/RegExpObject.h"
namespace js {
@@ -57,13 +60,28 @@ struct RegExpCompileData
: tree(nullptr),
simple(true),
contains_anchor(false),
- capture_count(0)
+ capture_count(0),
+ capture_name_list(nullptr),
+ capture_index_list(nullptr)
{}
+ // The parsed AST as produced by the RegExpParser.
RegExpTree* tree;
+ // True, iff the pattern is a 'simple' atom with zero captures. In other
+ // words, the pattern consists of a string with no metacharacters and special
+ // regexp features, and can be implemented as a standard string search.
bool simple;
+
+ // True, iff the pattern is anchored at the start of the string with '^'.
bool contains_anchor;
+
+ // The number of capture groups, without the global capture \0.
int capture_count;
+
+ // Only used if the pattern contains named captures. If so, these hold the
+ // capture names and the corresponding capture indices, as parallel lists.
+ CharacterVectorVector* capture_name_list;
+ IntegerVector* capture_index_list;
};
struct RegExpCode
@@ -118,7 +136,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t
VISIT(Atom) \
VISIT(Quantifier) \
VISIT(Capture) \
- VISIT(Lookahead) \
+ VISIT(Lookaround) \
VISIT(BackReference) \
VISIT(Empty) \
VISIT(Text)
@@ -127,108 +145,6 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t
FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
#undef FORWARD_DECLARE
-// InfallibleVector is like Vector, but all its methods are infallible (they
-// crash on OOM). We use this class instead of Vector to avoid a ton of
-// MOZ_MUST_USE warnings in irregexp code (imported from V8).
-template<typename T, size_t N>
-class InfallibleVector
-{
- Vector<T, N, LifoAllocPolicy<Infallible>> vector_;
-
- InfallibleVector(const InfallibleVector&) = delete;
- void operator=(const InfallibleVector&) = delete;
-
- public:
- explicit InfallibleVector(const LifoAllocPolicy<Infallible>& alloc) : vector_(alloc) {}
-
- void append(const T& t) { MOZ_ALWAYS_TRUE(vector_.append(t)); }
- void append(const T* begin, size_t length) { MOZ_ALWAYS_TRUE(vector_.append(begin, length)); }
-
- void clear() { vector_.clear(); }
- void popBack() { vector_.popBack(); }
- void reserve(size_t n) { MOZ_ALWAYS_TRUE(vector_.reserve(n)); }
-
- size_t length() const { return vector_.length(); }
- T popCopy() { return vector_.popCopy(); }
-
- T* begin() { return vector_.begin(); }
- const T* begin() const { return vector_.begin(); }
-
- T& operator[](size_t index) { return vector_[index]; }
- const T& operator[](size_t index) const { return vector_[index]; }
-
- InfallibleVector& operator=(InfallibleVector&& rhs) { vector_ = Move(rhs.vector_); return *this; }
-};
-
-class CharacterRange;
-typedef InfallibleVector<CharacterRange, 1> CharacterRangeVector;
-
-// Represents code units in the range from from_ to to_, both ends are
-// inclusive.
-class CharacterRange
-{
- public:
- CharacterRange()
- : from_(0), to_(0)
- {}
-
- CharacterRange(char16_t from, char16_t to)
- : from_(from), to_(to)
- {}
-
- static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges);
- static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
- CharacterRangeVector* ranges, bool ignoreCase);
-
- static inline CharacterRange Singleton(char16_t value) {
- return CharacterRange(value, value);
- }
- static inline CharacterRange Range(char16_t from, char16_t to) {
- MOZ_ASSERT(from <= to);
- return CharacterRange(from, to);
- }
- static inline CharacterRange Everything() {
- return CharacterRange(0, 0xFFFF);
- }
- bool Contains(char16_t i) { return from_ <= i && i <= to_; }
- char16_t from() const { return from_; }
- void set_from(char16_t value) { from_ = value; }
- char16_t to() const { return to_; }
- void set_to(char16_t value) { to_ = value; }
- bool is_valid() { return from_ <= to_; }
- bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; }
- bool IsSingleton() { return (from_ == to_); }
- void AddCaseEquivalents(bool is_ascii, bool unicode, CharacterRangeVector* ranges);
-
- static void Split(const LifoAlloc* alloc,
- CharacterRangeVector base,
- const Vector<int>& overlay,
- CharacterRangeVector* included,
- CharacterRangeVector* excluded);
-
- // Whether a range list is in canonical form: Ranges ordered by from value,
- // and ranges non-overlapping and non-adjacent.
- static bool IsCanonical(const CharacterRangeVector& ranges);
-
- // Convert range list to canonical form. The characters covered by the ranges
- // will still be the same, but no character is in more than one range, and
- // adjacent ranges are merged. The resulting list may be shorter than the
- // original, but cannot be longer.
- static void Canonicalize(CharacterRangeVector& ranges);
-
- // Negate the contents of a character range in canonical form.
- static void Negate(const LifoAlloc* alloc,
- CharacterRangeVector src,
- CharacterRangeVector* dst);
-
- static const int kStartMarker = (1 << 24);
- static const int kPayloadMask = (1 << 24) - 1;
-
- private:
- char16_t from_;
- char16_t to_;
-};
-
// A set of unsigned integers that behaves especially well on small
// integers (< 32).
class OutSet
@@ -524,7 +440,7 @@ class RegExpNode
int characters_filled_in,
bool not_at_start) = 0;
- static const int kNodeIsTooComplexForGreedyLoops = -1;
+ static const int kNodeIsTooComplexForGreedyLoops = INT32_MIN;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
@@ -762,15 +678,19 @@ class TextNode : public SeqRegExpNode
{
public:
TextNode(TextElementVector* elements,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(elements)
+ elements_(elements),
+ read_backward_(read_backward)
{}
TextNode(RegExpCharacterClass* that,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(alloc()->newInfallible<TextElementVector>(*alloc()))
+ elements_(alloc()->newInfallible<TextElementVector>(*alloc())),
+ read_backward_(read_backward)
{
elements_->append(TextElement::CharClass(that));
}
@@ -783,6 +703,7 @@ class TextNode : public SeqRegExpNode
int characters_filled_in,
bool not_at_start);
TextElementVector& elements() { return *elements_; }
+ bool read_backward() { return read_backward_; }
void MakeCaseIndependent(bool is_ascii, bool unicode);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
@@ -813,6 +734,7 @@ class TextNode : public SeqRegExpNode
int* checked_up_to);
int Length();
TextElementVector* elements_;
+ bool read_backward_;
};
class AssertionNode : public SeqRegExpNode
@@ -881,15 +803,18 @@ class BackReferenceNode : public SeqRegExpNode
public:
BackReferenceNode(int start_reg,
int end_reg,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
- end_reg_(end_reg)
+ end_reg_(end_reg),
+ read_backward_(read_backward)
{}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
+ bool read_backward() { return read_backward_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
@@ -908,6 +833,7 @@ class BackReferenceNode : public SeqRegExpNode
private:
int start_reg_;
int end_reg_;
+ bool read_backward_;
};
class EndNode : public RegExpNode
@@ -1050,8 +976,11 @@ class ChoiceNode : public RegExpNode
bool not_at_start() { return not_at_start_; }
void set_not_at_start() { not_at_start_ = true; }
void set_being_calculated(bool b) { being_calculated_ = b; }
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
+ virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
+ return true;
+ }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
+ virtual bool read_backward() { return false; }
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
@@ -1103,18 +1032,22 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
// starts by loading enough characters for the alternative that takes fewest
// characters, but on a negative lookahead the negative branch did not take
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
+ bool try_to_emit_quick_check_for_alternative(bool is_first) override {
+ return !is_first;
+ }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
};
class LoopChoiceNode : public ChoiceNode
{
public:
- explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length)
+ explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length,
+ bool read_backward)
: ChoiceNode(alloc, 2),
loop_node_(nullptr),
continue_node_(nullptr),
- body_can_be_zero_length_(body_can_be_zero_length)
+ body_can_be_zero_length_(body_can_be_zero_length),
+ read_backward_(read_backward)
{}
void AddLoopAlternative(GuardedAlternative alt);
@@ -1132,6 +1065,7 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
+ virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
@@ -1146,6 +1080,7 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node_;
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
+ bool read_backward_;
};
// Improve the speed that we scan for an initial point where a non-anchored
@@ -1421,8 +1356,8 @@ class Trace
}
TriBool at_start() { return at_start_; }
- void set_at_start(bool at_start) {
- at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
+ void set_at_start(TriBool at_start) {
+ at_start_ = at_start;
}
jit::Label* backtrack() { return backtrack_; }
jit::Label* loop_label() { return loop_label_; }
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp
index 5d1f0ea805..f53acfb606 100644
--- a/js/src/irregexp/RegExpInterpreter.cpp
+++ b/js/src/irregexp/RegExpInterpreter.cpp
@@ -221,8 +221,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
- size_t pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= length) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos >= (int)length || pos < 0) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
current_char = chars[pos];
@@ -237,8 +237,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
- size_t pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > length) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 2 > (int)length || pos < 0) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
CharT next = chars[pos + 1];
@@ -424,6 +424,30 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) { // Compares the len characters ending at |current| with chars[from .. from+len).
+ int from = registers[insn >> BYTECODE_SHIFT]; // instruction argument selects the register holding the range start
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; // following register minus start = range length
+ if (from < 0 || len <= 0) { // nothing to compare (presumably an unset or empty capture)
+ pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; // step over this instruction without moving |current|
+ break;
+ }
+ if (int(current) - len < 0) { // fewer than len characters precede |current|
+ pc = byteCode + Load32Aligned(pc + 4); // jump to the branch target stored at pc + 4
+ break;
+ } else {
+ int i;
+ for (i = 0; i < len; i++) {
+ if (chars[from + i] != chars[int(current) - len + i]) { // exact (case-sensitive) code unit comparison
+ pc = byteCode + Load32Aligned(pc + 4); // mismatch: jump to the branch target stored at pc + 4
+ break; // leaves only the for loop; i < len records the mismatch
+ }
+ }
+ if (i < len) break; // mismatch above: pc already points at the branch target
+ current -= len; // full match: move the position back over the compared text
+ }
+ pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH; // match: continue with the next instruction
+ break;
+ }
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
@@ -464,14 +488,54 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) { // Case-insensitive comparison of the len characters ending at |current| with the range at |from|.
+ int from = registers[insn >> BYTECODE_SHIFT]; // instruction argument selects the register holding the range start
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from; // following register minus start = range length
+ if (from < 0 || len <= 0) { // nothing to compare (presumably an unset or empty capture)
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH; // step over this instruction without moving |current|
+ break;
+ }
+ if (int(current) - len < 0) { // fewer than len characters precede |current|
+ pc = byteCode + Load32Aligned(pc + 4); // jump to the branch target stored at pc + 4
+ break;
+ }
+ if (CaseInsensitiveCompareStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) { // length is passed in bytes
+ current -= len; // match: move the position back over the compared text
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
+ } else {
+ pc = byteCode + Load32Aligned(pc + 4); // mismatch: jump to the branch target stored at pc + 4
+ }
+ break;
+
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) {
+     // Backward back-reference check using CaseInsensitiveCompareUCStrings:
+     // the len characters ending at |current| must match the range that
+     // starts at |from|. On a match |current| moves back by len; otherwise
+     // control jumps to the branch target stored at pc + 4.
+     int from = registers[insn >> BYTECODE_SHIFT];
+     int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+     if (from < 0 || len <= 0) {
+         // Nothing to compare (presumably an unset or empty capture). Skip
+         // this instruction using its own length constant rather than the
+         // one belonging to the non-unicode sibling opcode.
+         pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE_LENGTH;
+         break;
+     }
+     if (int(current) - len < 0) {
+         // Fewer than len characters precede |current|.
+         pc = byteCode + Load32Aligned(pc + 4);
+         break;
+     }
+     if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
+         current -= len;
+         pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE_LENGTH;
+     } else {
+         pc = byteCode + Load32Aligned(pc + 4);
+     }
+     break;
+ }
BYTECODE(CHECK_AT_START)
- if (current == 0)
+ if (current + (insn >> BYTECODE_SHIFT) == 0)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_AT_START_LENGTH;
break;
BYTECODE(CHECK_NOT_AT_START)
- if (current == 0)
+ if (current + (insn >> BYTECODE_SHIFT) == 0)
pc += BC_CHECK_NOT_AT_START_LENGTH;
else
pc = byteCode + Load32Aligned(pc + 4);
diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp
index 94f6934d3f..2c4ec67ef5 100644
--- a/js/src/irregexp/RegExpMacroAssembler.cpp
+++ b/js/src/irregexp/RegExpMacroAssembler.cpp
@@ -172,9 +172,9 @@ InterpretedRegExpMacroAssembler::Bind(jit::Label* label)
}
void
-InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start)
+InterpretedRegExpMacroAssembler::CheckAtStart(int cp_offset, jit::Label* on_at_start)
{
- Emit(BC_CHECK_AT_START, 0);
+ Emit(BC_CHECK_AT_START, cp_offset); // cp_offset becomes the instruction argument; the interpreter adds it to |current| before comparing with 0
EmitOrLink(on_at_start);
}
@@ -225,32 +225,37 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre
}
void
-InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
+InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start)
{
- Emit(BC_CHECK_NOT_AT_START, 0);
+ Emit(BC_CHECK_NOT_AT_START, cp_offset); // cp_offset becomes the instruction argument; the interpreter adds it to |current| before comparing with 0
EmitOrLink(on_not_at_start);
}
void
-InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
+InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward,
+ jit::Label* on_no_match)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
- Emit(BC_CHECK_NOT_BACK_REF, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF, // read_backward selects the backward opcode
+ start_reg); // register index travels as the instruction argument
EmitOrLink(on_no_match);
}
void
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward, // selects the *_BACKWARD opcode variants below
jit::Label* on_no_match,
bool unicode)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
if (unicode)
- Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, // unicode comparison, direction chosen by read_backward
+ start_reg);
else
- Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE, // non-unicode comparison, direction chosen by read_backward
+ start_reg);
EmitOrLink(on_no_match);
}
diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h
index e8275faf40..4fa0ab5630 100644
--- a/js/src/irregexp/RegExpMacroAssembler.h
+++ b/js/src/irregexp/RegExpMacroAssembler.h
@@ -96,7 +96,7 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
virtual void Backtrack() = 0;
virtual void Bind(jit::Label* label) = 0;
- virtual void CheckAtStart(jit::Label* on_at_start) = 0;
+ virtual void CheckAtStart(int cp_offset, jit::Label* on_at_start) = 0;
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
@@ -109,10 +109,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0;
virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0;
virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
- virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
- virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
- bool unicode) = 0;
+ virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0;
+ virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0;
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -238,15 +238,16 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro
void AdvanceRegister(int reg, int by);
void Backtrack();
void Bind(jit::Label* label);
- void CheckAtStart(jit::Label* on_at_start);
+ void CheckAtStart(int cp_offset, jit::Label* on_at_start);
void CheckCharacter(unsigned c, jit::Label* on_equal);
void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal);
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index d0b19d471e..0deb3c658d 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -90,6 +90,26 @@ RegExpBuilder::AddCharacter(char16_t c)
#endif
}
+// forward declare atom helpers from below
+static inline RegExpTree* SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, bool ignore_case);
+static inline RegExpTree* LeadSurrogateAtom(LifoAlloc* alloc, char16_t value);
+static inline RegExpTree* TrailSurrogateAtom(LifoAlloc* alloc, char16_t value);
+
+// Adds the code point |c| to the pattern being built. Values above
+// unicode::UTF16Max are split into a lead/trail pair and added as a single
+// surrogate-pair atom, lone lead or trail surrogates get their own dedicated
+// atoms, and every other value is added as a plain character.
+void
+RegExpBuilder::AddUnicodeCharacter(widechar c, bool ignore_case) {
+    if (c > unicode::UTF16Max) {
+        char16_t lead_unit;
+        char16_t trail_unit;
+        unicode::UTF16Encode(c, &lead_unit, &trail_unit);
+        RegExpTree* pair_atom = SurrogatePairAtom(alloc, lead_unit, trail_unit, ignore_case);
+        AddAtom(pair_atom);
+        return;
+    }
+
+    if (unicode::IsLeadSurrogate(c)) {
+        AddAtom(LeadSurrogateAtom(alloc, c));
+        return;
+    }
+
+    if (unicode::IsTrailSurrogate(c)) {
+        AddAtom(TrailSurrogateAtom(alloc, c));
+        return;
+    }
+
+    AddCharacter(static_cast<char16_t>(c));
+}
+
void
RegExpBuilder::AddEmpty()
{
@@ -225,7 +245,10 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
: ts(ts),
alloc(alloc),
captures_(nullptr),
+ named_captures_(nullptr),
+ named_back_references_(nullptr),
next_pos_(chars),
+ captures_started_(0),
end_(end),
current_(kEndMarker),
capture_count_(0),
@@ -236,7 +259,8 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
dotall_(dotall),
simple_(false),
contains_anchor_(false),
- is_scanned_for_captures_(false)
+ is_scanned_for_captures_(false),
+ has_named_captures_(false)
{
Advance();
}
@@ -251,6 +275,30 @@ RegExpParser<CharT>::ReportError(unsigned errorNumber, const char* param /* = nu
}
template <typename CharT>
+// Copies the parser's named captures into two freshly allocated lists: entry
+// k of *names is the name of the capture whose index is entry k of *indices.
+// When no named captures were recorded, both out-parameters are left
+// untouched. Always returns true.
+bool
+RegExpParser<CharT>::StoreNamedCaptureMap(CharacterVectorVector** names, IntegerVector** indices)
+{
+    // Nothing to store when the pattern defined no named captures.
+    if (named_captures_ == nullptr || named_captures_->length() == 0)
+        return true;
+
+    CharacterVectorVector* name_list = alloc->newInfallible<CharacterVectorVector>(*alloc);
+    IntegerVector* index_list = alloc->newInfallible<IntegerVector>(*alloc);
+
+    const size_t capture_total = named_captures_->length();
+    for (size_t pos = 0; pos != capture_total; ++pos) {
+        RegExpCapture* named = (*named_captures_)[pos];
+        const CharacterVector* capture_name = named->name();
+        // The list stores non-const pointers, so constness is cast away here.
+        name_list->append(const_cast<CharacterVector*>(capture_name));
+        index_list->append(named->index());
+    }
+
+    *names = name_list;
+    *indices = index_list;
+    return true;
+}
+
+template <typename CharT>
void
RegExpParser<CharT>::Advance()
{
@@ -363,6 +411,39 @@ RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
template <typename CharT>
bool
+RegExpParser<CharT>::ParseUnicodeEscape(widechar* value)
+{
+    // Parses a RegExpUnicodeEscapeSequence; the leading "\u" has already been
+    // consumed. \uXXXX is always accepted, \u{X...} only when unicode_ is set.
+    // Returns whether an escape was parsed; the parsed value is written
+    // through |value| by the hex-escape helpers.
+    const CharT* escape_start = position();
+    if (current() == '{' && unicode_) {
+        bool result = ParseBracedHexEscape(value);
+        if (!result) {
+            // Restore the position saved on entry.
+            Reset(escape_start);
+        }
+        return result;
+    }
+    // Either there is no '{', or braced escapes are not allowed here: expect
+    // four hex digits.
+    bool result = ParseHexEscape(4, value);
+    if (result && unicode_ && unicode::IsLeadSurrogate(static_cast<char16_t>(*value)) && current() == '\\') {
+        // A lead surrogate followed by a backslash: try to read a second
+        // \uXXXX escape holding the trail surrogate. This position gets its
+        // own name so it cannot be confused with the one saved on entry.
+        const CharT* trail_start = position();
+        if (Next() == 'u') {
+            Advance(2);
+            widechar trail;
+            if (ParseHexEscape(4, &trail) &&
+                unicode::IsTrailSurrogate(static_cast<char16_t>(trail))) {
+                *value = unicode::UTF16Decode(static_cast<char16_t>(*value), static_cast<char16_t>(trail));
+                return true;
+            }
+        }
+        // No usable trail escape: go back to just after the lead escape and
+        // keep the lead surrogate as the result.
+        Reset(trail_start);
+    }
+    return result;
+}
+
+template <typename CharT>
+bool
RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
{
if (current() != '\\')
@@ -418,7 +499,8 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
static inline RegExpTree*
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
{
- return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
+ return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false, // same leading arguments as the removed RegExpLookahead call
+ 0, 0, RegExpLookaround::LOOKAHEAD); // direction is now passed explicitly
}
static bool
@@ -558,30 +640,13 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
case 'u': {
Advance();
widechar value;
- if (unicode_) {
- if (current() == '{') {
- if (!ParseBracedHexEscape(&value))
- return false;
- *code = value;
- return true;
- }
- if (ParseHexEscape(4, &value)) {
- if (unicode::IsLeadSurrogate(value)) {
- widechar trail;
- if (ParseTrailSurrogate(&trail)) {
- *code = unicode::UTF16Decode(value, trail);
- return true;
- }
- }
- *code = value;
- return true;
- }
- ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
- return false;
+ if (ParseUnicodeEscape(&value)) {
+ *code = value;
+ return true;
}
- if (ParseHexEscape(4, &value)) {
- *code = value;
- return true;
+ if (unicode_) {
+ ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+ return false;
}
// If \u is not followed by a four-digit or braced hexadecimal, treat it
// as an identity escape.
@@ -605,215 +670,6 @@ RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
return true;
}
-class WideCharRange
-{
- public:
- WideCharRange()
- : from_(0), to_(0)
- {}
-
- WideCharRange(widechar from, widechar to)
- : from_(from), to_(to)
- {}
-
- static inline WideCharRange Singleton(widechar value) {
- return WideCharRange(value, value);
- }
- static inline WideCharRange Range(widechar from, widechar to) {
- MOZ_ASSERT(from <= to);
- return WideCharRange(from, to);
- }
-
- bool Contains(widechar i) const { return from_ <= i && i <= to_; }
- widechar from() const { return from_; }
- widechar to() const { return to_; }
-
- private:
- widechar from_;
- widechar to_;
-};
-
-typedef InfallibleVector<WideCharRange, 1> WideCharRangeVector;
-
-static inline CharacterRange
-LeadSurrogateRange()
-{
- return CharacterRange::Range(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax);
-}
-
-static inline CharacterRange
-TrailSurrogateRange()
-{
- return CharacterRange::Range(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax);
-}
-
-static inline WideCharRange
-NonBMPRange()
-{
- return WideCharRange::Range(unicode::NonBMPMin, unicode::NonBMPMax);
-}
-
-static const char16_t kNoCharClass = 0;
-
-// Adds a character or pre-defined character class to character ranges.
-// If char_class is not kInvalidClass, it's interpreted as a class
-// escape (i.e., 's' means whitespace, from '\s').
-static inline void
-AddCharOrEscape(LifoAlloc* alloc,
- CharacterRangeVector* ranges,
- char16_t char_class,
- widechar c)
-{
- if (char_class != kNoCharClass)
- CharacterRange::AddClassEscape(alloc, char_class, ranges);
- else
- ranges->append(CharacterRange::Singleton(c));
-}
-
-static inline void
-AddCharOrEscapeUnicode(LifoAlloc* alloc,
- CharacterRangeVector* ranges,
- CharacterRangeVector* lead_ranges,
- CharacterRangeVector* trail_ranges,
- WideCharRangeVector* wide_ranges,
- char16_t char_class,
- widechar c,
- bool ignore_case)
-{
- if (char_class != kNoCharClass) {
- CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case);
- switch (char_class) {
- case 'S':
- case 'W':
- case 'D':
- lead_ranges->append(LeadSurrogateRange());
- trail_ranges->append(TrailSurrogateRange());
- wide_ranges->append(NonBMPRange());
- break;
- case '.':
- MOZ_CRASH("Bad char_class!");
- }
- return;
- }
-
- if (unicode::IsLeadSurrogate(c))
- lead_ranges->append(CharacterRange::Singleton(c));
- else if (unicode::IsTrailSurrogate(c))
- trail_ranges->append(CharacterRange::Singleton(c));
- else if (c >= unicode::NonBMPMin)
- wide_ranges->append(WideCharRange::Singleton(c));
- else
- ranges->append(CharacterRange::Singleton(c));
-}
-
-static inline void
-AddUnicodeRange(LifoAlloc* alloc,
- CharacterRangeVector* ranges,
- CharacterRangeVector* lead_ranges,
- CharacterRangeVector* trail_ranges,
- WideCharRangeVector* wide_ranges,
- widechar first,
- widechar next)
-{
- MOZ_ASSERT(first <= next);
- if (first < unicode::LeadSurrogateMin) {
- if (next < unicode::LeadSurrogateMin) {
- ranges->append(CharacterRange::Range(first, next));
- return;
- }
- ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1));
- first = unicode::LeadSurrogateMin;
- }
- if (first <= unicode::LeadSurrogateMax) {
- if (next <= unicode::LeadSurrogateMax) {
- lead_ranges->append(CharacterRange::Range(first, next));
- return;
- }
- lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax));
- first = unicode::LeadSurrogateMax + 1;
- }
- MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin);
- if (first <= unicode::TrailSurrogateMax) {
- if (next <= unicode::TrailSurrogateMax) {
- trail_ranges->append(CharacterRange::Range(first, next));
- return;
- }
- trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax));
- first = unicode::TrailSurrogateMax + 1;
- }
- if (first <= unicode::UTF16Max) {
- if (next <= unicode::UTF16Max) {
- ranges->append(CharacterRange::Range(first, next));
- return;
- }
- ranges->append(CharacterRange::Range(first, unicode::UTF16Max));
- first = unicode::NonBMPMin;
- }
- MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin);
- wide_ranges->append(WideCharRange::Range(first, next));
-}
-
-// Negate a vector of ranges by subtracting its ranges from a range
-// encompassing the full range of possible values.
-template <typename RangeType>
-static inline void
-NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
- RangeType full_range)
-{
- typedef InfallibleVector<RangeType, 1> RangeVector;
- RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc);
- tmp_ranges->append(full_range);
- RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc);
-
- // Perform the following calculation:
- // result_ranges = tmp_ranges - ranges
- // with the following steps:
- // result_ranges = tmp_ranges - ranges[0]
- // SWAP(result_ranges, tmp_ranges)
- // result_ranges = tmp_ranges - ranges[1]
- // SWAP(result_ranges, tmp_ranges)
- // ...
- // result_ranges = tmp_ranges - ranges[N-1]
- // SWAP(result_ranges, tmp_ranges)
- // The last SWAP is just for simplicity of the loop.
- for (size_t i = 0; i < (*ranges)->length(); i++) {
- result_ranges->clear();
-
- const RangeType& range = (**ranges)[i];
- for (size_t j = 0; j < tmp_ranges->length(); j++) {
- const RangeType& tmpRange = (*tmp_ranges)[j];
- auto from1 = tmpRange.from();
- auto to1 = tmpRange.to();
- auto from2 = range.from();
- auto to2 = range.to();
-
- if (from1 < from2) {
- if (to1 < from2) {
- result_ranges->append(tmpRange);
- } else if (to1 <= to2) {
- result_ranges->append(RangeType::Range(from1, from2 - 1));
- } else {
- result_ranges->append(RangeType::Range(from1, from2 - 1));
- result_ranges->append(RangeType::Range(to2 + 1, to1));
- }
- } else if (from1 <= to2) {
- if (to1 > to2)
- result_ranges->append(RangeType::Range(to2 + 1, to1));
- } else {
- result_ranges->append(tmpRange);
- }
- }
-
- auto tmp = tmp_ranges;
- tmp_ranges = result_ranges;
- result_ranges = tmp;
- }
-
- // After the loop, result is pointed at by tmp_ranges, instead of
- // result_ranges.
- *ranges = tmp_ranges;
-}
-
static bool
WideCharRangesContain(WideCharRangeVector* wide_ranges, widechar c)
{
@@ -883,9 +739,9 @@ UnicodeRangesAtom(LifoAlloc* alloc,
}
if (is_negated) {
- NegateUnicodeRanges(alloc, &lead_ranges, LeadSurrogateRange());
- NegateUnicodeRanges(alloc, &trail_ranges, TrailSurrogateRange());
- NegateUnicodeRanges(alloc, &wide_ranges, NonBMPRange());
+ CharacterRange::NegateUnicodeRanges(alloc, &lead_ranges, CharacterRange::LeadSurrogate());
+ CharacterRange::NegateUnicodeRanges(alloc, &trail_ranges, CharacterRange::TrailSurrogate());
+ CharacterRange::NegateUnicodeRanges(alloc, &wide_ranges, WideCharRange::NonBMP());
}
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
@@ -893,8 +749,8 @@ UnicodeRangesAtom(LifoAlloc* alloc,
bool added = false;
if (is_negated) {
- ranges->append(LeadSurrogateRange());
- ranges->append(TrailSurrogateRange());
+ ranges->append(CharacterRange::LeadSurrogate());
+ ranges->append(CharacterRange::TrailSurrogate());
}
if (ranges->length() > 0) {
builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated));
@@ -1012,9 +868,9 @@ RegExpParser<CharT>::ParseCharacterClass()
}
while (has_more() && current() != ']') {
- char16_t char_class = kNoCharClass;
- widechar first = 0;
- if (!ParseClassAtom(&char_class, &first))
+ char16_t char_class_1 = kNoCharClass;
+ widechar char_1 = 0;
+ if (!ParseClassEscape(&char_class_1, &char_1, ranges, lead_ranges, trail_ranges, wide_ranges))
return nullptr;
if (current() == '-') {
Advance();
@@ -1023,41 +879,49 @@ RegExpParser<CharT>::ParseCharacterClass()
// following code report an error.
break;
} else if (current() == ']') {
- if (unicode_) {
- AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
- char_class, first, ignore_case_);
- } else {
- AddCharOrEscape(alloc, ranges, char_class, first);
+ // if the last item was not a class, add it verbatim.
+ if (char_class_1 == kNoCharClass) {
+ if (unicode_) {
+ CharacterRange::AddCharUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1);
+ } else {
+ ranges->append(CharacterRange::Singleton(char_1));
+ }
}
+ // Hyphen at the end of a class. Treat the '-' verbatim.
ranges->append(CharacterRange::Singleton('-'));
break;
}
char16_t char_class_2 = kNoCharClass;
- widechar next = 0;
- if (!ParseClassAtom(&char_class_2, &next))
+ widechar char_2 = 0;
+ if (!ParseClassEscape(&char_class_2, &char_2, ranges, lead_ranges, trail_ranges, wide_ranges))
return nullptr;
- if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
+ if (char_class_1 != kNoCharClass || char_class_2 != kNoCharClass) {
 if (unicode_)
 return ReportError(JSMSG_RANGE_WITH_CLASS_ESCAPE);
- // Either end is an escaped character class. Treat the '-' verbatim.
- AddCharOrEscape(alloc, ranges, char_class, first);
+ // Either end is an escaped character class. Treat the '-' verbatim and add
+ // whichever end is a plain character. A class escape was already added to
+ // the ranges by ParseClassEscape, so each end is guarded by its own class.
+ if (char_class_1 == kNoCharClass)
+ ranges->append(CharacterRange::Singleton(char_1));
 ranges->append(CharacterRange::Singleton('-'));
- AddCharOrEscape(alloc, ranges, char_class_2, next);
+ if (char_class_2 == kNoCharClass)
+ ranges->append(CharacterRange::Singleton(char_2));
 continue;
 }
- if (first > next)
+ if (char_1 > char_2)
return ReportError(JSMSG_BAD_CLASS_RANGE);
if (unicode_)
- AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges,wide_ranges, first, next);
+ CharacterRange::AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1, char_2);
else
- ranges->append(CharacterRange::Range(first, next));
+ ranges->append(CharacterRange::Range(char_1, char_2));
} else {
- if (unicode_) {
- AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
- char_class, first, ignore_case_);
- } else {
- AddCharOrEscape(alloc, ranges, char_class, first);
+ // if the last item was not a class, add it verbatim.
+ if (char_class_1 == kNoCharClass) {
+ if (unicode_) {
+ CharacterRange::AddCharUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_1);
+ } else {
+ ranges->append(CharacterRange::Singleton(char_1));
+ }
}
}
}
@@ -1070,22 +934,26 @@ RegExpParser<CharT>::ParseCharacterClass()
is_negated = !is_negated;
}
return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated);
- }
+ } else {
+ if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 &&
+ trail_ranges->length() == 0 && wide_ranges->length() == 0)
+ {
+ ranges->append(CharacterRange::Everything());
+ return alloc->newInfallible<RegExpCharacterClass>(ranges, true);
+ }
- if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 &&
- trail_ranges->length() == 0 && wide_ranges->length() == 0)
- {
- ranges->append(CharacterRange::Everything());
- return alloc->newInfallible<RegExpCharacterClass>(ranges, true);
+ return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated,
+ ignore_case_);
}
-
- return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated,
- ignore_case_);
}
template <typename CharT>
bool
-RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value)
+RegExpParser<CharT>::ParseClassEscape(char16_t* char_class, widechar *value,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges,
+ CharacterRangeVector* trail_ranges,
+ WideCharRangeVector* wide_ranges)
{
MOZ_ASSERT(*char_class == kNoCharClass);
widechar first = current();
@@ -1094,10 +962,32 @@ RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value)
case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
*char_class = Next();
Advance(2);
+ // add character range to ranges immediately
+ if (unicode_) {
+ CharacterRange::AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
+ *char_class, 0, ignore_case_);
+ } else {
+ CharacterRange::AddCharOrEscape(alloc, ranges, *char_class, 0);
+ }
return true;
}
case kEndMarker:
return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
+ case 'p':
+ case 'P':
+ if (unicode_) {
+ *char_class = Next();
+ Advance(2);
+ bool negate = *char_class == 'P';
+ // Named so they do not shadow the |value| out-parameter.
+ std::string prop_name, prop_value;
+ if (!ParsePropertyClassName(prop_name, prop_value) ||
+ !CharacterRange::AddPropertyClassRange(alloc, prop_name, prop_value, negate, ignore_case_,
+ ranges, lead_ranges, trail_ranges, wide_ranges)) {
+ return ReportError(JSMSG_INVALID_CLASS_PROPERTY_NAME);
+ }
+ return true;
+ }
+ // Without the unicode flag, \p and \P are handled by the default case.
+ MOZ_FALLTHROUGH;
default:
if (!ParseClassCharacterEscape(value))
return false;
@@ -1127,6 +1017,7 @@ template <typename CharT>
void
RegExpParser<CharT>::ScanForCaptures()
{
+ const CharT* saved_position = position();
// Start with captures started previous to current position
int capture_count = captures_started();
// Add count of captures after this position.
@@ -1150,12 +1041,32 @@ RegExpParser<CharT>::ScanForCaptures()
break;
}
case '(':
- if (current() != '?') capture_count++;
+ if (current() == '?') {
+ // At this point we could be in
+ // * a non-capturing group '(?:',
+ // * a lookbehind assertion '(?<=' '(?<!'
+ // * or a named capture '(?<'.
+ //
+ // Of these, only named captures are capturing groups.
+
+ Advance();
+ if (current() != '<') break;
+
+ Advance();
+ if (current() == '=' || current() == '!') break;
+
+ // Found a possible named capture. It could turn out to be a syntax
+ // error (e.g. an unterminated or invalid name), but that distinction
+ // does not matter for our purposes.
+ has_named_captures_ = true;
+ }
+ capture_count++;
break;
}
}
capture_count_ = capture_count;
is_scanned_for_captures_ = true;
+ Reset(saved_position);
}
inline bool
@@ -1213,6 +1124,269 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
return true;
}
+// Appends |code_unit| to |v| in UTF-16 form (modelled on unicode::UTF16Encode):
+// values for which unicode::IsSupplementary() holds are written as a
+// lead/trail surrogate pair, everything else as a single char16_t.
+static void push_code_unit(CharacterVector* v, uint32_t code_unit)
+{
+    if (unicode::IsSupplementary(code_unit)) {
+        v->append(unicode::LeadSurrogate(code_unit));
+        v->append(unicode::TrailSurrogate(code_unit));
+        return;
+    }
+    v->append(char16_t(code_unit));
+}
+
+// Returns true when |c| may appear in a Unicode property name or value, i.e.
+// when it is an ASCII letter, an ASCII digit or '_'.
+// https://tc39.github.io/proposal-regexp-unicode-property-escapes/
+//
+// Checking every parsed character like this is deliberately conservative. An
+// alternative would be to reject only '\0' in the candidate name/value string
+// and leave the final failure to the ICU lookup.
+bool IsUnicodePropertyValueCharacter(char c) {
+    const bool is_lower = c >= 'a' && c <= 'z';
+    const bool is_upper = c >= 'A' && c <= 'Z';
+    const bool is_digit = c >= '0' && c <= '9';
+    return is_lower || is_upper || is_digit || c == '_';
+}
+
+// Parses the braced body of a \p{...} / \P{...} escape. The parser must be
+// positioned on the '{'.
+//
+// On success, |name| receives the text before '=' (or the whole body when
+// there is no '='), |value| receives the text after '=' (left empty
+// otherwise), and the parser is left just past the closing '}'. Returns false
+// if the escape is not braced, if the input ends before '}', or if the body
+// contains anything other than ASCII letters, ASCII digits or '_'.
+//
+// The property class is meant to be interpreted as follows:
+// - In \p{name}, 'name' is interpreted
+//   - either as a general category property value name.
+//   - or as a binary property name.
+// - In \p{name=value}, 'name' is interpreted as an enumerated property name,
+//   and 'value' is interpreted as one of the available property value names.
+// - Aliases in PropertyAlias.txt and PropertyValueAlias.txt can be used.
+// - Loose matching is not applied.
+template <typename CharT>
+bool
+RegExpParser<CharT>::ParsePropertyClassName(std::string& name, std::string& value)
+{
+    MOZ_ASSERT(name.empty());
+    MOZ_ASSERT(value.empty());
+
+    if (current() != '{')
+        return false;
+
+    // current() yields a wide character. Reject anything outside ASCII before
+    // narrowing to char, otherwise a code unit such as U+014C would be
+    // truncated to 'L' and silently accepted as a property-name character.
+    auto is_name_char = [](widechar c) {
+        return c <= 0x7F && IsUnicodePropertyValueCharacter(static_cast<char>(c));
+    };
+
+    // Parse \p{[PropertyName=]PropertyNameValue}
+    for (Advance(); current() != '}' && current() != '='; Advance()) {
+        if (!is_name_char(current())) return false;
+        if (!has_next()) return false;
+        name += static_cast<char>(current());
+    }
+    if (current() == '=') {
+        for (Advance(); current() != '}'; Advance()) {
+            if (!is_name_char(current())) return false;
+            if (!has_next()) return false;
+            value += static_cast<char>(current());
+        }
+    }
+
+    // Step over the closing '}'.
+    Advance();
+    return true;
+}
+
+// Reads a capture group name, consuming input up to and including the
+// terminating '>'. \u escapes are decoded through ParseUnicodeEscape before
+// the character is classified. The first character must satisfy
+// unicode::IsIdentifierStart and every later one unicode::IsIdentifierPart.
+//
+// Returns the name as a newly allocated CharacterVector (filled through
+// push_code_unit), or nullptr after reporting JSMSG_INVALID_UNICODE_ESCAPE or
+// JSMSG_INVALID_CAPTURE_NAME.
+template <typename CharT>
+const CharacterVector*
+RegExpParser<CharT>::ParseCaptureGroupName()
+{
+    CharacterVector* name = alloc->newInfallible<CharacterVector>(*alloc);
+
+    for (bool first = true; ; first = false) {
+        widechar ch = current();
+        Advance();
+
+        // Convert unicode escapes.
+        if (ch == '\\' && current() == 'u') {
+            Advance();
+            if (!ParseUnicodeEscape(&ch)) {
+                ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
+                return nullptr;
+            }
+        }
+
+        // The backslash char is misclassified as both ID_Start and ID_Continue,
+        // so it has to be rejected explicitly.
+        if (ch == '\\') {
+            ReportError(JSMSG_INVALID_CAPTURE_NAME);
+            return nullptr;
+        }
+
+        // '>' closes the name, but only once at least one character was read.
+        if (!first && ch == '>')
+            return name;
+
+        const bool acceptable = first ? unicode::IsIdentifierStart(ch)
+                                      : unicode::IsIdentifierPart(ch);
+        if (!acceptable) {
+            ReportError(JSMSG_INVALID_CAPTURE_NAME);
+            return nullptr;
+        }
+        push_code_unit(name, ch);
+    }
+}
+
+// Gives the capture group with one-based |index| the name |name| and records
+// it in named_captures_ (allocating that list on first use).
+//
+// Returns false after reporting JSMSG_DUPLICATE_CAPTURE_NAME when an already
+// recorded group carries an equal name; returns true otherwise.
+template <typename CharT>
+bool
+RegExpParser<CharT>::CreateNamedCaptureAtIndex(const CharacterVector* name,
+                                               int index)
+{
+    MOZ_ASSERT(0 < index && index <= captures_started_);
+    MOZ_ASSERT(name != nullptr);
+
+    RegExpCapture* capture = GetCapture(index);
+    MOZ_ASSERT(capture->name() == nullptr);
+
+    capture->set_name(name);
+
+    if (named_captures_ == nullptr) {
+        named_captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
+    } else {
+        // Check for duplicates and bail if we find any. |capture| itself is not
+        // in the list yet, so it cannot match its own name here.
+        if (FindNamedCapture(name) != nullptr) {
+            ReportError(JSMSG_DUPLICATE_CAPTURE_NAME);
+            return false;
+        }
+    }
+    named_captures_->append(capture);
+    return true;
+}
+
+// Returns the recorded named capture whose name equals |name|, or nullptr if
+// there is none. Also returns nullptr when no named capture has been recorded
+// yet (named_captures_ is still null).
+template <typename CharT>
+RegExpCapture*
+RegExpParser<CharT>::FindNamedCapture(const CharacterVector* name)
+{
+    if (named_captures_ == nullptr)
+        return nullptr;
+
+    // Linear search is fine since there are usually very few named groups
+    for (RegExpCapture* capture : *named_captures_) {
+        if (*capture->name() == *name)
+            return capture;
+    }
+    return nullptr;
+}
+
+// Parses the <name> part of a \k<name> back reference; the parser must be
+// positioned on the '<'.
+//
+// A reference to a group that |state| is currently inside adds an empty atom.
+// Otherwise a RegExpBackReference carrying only the name is added to |builder|
+// and remembered in named_back_references_ so that PatchNamedBackReferences
+// can attach the capture later. Returns false once an error has been reported.
+template <typename CharT>
+bool
+RegExpParser<CharT>::ParseNamedBackReference(RegExpBuilder* builder,
+                                             RegExpParserState* state)
+{
+    // The parser is assumed to be on the '<' in \k<name>.
+    if (current() != '<') {
+        ReportError(JSMSG_INVALID_NAMED_REF);
+        return false;
+    }
+    Advance();
+
+    const CharacterVector* ref_name = ParseCaptureGroupName();
+    if (!ref_name)
+        return false;
+
+    if (state->IsInsideCaptureGroup(ref_name)) {
+        builder->AddEmpty();
+        return true;
+    }
+
+    RegExpBackReference* backref = alloc->newInfallible<RegExpBackReference>(nullptr);
+    backref->set_name(ref_name);
+    builder->AddAtom(backref);
+
+    if (!named_back_references_)
+        named_back_references_ = alloc->newInfallible<RegExpBackReferenceVector>(*alloc);
+    named_back_references_->append(backref);
+    return true;
+}
+
+// Resolves every back reference stored in named_back_references_ to the
+// capture of the same name. Reports JSMSG_INVALID_NAMED_CAPTURE_REF and stops
+// at the first reference whose name matches no recorded named capture.
+template <typename CharT>
+void
+RegExpParser<CharT>::PatchNamedBackReferences()
+{
+    if (!named_back_references_)
+        return;
+
+    if (!named_captures_) {
+        // Named backrefs but no named groups
+        ReportError(JSMSG_INVALID_NAMED_CAPTURE_REF);
+        return;
+    }
+
+    // Look up and patch the actual capture for each named back reference.
+    for (size_t n = 0; n < named_back_references_->length(); n++) {
+        RegExpBackReference* backref = (*named_back_references_)[n];
+        RegExpCapture* target = FindNamedCapture(backref->name());
+        if (!target) {
+            ReportError(JSMSG_INVALID_NAMED_CAPTURE_REF);
+            return;
+        }
+        backref->set_capture(target);
+    }
+}
+
+// Returns the RegExpCapture for the one-based capture |index|, creating
+// captures_ and any missing entries (with a null body) on demand. The list is
+// grown to capture_count_ once the pattern has been scanned for captures, and
+// to captures_started_ before that.
+template <typename CharT>
+RegExpCapture*
+RegExpParser<CharT>::GetCapture(int index)
+{
+    // The index for the capture groups are one-based. Its index in the list is
+    // zero-based.
+    const int known_captures = is_scanned_for_captures_ ? capture_count_
+                                                        : captures_started_;
+    MOZ_ASSERT(index <= known_captures);
+
+    if (!captures_)
+        captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
+
+    while (static_cast<int>(captures_->length()) < known_captures) {
+        captures_->append(
+            alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1));
+    }
+    return (*captures_)[index - 1];
+}
+
+// Reports whether the pattern contains a named capture group. Runs
+// ScanForCaptures() first if none has been seen so far and the pattern has not
+// been scanned yet.
+template <typename CharT>
+bool
+RegExpParser<CharT>::HasNamedCaptures() {
+    if (!has_named_captures_ && !is_scanned_for_captures_)
+        ScanForCaptures();
+    return has_named_captures_;
+}
+
+// Walks from this state outwards through previous_state() and returns true if
+// one of the enclosing CAPTURE states has capture index |index|.
+template <typename CharT>
+bool
+RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index)
+{
+    RegExpParserState* s = this;
+    while (s != nullptr) {
+        if (s->group_type() == CAPTURE) {
+            // Return true if we found the matching capture index.
+            if (index == s->capture_index())
+                return true;
+            // Abort if index is larger than what has been parsed up till this state.
+            if (index > s->capture_index())
+                return false;
+        }
+        s = s->previous_state();
+    }
+    return false;
+}
+
+// Walks from this state outwards through previous_state() and returns true if
+// one of the enclosing states is a named CAPTURE whose name equals |name|.
+template <typename CharT>
+bool
+RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(const CharacterVector* name)
+{
+    for (RegExpParserState* s = this; s != nullptr; s = s->previous_state()) {
+        if (s->group_type() == CAPTURE && s->IsNamedCapture() &&
+            *s->capture_name() == *name)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
// QuantifierPrefix ::
// { DecimalDigits }
// { DecimalDigits , }
@@ -1289,6 +1463,7 @@ RegExpTree*
RegExpParser<CharT>::ParsePattern()
{
RegExpTree* result = ParseDisjunction();
+ PatchNamedBackReferences();
MOZ_ASSERT_IF(result, !has_more());
return result;
}
@@ -1419,12 +1594,102 @@ UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool igno
CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
- AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0,
- ignore_case);
+ CharacterRange::AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
+ char_class, 0, ignore_case);
return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false);
}
+
+
+static inline RegExpTree* UnicodePropertyClassAtom(LifoAlloc* alloc, const std::string& name,
+ const std::string& value, bool negate, bool ignore_case);
+
+// If |name| is one of the special emoji sequence property names, returns a
+// subexpression that matches it; otherwise returns nullptr. All supported
+// sequences are hardcoded here.
+//
+// |name| is taken by const reference so the string is not copied per call.
+static inline RegExpTree*
+UnicodePropertySequenceAtom(LifoAlloc* alloc, const std::string& name)
+{
+    // Table-driven sequences. As read by the loop below, each sequence in a
+    // table ends with a 0 and the table itself ends with a further 0.
+    const widechar* sequence_list = nullptr;
+    if (name == "Emoji_Flag_Sequence" || name == "RGI_Emoji_Flag_Sequence")
+        sequence_list = kEmojiFlagSequences;
+    else if (name == "Emoji_Tag_Sequence" || name == "RGI_Emoji_Tag_Sequence")
+        sequence_list = kEmojiTagSequences;
+    else if (name == "Emoji_ZWJ_Sequence" || name == "RGI_Emoji_ZWJ_Sequence")
+        sequence_list = kEmojiZWJSequences;
+
+    if (sequence_list != nullptr) {
+        // TODO(yangguo): this creates huge regexp code. Alternative to this is
+        // to create a new operator that checks for these sequences at runtime.
+        RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+        while (true) {                      // Iterate through list of sequences.
+            while (*sequence_list != 0) {   // Iterate through sequence.
+                builder->AddUnicodeCharacter(*sequence_list, false);
+                sequence_list++;
+            }
+            sequence_list++;
+            if (*sequence_list == 0) break;
+            builder->NewAlternative();
+        }
+        return builder->ToRegExp();
+    }
+
+    if (name == "Emoji_Keycap_Sequence") {
+        // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence
+        // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3}
+        RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+        CharacterRangeVector* prefix_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+        prefix_ranges->append(CharacterRange::Range('0', '9'));
+        prefix_ranges->append(CharacterRange::Singleton('#'));
+        prefix_ranges->append(CharacterRange::Singleton('*'));
+        builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(prefix_ranges, false));
+        builder->AddCharacter(0xFE0F);
+        builder->AddCharacter(0x20E3);
+        return builder->ToRegExp();
+    }
+
+    if (name == "Emoji_Modifier_Sequence" || name == "RGI_Emoji_Modifier_Sequence") {
+        // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence
+        // emoji_modifier_sequence := emoji_modifier_base emoji_modifier
+        RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+        builder->AddAtom(UnicodePropertyClassAtom(alloc, "Emoji_Modifier_Base", "", false, false));
+        builder->AddAtom(UnicodePropertyClassAtom(alloc, "Emoji_Modifier", "", false, false));
+        return builder->ToRegExp();
+    }
+
+    return nullptr;
+}
+
+// Builds the atom for a \p{name} / \p{name=value} escape used outside a
+// character class. If CharacterRange::AddPropertyClassRange accepts the
+// property, the collected ranges are turned into an atom. Otherwise a bare,
+// non-negated |name| is tried as a property sequence. Returns nullptr when
+// neither applies.
+static inline RegExpTree*
+UnicodePropertyClassAtom(LifoAlloc* alloc, const std::string& name, const std::string& value,
+                         bool negate, bool ignore_case)
+{
+    CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+    CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+    CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+    WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
+
+    const bool is_property = CharacterRange::AddPropertyClassRange(
+        alloc, name, value, negate, ignore_case,
+        ranges, lead_ranges, trail_ranges, wide_ranges);
+    if (is_property)
+        return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false);
+
+    // Only a bare, non-negated name can denote a property sequence.
+    if (negate || !value.empty())
+        return nullptr;
+
+    // We allow Property Sequences in any unicode mode.
+    // They used to be allowed in /u (before /v was introduced) and there is
+    // active discussion to change it back again.
+    // The benefits outweigh the noncompliance.
+    return UnicodePropertySequenceAtom(alloc, name);
+}
+
static inline RegExpTree*
UnicodeBackReferenceAtom(LifoAlloc* alloc, RegExpTree* atom)
{
@@ -1455,24 +1720,24 @@ RegExpTree*
RegExpParser<CharT>::ParseDisjunction()
{
// Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
- RegExpParserState* stored_state = &initial_state;
+ RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0, nullptr);
+ RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
while (true) {
switch (current()) {
case kEndMarker:
- if (stored_state->IsSubexpression()) {
+ if (state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
return ReportError(JSMSG_MISSING_PAREN);
}
- MOZ_ASSERT(INITIAL == stored_state->group_type());
+ MOZ_ASSERT(INITIAL == state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
- if (!stored_state->IsSubexpression())
+ if (!state->IsSubexpression())
return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
- MOZ_ASSERT(INITIAL != stored_state->group_type());
+ MOZ_ASSERT(INITIAL != state->group_type());
Advance();
// End disjunction parsing and convert builder content to new single
@@ -1481,29 +1746,35 @@ RegExpParser<CharT>::ParseDisjunction()
int end_capture_index = captures_started();
- int capture_index = stored_state->capture_index();
- SubexpressionType group_type = stored_state->group_type();
-
- // Restore previous state.
- stored_state = stored_state->previous_state();
- builder = stored_state->builder();
+ int capture_index = state->capture_index();
+ SubexpressionType group_type = state->group_type();
// Build result of subexpression.
if (group_type == CAPTURE) {
- RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
- (*captures_)[capture_index - 1] = capture;
+ if (state->IsNamedCapture()) {
+ if (!CreateNamedCaptureAtIndex(state->capture_name(), capture_index)) {
+ return nullptr;
+ }
+ }
+ RegExpCapture* capture = GetCapture(capture_index);
+ capture->set_body(body);
body = capture;
} else if (group_type != GROUPING) {
- MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
- group_type == NEGATIVE_LOOKAHEAD);
- bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
- body = alloc->newInfallible<RegExpLookahead>(body,
+ MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND ||
+ group_type == NEGATIVE_LOOKAROUND);
+ bool is_positive = (group_type == POSITIVE_LOOKAROUND);
+ body = alloc->newInfallible<RegExpLookaround>(body,
is_positive,
end_capture_index - capture_index,
- capture_index);
+ capture_index,
+ state->lookaround_type());
}
+
+ // Restore previous state.
+ state = state->previous_state();
+ builder = state->builder();
builder->AddAtom(body);
- if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
+ if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND))
continue;
 // For compatibility with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
@@ -1563,6 +1834,9 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '(': {
SubexpressionType subexpr_type = CAPTURE;
+ RegExpLookaround::Type lookaround_type = state->lookaround_type();
+ bool is_named_capture = false;
+ const CharacterVector* capture_name = nullptr;
Advance();
if (current() == '?') {
switch (Next()) {
@@ -1570,26 +1844,48 @@ RegExpParser<CharT>::ParseDisjunction()
subexpr_type = GROUPING;
break;
case '=':
- subexpr_type = POSITIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = POSITIVE_LOOKAROUND;
break;
case '!':
- subexpr_type = NEGATIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ case '<':
+ Advance();
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ if (Next() == '=') {
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ } else if (Next() == '!') {
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ }
+ // Not a lookbehind, continue parsing as named group
+ is_named_capture = true;
+ has_named_captures_ = true;
break;
default:
return ReportError(JSMSG_INVALID_GROUP);
}
- Advance(2);
- } else {
- if (captures_ == nullptr)
- captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
- if (captures_started() >= kMaxCaptures)
- return ReportError(JSMSG_TOO_MANY_PARENS);
- captures_->append((RegExpCapture*) nullptr);
+ Advance(is_named_capture ? 1 : 2);
+ }
+ if (subexpr_type == CAPTURE) {
+ if (captures_started() >= kMaxCaptures)
+ return ReportError(JSMSG_TOO_MANY_PARENS);
+ captures_started_++;
+
+ if (is_named_capture) {
+ capture_name = ParseCaptureGroupName();
+ if (!capture_name)
+ return nullptr;
+ }
}
// Store current state and begin new disjunction parsing.
- stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
- captures_started());
- builder = stored_state->builder();
+ state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type,
+ lookaround_type, captures_started_,
+ capture_name);
+ builder = state->builder();
continue;
}
case '[': {
@@ -1619,44 +1915,61 @@ RegExpParser<CharT>::ParseDisjunction()
// CharacterClassEscape :: one of
// d D s S w W
case 'D': case 'S': case 'W':
- if (unicode_) {
- Advance();
- builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(),
- ignore_case_));
- Advance();
- break;
- }
- MOZ_FALLTHROUGH;
case 'd': case 's': case 'w': {
widechar c = Next();
+ bool negated = c <= 'Z';
Advance(2);
- CharacterRangeVector* ranges =
- alloc->newInfallible<CharacterRangeVector>(*alloc);
- if (unicode_)
- CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_);
- else
- CharacterRange::AddClassEscape(alloc, c, ranges);
- RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
- builder->AddAtom(atom);
+ if (unicode_ && negated) {
+ // must generate negative lookarounds for lone surrogates, done by AddCharOrEscapeUnicode
+ builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, c, ignore_case_));
+ } else {
+ // only match positive ranges
+ CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+ if (unicode_)
+ CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_);
+ else
+ CharacterRange::AddClassEscape(alloc, c, ranges);
+ RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
+ builder->AddAtom(atom);
+ }
+ break;
+ }
+ case 'p': case 'P': {
+ widechar p = Next();
+ Advance(2);
+ if (unicode_) {
+ bool negate = p == 'P';
+ std::string name, nvalue;
+ if (ParsePropertyClassName(name, nvalue)) {
+ RegExpTree* atom = UnicodePropertyClassAtom(alloc, name, nvalue,
+ negate, ignore_case_);
+ if (atom != nullptr) {
+ builder->AddAtom(atom);
+ break;
+ }
+ }
+ return ReportError(JSMSG_INVALID_PROPERTY_NAME);
+ } else {
+ builder->AddCharacter(p);
+ }
break;
}
case '1': case '2': case '3': case '4': case '5': case '6':
case '7': case '8': case '9': {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
- RegExpCapture* capture = nullptr;
- if (captures_ != nullptr && index <= (int) captures_->length()) {
- capture = (*captures_)[index - 1];
- }
- if (capture == nullptr) {
- builder->AddEmpty();
- break;
+ if (state->IsInsideCaptureGroup(index)) {
+ // The backreference is inside the capture group it refers to.
+ // Nothing can possibly have been captured yet.
+ builder->AddEmpty();
+ } else {
+ RegExpCapture* capture = GetCapture(index);
+ RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
+ if (unicode_)
+ builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
+ else
+ builder->AddAtom(atom);
}
- RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
- if (unicode_)
- builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
- else
- builder->AddAtom(atom);
break;
}
if (unicode_)
@@ -1741,45 +2054,28 @@ RegExpParser<CharT>::ParseDisjunction()
case 'u': {
Advance(2);
widechar value;
- if (unicode_) {
- if (current() == '{') {
- if (!ParseBracedHexEscape(&value))
- return nullptr;
- if (unicode::IsLeadSurrogate(value)) {
- builder->AddAtom(LeadSurrogateAtom(alloc, value));
- } else if (unicode::IsTrailSurrogate(value)) {
- builder->AddAtom(TrailSurrogateAtom(alloc, value));
- } else if (value >= unicode::NonBMPMin) {
- char16_t lead, trail;
- unicode::UTF16Encode(value, &lead, &trail);
- builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
- ignore_case_));
- } else {
- builder->AddCharacter(value);
- }
- } else if (ParseHexEscape(4, &value)) {
- if (unicode::IsLeadSurrogate(value)) {
- widechar trail;
- if (ParseTrailSurrogate(&trail)) {
- builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
- ignore_case_));
- } else {
- builder->AddAtom(LeadSurrogateAtom(alloc, value));
- }
- } else if (unicode::IsTrailSurrogate(value)) {
- builder->AddAtom(TrailSurrogateAtom(alloc, value));
- } else {
- builder->AddCharacter(value);
- }
- } else {
- return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
- }
- break;
+ if (ParseUnicodeEscape(&value)) {
+ builder->AddUnicodeCharacter(value, ignore_case_);
+ } else if (!unicode_) {
+ builder->AddCharacter('u');
+ } else {
+ return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
}
- if (ParseHexEscape(4, &value)) {
- builder->AddCharacter(value);
+ break;
+ }
+ case 'k': {
+ // Either an identity escape or a named back-reference. The two
+ // interpretations are mutually exclusive: '\k' is interpreted as
+ // an identity escape for non-Unicode patterns without named
+ // capture groups, and as the beginning of a named back-reference
+ // in all other cases.
+ if (unicode_ || HasNamedCaptures()) {
+ Advance(2);
+ if (!ParseNamedBackReference(builder, state)) {
+ return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
+ }
} else {
- builder->AddCharacter('u');
+ // Identity escape: consume the "\k" pair here as well, exactly as
+ // the sibling '\u' and '\p' cases do before emitting their literal.
+ // Without this the branch would leave the parser positioned on the
+ // backslash.
+ Advance(2);
+ builder->AddCharacter('k');
}
break;
}
@@ -1911,6 +2207,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
data->simple = parser.simple();
data->contains_anchor = parser.contains_anchor();
data->capture_count = parser.captures_started();
+ parser.StoreNamedCaptureMap(&data->capture_name_list, &data->capture_index_list);
return true;
}
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index 7c6e87e20f..5f3e6c54a2 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -139,6 +139,7 @@ class RegExpBuilder
public:
explicit RegExpBuilder(LifoAlloc* alloc);
void AddCharacter(char16_t character);
+ void AddUnicodeCharacter(widechar c, bool ignore_case);
// "Adds" an empty expression. Does nothing except consume a
// following quantifier
void AddEmpty();
@@ -168,9 +169,6 @@ class RegExpBuilder
#endif
};
-// Characters parsed by RegExpParser can be either char16_t or kEndMarker.
-typedef uint32_t widechar;
-
template <typename CharT>
class RegExpParser
{
@@ -198,24 +196,44 @@ class RegExpParser
bool ParseHexEscape(int length, widechar* value);
bool ParseBracedHexEscape(widechar* value);
+ bool ParseUnicodeEscape(widechar* value);
bool ParseTrailSurrogate(widechar* value);
bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
widechar ParseOctalLiteral();
+ // Parse the "{name[=value]}" part of a property class escape.
+ bool ParsePropertyClassName(std::string& name, std::string& value);
+
+ // Parses the name of a capture group (?<name>pattern). The name must adhere
+ // to IdentifierName in the ECMAScript standard.
+ const CharacterVector* ParseCaptureGroupName();
+
// Tries to parse the input as a back reference. If successful it
// stores the result in the output parameter and returns true. If
// it fails it will push back the characters read so the same characters
// can be reparsed.
bool ParseBackReferenceIndex(int* index_out);
- bool ParseClassAtom(char16_t* char_class, widechar *value);
+ // Parse an item inside a character class. Either add an escaped class to the ranges and return
+ // the matched range as |char_class|, or return a single character as |value|.
+ // The Unicode ranges can be null if not in Unicode mode.
+ bool ParseClassEscape(char16_t* char_class, widechar *value,
+ CharacterRangeVector* ranges,
+ CharacterRangeVector* lead_ranges,
+ CharacterRangeVector* trail_ranges,
+ WideCharRangeVector* wide_ranges);
RegExpTree* ReportError(unsigned errorNumber, const char* param = nullptr);
void Advance();
void Advance(int dist) {
next_pos_ += dist - 1;
Advance();
}
+
+ bool StoreNamedCaptureMap(CharacterVectorVector** names, IntegerVector** indices);
+ // Returns true iff the pattern contains named captures. May call
+ // ScanForCaptures to look ahead at the remaining pattern.
+ bool HasNamedCaptures();
void Reset(const CharT* pos) {
next_pos_ = pos;
@@ -228,7 +246,7 @@ class RegExpParser
bool simple() { return simple_; }
bool contains_anchor() { return contains_anchor_; }
void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); }
+ int captures_started() { return captures_started_; }
const CharT* position() { return next_pos_ - 1; }
static const int kMaxCaptures = 1 << 16;
@@ -238,8 +256,8 @@ class RegExpParser
enum SubexpressionType {
INITIAL,
CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAHEAD,
- NEGATIVE_LOOKAHEAD,
+ POSITIVE_LOOKAROUND,
+ NEGATIVE_LOOKAROUND,
GROUPING
};
@@ -248,11 +266,15 @@ class RegExpParser
RegExpParserState(LifoAlloc* alloc,
RegExpParserState* previous_state,
SubexpressionType group_type,
- int disjunction_capture_index)
+ RegExpLookaround::Type lookaround_type,
+ int disjunction_capture_index,
+ const CharacterVector* capture_name)
: previous_state_(previous_state),
builder_(alloc->newInfallible<RegExpBuilder>(alloc)),
group_type_(group_type),
- disjunction_capture_index_(disjunction_capture_index)
+ lookaround_type_(lookaround_type),
+ disjunction_capture_index_(disjunction_capture_index),
+ capture_name_(capture_name)
{}
// Parser state of containing expression, if any.
RegExpParserState* previous_state() { return previous_state_; }
@@ -261,10 +283,21 @@ class RegExpParser
RegExpBuilder* builder() { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() { return group_type_; }
+ // Lookahead or Lookbehind.
+ RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
+ // The name of the current sub-expression, if group_type is CAPTURE. Only
+ // used for named captures.
+ const CharacterVector* capture_name() const { return capture_name_; }
+ bool IsNamedCapture() const { return capture_name_ != nullptr; }
+
+ // Check whether the parser is inside a capture group with the given index.
+ bool IsInsideCaptureGroup(int index);
+ // Check whether the parser is inside a capture group with the given name.
+ bool IsInsideCaptureGroup(const CharacterVector* name);
private:
// Linked list implementation of stack of states.
@@ -273,10 +306,33 @@ class RegExpParser
RegExpBuilder* builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
SubexpressionType group_type_;
+ // Stored read direction.
+ RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
+ // Stored capture name (if any).
+ const CharacterVector* const capture_name_;
};
+ // Return the 1-indexed RegExpCapture object, allocate if necessary.
+ RegExpCapture* GetCapture(int index);
+
+ // Creates a new named capture at the specified index. Must be called exactly
+ // once for each named capture. Fails if a capture with the same name is
+ // encountered.
+ bool CreateNamedCaptureAtIndex(const CharacterVector* name, int index);
+
+ // Find a named capture group by name, or return null if not found
+ RegExpCapture* FindNamedCapture(const CharacterVector* name);
+
+ bool ParseNamedBackReference(RegExpBuilder* builder,
+ RegExpParserState* state);
+
+ // After the initial parsing pass, patch corresponding RegExpCapture objects
+ // into all RegExpBackReferences. This is done after initial parsing in order
+ // to avoid complicating cases in which a reference comes before its capture.
+ void PatchNamedBackReferences();
+
widechar current() { return current_; }
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < end_; }
@@ -290,9 +346,13 @@ class RegExpParser
frontend::TokenStream& ts;
LifoAlloc* alloc;
RegExpCaptureVector* captures_;
+ // contains the subset of captures_ that have names (for duplicate checking)
+ RegExpCaptureVector* named_captures_;
+ RegExpBackReferenceVector* named_back_references_;
const CharT* next_pos_;
const CharT* end_;
widechar current_;
+ int captures_started_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool has_more_;
@@ -303,6 +363,7 @@ class RegExpParser
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
+ bool has_named_captures_; // Only valid after we have scanned for captures.
};
} } // namespace js::irregexp
diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp
index 66e8e25ddf..3f1b7251a3 100644
--- a/js/src/jit/CodeGenerator.cpp
+++ b/js/src/jit/CodeGenerator.cpp
@@ -1513,6 +1513,16 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx)
return nullptr;
}
+ // If a regexp has named captures, fall back to the OOL stub, which
+ // will end up calling CreateRegExpMatchResults.
+ Register shared = temp2;
+ masm.loadPtr(Address(regexp, NativeObject::getFixedSlotOffset(RegExpObject::PRIVATE_SLOT)),
+ shared);
+ masm.branchPtr(Assembler::NotEqual,
+ Address(shared, RegExpShared::offsetOfGroupsTemplate()),
+ ImmWord(0),
+ &oolEntry);
+
// Construct the result.
Register object = temp1;
Label matchResultFallback, matchResultJoin;
@@ -1523,6 +1533,7 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx)
masm.loadPtr(Address(object, NativeObject::offsetOfSlots()), temp2);
masm.storeValue(templateObject->getSlot(0), Address(temp2, 0));
masm.storeValue(templateObject->getSlot(1), Address(temp2, sizeof(Value)));
+ masm.storeValue(templateObject->getSlot(2), Address(temp2, 2 * sizeof(Value)));
size_t elementsOffset = NativeObject::offsetOfFixedElements();
@@ -1636,6 +1647,7 @@ JitCompartment::generateRegExpMatcherStub(JSContext* cx)
MOZ_ASSERT(templateObject->numFixedSlots() == 0);
MOZ_ASSERT(templateObject->lookupPure(cx->names().index)->slot() == 0);
MOZ_ASSERT(templateObject->lookupPure(cx->names().input)->slot() == 1);
+ MOZ_ASSERT(templateObject->lookupPure(cx->names().groups)->slot() == 2);
masm.load32(pairsVectorAddress, temp3);
masm.storeValue(JSVAL_TYPE_INT32, temp3, Address(temp2, 0));
diff --git a/js/src/js.msg b/js/src/js.msg
index 51854fc398..93d8a557b1 100644
--- a/js/src/js.msg
+++ b/js/src/js.msg
@@ -513,6 +513,12 @@ MSG_DEF(JSMSG_TOO_MANY_PARENS, 0, JSEXN_INTERNALERR, "too many parenthes
MSG_DEF(JSMSG_UNICODE_OVERFLOW, 1, JSEXN_SYNTAXERR, "Unicode codepoint must not be greater than 0x10FFFF in {0}")
MSG_DEF(JSMSG_UNMATCHED_RIGHT_PAREN, 0, JSEXN_SYNTAXERR, "unmatched ) in regular expression")
MSG_DEF(JSMSG_UNTERM_CLASS, 0, JSEXN_SYNTAXERR, "unterminated character class")
+MSG_DEF(JSMSG_INVALID_PROPERTY_NAME, 0, JSEXN_SYNTAXERR, "invalid property name in regular expression")
+MSG_DEF(JSMSG_INVALID_CLASS_PROPERTY_NAME, 0, JSEXN_SYNTAXERR, "invalid class property name in regular expression")
+MSG_DEF(JSMSG_INVALID_CAPTURE_NAME, 0, JSEXN_SYNTAXERR, "invalid capture group name in regular expression")
+MSG_DEF(JSMSG_DUPLICATE_CAPTURE_NAME, 0, JSEXN_SYNTAXERR, "duplicate capture group name in regular expression")
+MSG_DEF(JSMSG_INVALID_NAMED_REF, 0, JSEXN_SYNTAXERR, "invalid named reference in regular expression")
+MSG_DEF(JSMSG_INVALID_NAMED_CAPTURE_REF, 0, JSEXN_SYNTAXERR, "invalid named capture reference in regular expression")
// Self-hosting
MSG_DEF(JSMSG_DEFAULT_LOCALE_ERROR, 0, JSEXN_ERR, "internal error getting the default locale")
diff --git a/js/src/moz.build b/js/src/moz.build
index 642dd7d911..5ac4fcd669 100644
--- a/js/src/moz.build
+++ b/js/src/moz.build
@@ -153,6 +153,7 @@ UNIFIED_SOURCES += [
'irregexp/NativeRegExpMacroAssembler.cpp',
'irregexp/RegExpAST.cpp',
'irregexp/RegExpCharacters.cpp',
+ 'irregexp/RegExpCharRanges.cpp',
'irregexp/RegExpEngine.cpp',
'irregexp/RegExpInterpreter.cpp',
'irregexp/RegExpMacroAssembler.cpp',
diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h
index 5080e6ab09..57ec80669c 100644
--- a/js/src/vm/CommonPropertyNames.h
+++ b/js/src/vm/CommonPropertyNames.h
@@ -162,6 +162,7 @@
macro(global, global, "global") \
macro(globalThis, globalThis, "globalThis") \
macro(group, group, "group") \
+ macro(groups, groups, "groups") \
macro(Handle, Handle, "Handle") \
macro(has, has, "has") \
macro(hasOwn, hasOwn, "hasOwn") \
diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp
index 33b97a1174..e96db29edb 100644
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -15,6 +15,7 @@
#include "builtin/RegExp.h"
#include "frontend/TokenStream.h"
+#include "irregexp/FeatureFlags.h"
#ifdef DEBUG
#include "irregexp/RegExpBytecode.h"
#endif
@@ -109,7 +110,7 @@ ScopedMatchPairs::allocOrExpandArray(size_t pairCount)
bool
VectorMatchPairs::allocOrExpandArray(size_t pairCount)
{
- if (!vec_.resizeUninitialized(sizeof(MatchPair) * pairCount))
+ if (!vec_.resizeUninitialized(pairCount))
return false;
pairs_ = &vec_[0];
@@ -950,7 +951,8 @@ js::StringHasRegExpMetaChars(JSLinearString* str)
/* RegExpShared */
RegExpShared::RegExpShared(JSAtom* source, RegExpFlag flags)
- : source(source), flags(flags), parenCount(0), canStringMatch(false), marked_(false)
+ : source(source), flags(flags), parenCount(0), canStringMatch(false), marked_(false),
+ numNamedCaptures_(0), groupsTemplate_(nullptr)
{}
RegExpShared::~RegExpShared()
@@ -1005,6 +1007,56 @@ RegExpShared::compile(JSContext* cx, HandleLinearString input,
}
bool
+RegExpShared::initializeNamedCaptures(JSContext* cx, irregexp::CharacterVectorVector* names, irregexp::IntegerVector* indices)
+{
+ MOZ_ASSERT(!groupsTemplate_);
+ MOZ_ASSERT(names);
+ MOZ_ASSERT(indices);
+ MOZ_ASSERT(names->length() == indices->length());
+
+ // Builds groupsTemplate_ from the parser's named-capture data, which arrives
+ // as two parallel vectors (asserted equal in length above). The template gets
+ // one enumerable property per capture name whose value is the Int32 capture index.
+ uint32_t numNamedCaptures = names->length();
+
+ // Allocate the template as a tenured PlainObject, passing nullptr as the given proto.
+ RootedPlainObject templateObject(cx, NewObjectWithGivenProto<PlainObject>(cx, nullptr, TenuredObject));
+ if (!templateObject) {
+ return false;
+ }
+
+ // Make a fresh ObjectGroup from the template's own class and proto, and install it.
+ Rooted<TaggedProto> proto(cx, templateObject->taggedProto());
+ ObjectGroup* group = ObjectGroupCompartment::makeGroup(cx, templateObject->getClass(), proto);
+ if (!group) {
+ return false;
+ }
+ templateObject->setGroup(group);
+
+ // Define one property on the template for each named capture; any failure aborts with false.
+ RootedId id(cx);
+ for (uint32_t i = 0; i < numNamedCaptures; i++) {
+ irregexp::CharacterVector* cv = (*names)[i];
+ // Atomize the name explicitly: a plain String (not an Atom) would not get added to the atom table.
+ JSAtom* atom = AtomizeChars(cx, cv->begin(), cv->length());
+ if (!atom) {
+ return false;
+ }
+ id = NameToId(atom->asPropertyName());
+ RootedValue idx(cx, Int32Value((*indices)[i]));
+ if (!NativeDefineProperty(cx, templateObject, id, idx,
+ nullptr, nullptr, JSPROP_ENUMERATE)) {
+ return false;
+ }
+ AddTypePropertyId(cx, templateObject, id, TypeSet::Int32Type());
+ }
+
+ groupsTemplate_ = templateObject;
+ numNamedCaptures_ = numNamedCaptures;
+ return true;
+}
+
+bool
RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString input,
CompilationMode mode, ForceByteCodeEnum force)
{
@@ -1026,6 +1078,12 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
}
this->parenCount = data.capture_count;
+ if (data.capture_name_list) {
+ // convert LifoAlloc'd named capture info to NativeObject
+ if (!initializeNamedCaptures(cx, data.capture_name_list, data.capture_index_list)) {
+ return false;
+ }
+ }
irregexp::RegExpCode code = irregexp::CompilePattern(cx, this, &data, input,
false /* global() */,
@@ -1259,17 +1317,27 @@ RegExpCompartment::createMatchResultTemplateObject(JSContext* cx)
return matchResultTemplateObject_; // = nullptr
}
+ /* Set dummy groups property */
+ RootedValue groupsVal(cx, UndefinedValue());
+ if (!NativeDefineProperty(
+ cx, templateObject, cx->names().groups, groupsVal, nullptr, nullptr, JSPROP_ENUMERATE)) {
+ return nullptr;
+ }
+
// Make sure that the properties are in the right slots.
DebugOnly<Shape*> shape = templateObject->lastProperty();
- MOZ_ASSERT(shape->previous()->slot() == 0 &&
- shape->previous()->propidRef() == NameToId(cx->names().index));
- MOZ_ASSERT(shape->slot() == 1 &&
- shape->propidRef() == NameToId(cx->names().input));
+ MOZ_ASSERT(shape->slot() == 2 &&
+ shape->propidRef() == NameToId(cx->names().groups));
+ MOZ_ASSERT(shape->previous()->slot() == 1 &&
+ shape->previous()->propidRef() == NameToId(cx->names().input));
+ MOZ_ASSERT(shape->previous()->previous()->slot() == 0 &&
+ shape->previous()->previous()->propidRef() == NameToId(cx->names().index));
// Make sure type information reflects the indexed properties which might
// be added.
AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::StringType());
AddTypePropertyId(cx, templateObject, JSID_VOID, TypeSet::UndefinedType());
+ AddTypePropertyId(cx, templateObject, NameToId(cx->names().groups), TypeSet::AnyObjectType());
matchResultTemplateObject_.set(templateObject);
@@ -1484,6 +1552,13 @@ ParseRegExpFlags(const CharT* chars, size_t length, RegExpFlag* flagsOut, char16
if (!HandleRegExpFlag(UnicodeFlag, flagsOut))
return false;
break;
+ case 'v':
+ // 'v' is only accepted when the kParseFlagUnicodeSetsAsUnicode feature
+ // flag is on, in which case it sets UnicodeFlag; otherwise control
+ // falls through to the default case, which rejects the flag.
+ if (irregexp::kParseFlagUnicodeSetsAsUnicode) {
+ if (!HandleRegExpFlag(UnicodeFlag, flagsOut))
+ return false;
+ break;
+ }
+ MOZ_FALLTHROUGH;
default:
return false;
}
diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h
index ca7a39ec65..17d961eede 100644
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -17,6 +17,7 @@
#include "proxy/Proxy.h"
#include "vm/ArrayObject.h"
#include "vm/Shape.h"
+#include "irregexp/InfallibleVector.h"
/*
* JavaScript Regular Expressions
@@ -133,6 +134,9 @@ class RegExpShared
bool canStringMatch;
bool marked_;
+ uint32_t numNamedCaptures_;
+ GCPtr<PlainObject*> groupsTemplate_;
+
RegExpCompilation compilationArray[4];
static int CompilationIndex(CompilationMode mode, bool latin1) {
@@ -187,6 +191,11 @@ class RegExpShared
/* Accounts for the "0" (whole match) pair. */
size_t pairCount() const { return getParenCount() + 1; }
+ // not public due to circular inclusion problems
+ bool initializeNamedCaptures(JSContext* cx, irregexp::CharacterVectorVector* names, irregexp::IntegerVector* indices);
+ PlainObject* getGroupsTemplate() { return groupsTemplate_; }
+ uint32_t numNamedCaptures() const { return numNamedCaptures_; }
+
JSAtom* getSource() const { return source; }
RegExpFlag getFlags() const { return flags; }
bool ignoreCase() const { return flags & IgnoreCaseFlag; }
@@ -238,6 +247,10 @@ class RegExpShared
+ offsetof(RegExpCompilation, jitCode);
}
+ static size_t offsetOfGroupsTemplate() {
+ return offsetof(RegExpShared, groupsTemplate_);
+ }
+
size_t sizeOfIncludingThis(mozilla::MallocSizeOf mallocSizeOf);
#ifdef DEBUG
diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp
index 06490f24df..686b2e9c28 100644
--- a/js/src/vm/SelfHosting.cpp
+++ b/js/src/vm/SelfHosting.cpp
@@ -1697,39 +1697,29 @@ static bool
intrinsic_RegExpGetSubstitution(JSContext* cx, unsigned argc, Value* vp)
{
CallArgs args = CallArgsFromVp(argc, vp);
-
MOZ_ASSERT(args.length() == 6);
- RootedString matched(cx, args[0].toString());
- RootedString string(cx, args[1].toString());
+ RootedArrayObject matchResult(cx, &args[0].toObject().as<ArrayObject>());
+
+ RootedLinearString string(cx, args[1].toString()->ensureLinear(cx));
+ if (!string)
+ return false;
int32_t position = int32_t(args[2].toNumber());
MOZ_ASSERT(position >= 0);
- RootedObject captures(cx, &args[3].toObject());
-#ifdef DEBUG
- bool isArray = false;
- MOZ_ALWAYS_TRUE(IsArray(cx, captures, &isArray));
- MOZ_ASSERT(isArray);
-#endif
-
- RootedString replacement(cx, args[4].toString());
+ RootedLinearString replacement(cx, args[3].toString()->ensureLinear(cx));
+ if (!replacement)
+ return false;
- int32_t firstDollarIndex = int32_t(args[5].toNumber());
+ int32_t firstDollarIndex = int32_t(args[4].toNumber());
MOZ_ASSERT(firstDollarIndex >= 0);
- RootedLinearString matchedLinear(cx, matched->ensureLinear(cx));
- if (!matchedLinear)
- return false;
- RootedLinearString stringLinear(cx, string->ensureLinear(cx));
- if (!stringLinear)
- return false;
- RootedLinearString replacementLinear(cx, replacement->ensureLinear(cx));
- if (!replacementLinear)
- return false;
+ RootedValue namedCaptures(cx, args[5]);
+ MOZ_ASSERT(namedCaptures.isUndefined() || namedCaptures.isObject());
- return RegExpGetSubstitution(cx, matchedLinear, stringLinear, size_t(position), captures,
- replacementLinear, size_t(firstDollarIndex), args.rval());
+ return RegExpGetSubstitution(cx, matchResult, string, size_t(position), replacement,
+ size_t(firstDollarIndex), namedCaptures, args.rval());
}
static bool