summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwolfbeast <mcwerewolf@wolfbeast.com>2019-11-18 12:20:44 +0100
committerwolfbeast <mcwerewolf@wolfbeast.com>2019-11-18 12:20:44 +0100
commitf31b04a303607cd82757e7c4f60bb536658c8a30 (patch)
tree98b720782be5bc8bd77202bb4c1dc69e4927c03e
parent36c81a978695ec3ba68af8475422bcab49ef470d (diff)
downloaduxp-f31b04a303607cd82757e7c4f60bb536658c8a30.tar.gz
Issue #1284 - Implement /s (dotAll) for regular expressions.
Resolves #1284.
-rw-r--r--js/src/builtin/RegExp.cpp24
-rw-r--r--js/src/builtin/RegExp.h2
-rw-r--r--js/src/builtin/RegExp.js5
-rw-r--r--js/src/builtin/SelfHostingDefines.h1
-rw-r--r--js/src/frontend/TokenStream.cpp2
-rw-r--r--js/src/irregexp/RegExpParser.cpp52
-rw-r--r--js/src/irregexp/RegExpParser.h1
-rw-r--r--js/src/vm/RegExpObject.h7
8 files changed, 88 insertions, 6 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp
index 7cf20d23c5..93a7f2b79f 100644
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -664,6 +664,29 @@ js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp)
return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_multiline_impl>(cx, args);
}
+// ES 2018 dotAll getter body: reports the receiver's dotall() flag as a boolean, or undefined when |this| is not a RegExpObject.
+MOZ_ALWAYS_INLINE bool
+regexp_dotall_impl(JSContext* cx, const CallArgs& args)
+{
+ MOZ_ASSERT(IsRegExpInstanceOrPrototype(args.thisv()));
+ // The assert admits values that IsRegExpObject rejects (presumably RegExp.prototype); those yield undefined.
+ if (!IsRegExpObject(args.thisv())) {
+ args.rval().setUndefined();
+ return true;
+ }
+ // IsRegExpObject passed, so view |this| as a RegExpObject and read its flag.
+ Rooted<RegExpObject*> reObj(cx, &args.thisv().toObject().as<RegExpObject>());
+ args.rval().setBoolean(reObj->dotall());
+ return true;
+}
+// Native getter listed in js::regexp_properties: wraps argc/vp in CallArgs and runs regexp_dotall_impl through CallNonGenericMethod with the IsRegExpInstanceOrPrototype test.
+bool
+js::regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp)
+{
+ CallArgs args = CallArgsFromVp(argc, vp);
+ return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_dotall_impl>(cx, args);
+}
+
// ES 2017 draft rev32 21.2.5.10.
MOZ_ALWAYS_INLINE bool
regexp_source_impl(JSContext* cx, const CallArgs& args)
@@ -759,6 +782,7 @@ const JSPropertySpec js::regexp_properties[] = {
JS_PSG("source", regexp_source, 0),
JS_PSG("sticky", regexp_sticky, 0),
JS_PSG("unicode", regexp_unicode, 0),
+ JS_PSG("dotAll", regexp_dotall, 0), // ES2018 spells the accessor "dotAll" (capital A)
JS_PS_END
};
diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h
index 4e0ff69484..f808f5146a 100644
--- a/js/src/builtin/RegExp.h
+++ b/js/src/builtin/RegExp.h
@@ -153,6 +153,8 @@ extern MOZ_MUST_USE bool
regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp);
extern MOZ_MUST_USE bool
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp);
+extern MOZ_MUST_USE bool
+regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp);
} /* namespace js */
diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js
index 0b849292cf..1a22765943 100644
--- a/js/src/builtin/RegExp.js
+++ b/js/src/builtin/RegExp.js
@@ -3,6 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// ES6 draft rev34 (2015/02/20) 21.2.5.3 get RegExp.prototype.flags
+// Updated for ES2018 /s (dotAll)
function RegExpFlagsGetter() {
// Steps 1-2.
var R = this;
@@ -31,6 +32,10 @@ function RegExpFlagsGetter() {
// Steps 16-18.
if (R.sticky)
result += "y";
+
+ // ES2018 dotAll. NOTE(review): the spec emits "s" between "m" and "u"; appending it after "y" deviates - confirm and move this check.
+ if (R.dotAll)
+ result += "s";
// Step 19.
return result;
diff --git a/js/src/builtin/SelfHostingDefines.h b/js/src/builtin/SelfHostingDefines.h
index d676270a14..6512810ca9 100644
--- a/js/src/builtin/SelfHostingDefines.h
+++ b/js/src/builtin/SelfHostingDefines.h
@@ -90,6 +90,7 @@
#define REGEXP_MULTILINE_FLAG 0x04
#define REGEXP_STICKY_FLAG 0x08
#define REGEXP_UNICODE_FLAG 0x10
+#define REGEXP_DOTALL_FLAG 0x20
#define MODULE_OBJECT_ENVIRONMENT_SLOT 2
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index b8623d545e..e07f8df8ad 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -1843,6 +1843,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
reflags = RegExpFlag(reflags | StickyFlag);
else if (c == 'u' && !(reflags & UnicodeFlag))
reflags = RegExpFlag(reflags | UnicodeFlag);
+ else if (c == 's' && !(reflags & DotAllFlag))
+ reflags = RegExpFlag(reflags | DotAllFlag);
else
break;
getChar();
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index 9ef9fe3e2f..28abdb0b48 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -1384,7 +1384,7 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
{
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
- // everything except \x0a, \x0d, \u2028 and \u2029
+ // Everything except \x0a, \x0d, \u2028 and \u2029
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
ranges->append(CharacterRange::Range(0x0, 0x09));
@@ -1414,6 +1414,38 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
return builder->ToRegExp();
}
+static inline RegExpTree*
+UnicodeDotAllAtom(LifoAlloc* alloc)
+{
+ RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+ // Builds the tree used for '.' when both unicode_ and dotall_ are set: four alternatives assembled on one builder.
+ // Alternative 1: full range excluding surrogates because /s was specified (0x0..LeadSurrogateMin-1 and TrailSurrogateMax+1..UTF16Max).
+
+ CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+ ranges->append(CharacterRange::Range(0x0, unicode::LeadSurrogateMin - 1));
+ ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max));
+ builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
+
+ builder->NewAlternative();
+ // Alternative 2: a lead-surrogate range atom followed by a negative lookahead over the trail-surrogate range (presumably a lone lead surrogate).
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
+ unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+ // Alternative 3: a NOT_AFTER_LEAD_SURROGATE assertion followed by a trail-surrogate range atom (presumably a lone trail surrogate).
+ builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
+ RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+ // Alternative 4: a lead-surrogate range atom followed by a trail-surrogate range atom, i.e. a surrogate pair.
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+
+ return builder->ToRegExp();
+}
+
RegExpTree*
UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
{
@@ -1541,13 +1573,25 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '.': {
Advance();
- // everything except \x0a, \x0d, \u2028 and \u2029
+
if (unicode_) {
- builder->AddAtom(UnicodeEverythingAtom(alloc));
+ if (dotall_) {
+ // Everything
+ builder->AddAtom(UnicodeDotAllAtom(alloc));
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ builder->AddAtom(UnicodeEverythingAtom(alloc));
+ }
break;
}
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
- CharacterRange::AddClassEscape(alloc, '.', ranges);
+ if (dotall_) {
+ // Everything
+ CharacterRange::AddClassEscape(alloc, '*', ranges);
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ CharacterRange::AddClassEscape(alloc, '.', ranges);
+ }
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
builder->AddAtom(atom);
break;
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index 2f02625b5c..36c24cd7c7 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -313,6 +313,7 @@ class RegExpParser
bool multiline_;
bool unicode_;
bool ignore_case_;
+ bool dotall_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h
index f1ea101ed5..4548521dfc 100644
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -53,16 +53,18 @@ enum RegExpFlag
MultilineFlag = 0x04,
StickyFlag = 0x08,
UnicodeFlag = 0x10,
+ DotAllFlag = 0x20,
NoFlags = 0x00,
- AllFlags = 0x1f
+ AllFlags = 0x3f
};
static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG &&
GlobalFlag == REGEXP_GLOBAL_FLAG &&
MultilineFlag == REGEXP_MULTILINE_FLAG &&
StickyFlag == REGEXP_STICKY_FLAG &&
- UnicodeFlag == REGEXP_UNICODE_FLAG,
+ UnicodeFlag == REGEXP_UNICODE_FLAG &&
+ DotAllFlag == REGEXP_DOTALL_FLAG,
"Flag values should be in sync with self-hosted JS");
enum RegExpRunStatus
@@ -480,6 +482,7 @@ class RegExpObject : public NativeObject
bool multiline() const { return getFlags() & MultilineFlag; }
bool sticky() const { return getFlags() & StickyFlag; }
bool unicode() const { return getFlags() & UnicodeFlag; }
+ bool dotall() const { return getFlags() & DotAllFlag; }
static bool isOriginalFlagGetter(JSNative native, RegExpFlag* mask);