summary | refs | log | tree | commit | diff
path: root/js
diff options
context:
space:
mode:
author: wolfbeast <mcwerewolf@wolfbeast.com> 2019-11-18 16:50:40 +0100
committer: wolfbeast <mcwerewolf@wolfbeast.com> 2019-11-18 16:50:40 +0100
commit: 122e1ee6cd24b5de80a1702313db732c8961202a (patch)
tree: 4c79ac5ab8d11c15375c42867f9c47f575c5e54f /js
parent: 62a72e3d281ea48e7b311a1c153a0e5ae7586da8 (diff)
download: uxp-122e1ee6cd24b5de80a1702313db732c8961202a.tar.gz
Issue #1284 - Implement /s (dotAll) for regular expressions, v2.
Resolves #1284.
Diffstat (limited to 'js')
-rw-r--r-- js/src/builtin/RegExp.cpp 33
-rw-r--r-- js/src/builtin/RegExp.h 2
-rw-r--r-- js/src/builtin/RegExp.js 5
-rw-r--r-- js/src/builtin/SelfHostingDefines.h 1
-rw-r--r-- js/src/frontend/TokenStream.cpp 2
-rw-r--r-- js/src/irregexp/RegExpParser.cpp 75
-rw-r--r-- js/src/irregexp/RegExpParser.h 7
-rw-r--r-- js/src/jsapi.h 1
-rw-r--r-- js/src/vm/CommonPropertyNames.h 1
-rw-r--r-- js/src/vm/RegExpObject.cpp 5
-rw-r--r-- js/src/vm/RegExpObject.h 8
11 files changed, 117 insertions, 23 deletions
diff --git a/js/src/builtin/RegExp.cpp b/js/src/builtin/RegExp.cpp
index 7cf20d23c5..b7853d533a 100644
--- a/js/src/builtin/RegExp.cpp
+++ b/js/src/builtin/RegExp.cpp
@@ -178,7 +178,7 @@ CheckPatternSyntax(JSContext* cx, HandleAtom pattern, RegExpFlag flags)
CompileOptions options(cx);
frontend::TokenStream dummyTokenStream(cx, options, nullptr, 0, nullptr);
return irregexp::ParsePatternSyntax(dummyTokenStream, cx->tempLifoAlloc(), pattern,
- flags & UnicodeFlag);
+ flags & UnicodeFlag, flags & DotAllFlag);
}
enum RegExpSharedUse {
@@ -664,6 +664,29 @@ js::regexp_multiline(JSContext* cx, unsigned argc, JS::Value* vp)
return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_multiline_impl>(cx, args);
}
+// ES 2018 dotAll getter body. NOTE(review): the spec names the accessor "dotAll", but this patch registers it as "dotall" -- confirm.
+MOZ_ALWAYS_INLINE bool
+regexp_dotall_impl(JSContext* cx, const CallArgs& args)
+{
+ MOZ_ASSERT(IsRegExpInstanceOrPrototype(args.thisv()));
+ // The assertion admits more than RegExp objects; a |this| that is not a RegExp object reports undefined.
+ if (!IsRegExpObject(args.thisv())) {
+ args.rval().setUndefined();
+ return true;
+ }
+ // |this| passed IsRegExpObject above: report its dotall() flag as a boolean.
+ Rooted<RegExpObject*> reObj(cx, &args.thisv().toObject().as<RegExpObject>());
+ args.rval().setBoolean(reObj->dotall());
+ return true;
+}
+
+bool
+js::regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp) // native entry point for the dotall getter
+{
+ CallArgs args = CallArgsFromVp(argc, vp);
+ return CallNonGenericMethod<IsRegExpInstanceOrPrototype, regexp_dotall_impl>(cx, args); // presumably runs the impl only when |this| satisfies the predicate -- TODO confirm
+}
+
// ES 2017 draft rev32 21.2.5.10.
MOZ_ALWAYS_INLINE bool
regexp_source_impl(JSContext* cx, const CallArgs& args)
@@ -759,6 +782,7 @@ const JSPropertySpec js::regexp_properties[] = {
JS_PSG("source", regexp_source, 0),
JS_PSG("sticky", regexp_sticky, 0),
JS_PSG("unicode", regexp_unicode, 0),
+ JS_PSG("dotall", regexp_dotall, 0),
JS_PS_END
};
@@ -1642,6 +1666,13 @@ js::RegExpPrototypeOptimizableRaw(JSContext* cx, JSObject* proto)
if (unicodeGetter != regexp_unicode)
return false;
+ JSNative dotAllGetter;
+ if (!GetOwnNativeGetterPure(cx, proto, NameToId(cx->names().dotall), &dotAllGetter))
+ return false;
+
+ if (dotAllGetter != regexp_dotall)
+ return false;
+
// Check if @@match, @@search, and exec are own data properties,
// those values should be tested in selfhosted JS.
bool has = false;
diff --git a/js/src/builtin/RegExp.h b/js/src/builtin/RegExp.h
index 4e0ff69484..f808f5146a 100644
--- a/js/src/builtin/RegExp.h
+++ b/js/src/builtin/RegExp.h
@@ -153,6 +153,8 @@ extern MOZ_MUST_USE bool
regexp_sticky(JSContext* cx, unsigned argc, JS::Value* vp);
extern MOZ_MUST_USE bool
regexp_unicode(JSContext* cx, unsigned argc, JS::Value* vp);
+extern MOZ_MUST_USE bool
+regexp_dotall(JSContext* cx, unsigned argc, JS::Value* vp);
} /* namespace js */
diff --git a/js/src/builtin/RegExp.js b/js/src/builtin/RegExp.js
index 0b849292cf..1a22765943 100644
--- a/js/src/builtin/RegExp.js
+++ b/js/src/builtin/RegExp.js
@@ -3,6 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// ES6 draft rev34 (2015/02/20) 21.2.5.3 get RegExp.prototype.flags
+// Updated for ES2018 /s (dotAll)
function RegExpFlagsGetter() {
// Steps 1-2.
var R = this;
@@ -31,6 +32,10 @@ function RegExpFlagsGetter() {
// Steps 16-18.
if (R.sticky)
result += "y";
+
+ // ES2018
+ if (R.dotall)
+ result += "s";
// Step 19.
return result;
diff --git a/js/src/builtin/SelfHostingDefines.h b/js/src/builtin/SelfHostingDefines.h
index d676270a14..6512810ca9 100644
--- a/js/src/builtin/SelfHostingDefines.h
+++ b/js/src/builtin/SelfHostingDefines.h
@@ -90,6 +90,7 @@
#define REGEXP_MULTILINE_FLAG 0x04
#define REGEXP_STICKY_FLAG 0x08
#define REGEXP_UNICODE_FLAG 0x10
+#define REGEXP_DOTALL_FLAG 0x20
#define MODULE_OBJECT_ENVIRONMENT_SLOT 2
diff --git a/js/src/frontend/TokenStream.cpp b/js/src/frontend/TokenStream.cpp
index b8623d545e..e07f8df8ad 100644
--- a/js/src/frontend/TokenStream.cpp
+++ b/js/src/frontend/TokenStream.cpp
@@ -1843,6 +1843,8 @@ TokenStream::getTokenInternal(TokenKind* ttp, Modifier modifier)
reflags = RegExpFlag(reflags | StickyFlag);
else if (c == 'u' && !(reflags & UnicodeFlag))
reflags = RegExpFlag(reflags | UnicodeFlag);
+ else if (c == 's' && !(reflags & DotAllFlag))
+ reflags = RegExpFlag(reflags | DotAllFlag);
else
break;
getChar();
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index 9ef9fe3e2f..1ad044e8e2 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -222,7 +222,7 @@ RegExpBuilder::AddQuantifierToAtom(int min, int max,
template <typename CharT>
RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode,
- bool unicode, bool ignore_case)
+ bool unicode, bool ignore_case, bool dotall)
: ts(ts),
alloc(alloc),
captures_(nullptr),
@@ -235,6 +235,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
multiline_(multiline_mode),
unicode_(unicode),
ignore_case_(ignore_case),
+ dotall_(dotall),
simple_(false),
contains_anchor_(false),
is_scanned_for_captures_(false)
@@ -1384,7 +1385,7 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
{
RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
- // everything except \x0a, \x0d, \u2028 and \u2029
+ // Everything except \x0a, \x0d, \u2028 and \u2029
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
ranges->append(CharacterRange::Range(0x0, 0x09));
@@ -1414,6 +1415,38 @@ UnicodeEverythingAtom(LifoAlloc* alloc)
return builder->ToRegExp();
}
+static inline RegExpTree*
+UnicodeDotAllAtom(LifoAlloc* alloc)
+{
+ RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
+ // Builds the tree used for '.' when the parser has both unicode_ and dotall_ set.
+ // Full range excluding surrogates because /s was specified
+ // Alternative 1: a character class covering everything outside the surrogate block.
+ CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
+ ranges->append(CharacterRange::Range(0x0, unicode::LeadSurrogateMin - 1));
+ ranges->append(CharacterRange::Range(unicode::TrailSurrogateMax + 1, unicode::UTF16Max));
+ builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
+
+ builder->NewAlternative();
+ // Alternative 2: a lead surrogate with a NegativeLookahead over the trail-surrogate range.
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
+ unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+ // Alternative 3: a trail surrogate guarded by the NOT_AFTER_LEAD_SURROGATE assertion.
+ builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
+ RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+
+ builder->NewAlternative();
+ // Alternative 4: a lead surrogate immediately followed by a trail surrogate.
+ builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
+ builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
+ // Collapse the four alternatives into a single tree for the caller.
+ return builder->ToRegExp();
+}
+
RegExpTree*
UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
{
@@ -1541,13 +1574,25 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '.': {
Advance();
- // everything except \x0a, \x0d, \u2028 and \u2029
+
if (unicode_) {
- builder->AddAtom(UnicodeEverythingAtom(alloc));
+ if (dotall_) {
+ // Everything
+ builder->AddAtom(UnicodeDotAllAtom(alloc));
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ builder->AddAtom(UnicodeEverythingAtom(alloc));
+ }
break;
}
CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
- CharacterRange::AddClassEscape(alloc, '.', ranges);
+ if (dotall_) {
+ // Everything
+ CharacterRange::AddClassEscape(alloc, '*', ranges);
+ } else {
+ // Everything except \x0a, \x0d, \u2028 and \u2029
+ CharacterRange::AddClassEscape(alloc, '.', ranges);
+ }
RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
builder->AddAtom(atom);
break;
@@ -1880,7 +1925,7 @@ template <typename CharT>
static bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
bool multiline, bool match_only, bool unicode, bool ignore_case,
- bool global, bool sticky, RegExpCompileData* data)
+ bool global, bool sticky, bool dotall, RegExpCompileData* data)
{
if (match_only) {
// Try to strip a leading '.*' from the RegExp, but only if it is not
@@ -1907,7 +1952,7 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
}
}
- RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
+ RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case, dotall);
data->tree = parser.ParsePattern();
if (!data->tree)
return false;
@@ -1921,33 +1966,33 @@ ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, si
bool
irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only, bool unicode, bool ignore_case,
- bool global, bool sticky, RegExpCompileData* data)
+ bool global, bool sticky, bool dotall, RegExpCompileData* data)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
- multiline, match_only, unicode, ignore_case, global, sticky, data)
+ multiline, match_only, unicode, ignore_case, global, sticky, dotall, data)
: ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
- multiline, match_only, unicode, ignore_case, global, sticky, data);
+ multiline, match_only, unicode, ignore_case, global, sticky, dotall, data);
}
template <typename CharT>
static bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
- bool unicode)
+ bool unicode, bool dotall)
{
LifoAllocScope scope(&alloc);
- RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false);
+ RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false, dotall); // ctor order: multiline_mode, unicode, ignore_case, dotall
return parser.ParsePattern() != nullptr;
}
bool
irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
- bool unicode)
+ bool unicode, bool dotall)
{
JS::AutoCheckCannotGC nogc;
return str->hasLatin1Chars()
- ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
- : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
+ ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode, dotall)
+ : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode, dotall);
}
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index 2f02625b5c..ee57f04365 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -44,11 +44,11 @@ namespace irregexp {
bool
ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
bool multiline, bool match_only, bool unicode, bool ignore_case,
- bool global, bool sticky, RegExpCompileData* data);
+ bool global, bool sticky, bool dotall, RegExpCompileData* data);
bool
ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
- bool unicode);
+ bool unicode, bool dotall);
// A BufferedVector is an automatically growing list, just like (and backed
// by) a Vector, that is optimized for the case of adding and removing
@@ -178,7 +178,7 @@ class RegExpParser
public:
RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
const CharT* chars, const CharT* end, bool multiline_mode, bool unicode,
- bool ignore_case);
+ bool ignore_case, bool dotall);
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
@@ -313,6 +313,7 @@ class RegExpParser
bool multiline_;
bool unicode_;
bool ignore_case_;
+ bool dotall_;
bool simple_;
bool contains_anchor_;
bool is_scanned_for_captures_;
diff --git a/js/src/jsapi.h b/js/src/jsapi.h
index dc00c650d4..1a69b15139 100644
--- a/js/src/jsapi.h
+++ b/js/src/jsapi.h
@@ -5704,6 +5704,7 @@ JS_ObjectIsDate(JSContext* cx, JS::HandleObject obj, bool* isDate);
#define JSREG_MULTILINE 0x04u /* treat ^ and $ as begin and end of line */
#define JSREG_STICKY 0x08u /* only match starting at lastIndex */
#define JSREG_UNICODE 0x10u /* unicode */
+#define JSREG_DOTALL 0x20u /* match . to everything including newlines */
extern JS_PUBLIC_API(JSObject*)
JS_NewRegExpObject(JSContext* cx, const char* bytes, size_t length, unsigned flags);
diff --git a/js/src/vm/CommonPropertyNames.h b/js/src/vm/CommonPropertyNames.h
index fd1c9f5e63..4ae49d5771 100644
--- a/js/src/vm/CommonPropertyNames.h
+++ b/js/src/vm/CommonPropertyNames.h
@@ -97,6 +97,7 @@
macro(displayURL, displayURL, "displayURL") \
macro(do, do_, "do") \
macro(done, done, "done") \
+ macro(dotall, dotall, "dotall") \
macro(dotGenerator, dotGenerator, ".generator") \
macro(dotThis, dotThis, ".this") \
macro(each, each, "each") \
diff --git a/js/src/vm/RegExpObject.cpp b/js/src/vm/RegExpObject.cpp
index ef97ed8165..cd0b54c9d6 100644
--- a/js/src/vm/RegExpObject.cpp
+++ b/js/src/vm/RegExpObject.cpp
@@ -49,6 +49,7 @@ JS_STATIC_ASSERT(GlobalFlag == JSREG_GLOB);
JS_STATIC_ASSERT(MultilineFlag == JSREG_MULTILINE);
JS_STATIC_ASSERT(StickyFlag == JSREG_STICKY);
JS_STATIC_ASSERT(UnicodeFlag == JSREG_UNICODE);
+JS_STATIC_ASSERT(DotAllFlag == JSREG_DOTALL);
RegExpObject*
js::RegExpAlloc(ExclusiveContext* cx, HandleObject proto /* = nullptr */)
@@ -267,7 +268,7 @@ RegExpObject::create(ExclusiveContext* cx, HandleAtom source, RegExpFlag flags,
tokenStream = dummyTokenStream.ptr();
}
- if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag))
+ if (!irregexp::ParsePatternSyntax(*tokenStream, alloc, source, flags & UnicodeFlag, flags & DotAllFlag))
return nullptr;
Rooted<RegExpObject*> regexp(cx, RegExpAlloc(cx));
@@ -1017,7 +1018,7 @@ RegExpShared::compile(JSContext* cx, HandleAtom pattern, HandleLinearString inpu
irregexp::RegExpCompileData data;
if (!irregexp::ParsePattern(dummyTokenStream, cx->tempLifoAlloc(), pattern,
multiline(), mode == MatchOnly, unicode(), ignoreCase(),
- global(), sticky(), &data))
+ global(), sticky(), dotall(), &data))
{
return false;
}
diff --git a/js/src/vm/RegExpObject.h b/js/src/vm/RegExpObject.h
index f1ea101ed5..95c64fa678 100644
--- a/js/src/vm/RegExpObject.h
+++ b/js/src/vm/RegExpObject.h
@@ -53,16 +53,18 @@ enum RegExpFlag
MultilineFlag = 0x04,
StickyFlag = 0x08,
UnicodeFlag = 0x10,
+ DotAllFlag = 0x20,
NoFlags = 0x00,
- AllFlags = 0x1f
+ AllFlags = 0x3f
};
static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG &&
GlobalFlag == REGEXP_GLOBAL_FLAG &&
MultilineFlag == REGEXP_MULTILINE_FLAG &&
StickyFlag == REGEXP_STICKY_FLAG &&
- UnicodeFlag == REGEXP_UNICODE_FLAG,
+ UnicodeFlag == REGEXP_UNICODE_FLAG &&
+ DotAllFlag == REGEXP_DOTALL_FLAG,
"Flag values should be in sync with self-hosted JS");
enum RegExpRunStatus
@@ -193,6 +195,7 @@ class RegExpShared
bool multiline() const { return flags & MultilineFlag; }
bool sticky() const { return flags & StickyFlag; }
bool unicode() const { return flags & UnicodeFlag; }
+ bool dotall() const { return flags & DotAllFlag; }
bool isCompiled(CompilationMode mode, bool latin1,
ForceByteCodeEnum force = DontForceByteCode) const {
@@ -480,6 +483,7 @@ class RegExpObject : public NativeObject
bool multiline() const { return getFlags() & MultilineFlag; }
bool sticky() const { return getFlags() & StickyFlag; }
bool unicode() const { return getFlags() & UnicodeFlag; }
+ bool dotall() const { return getFlags() & DotAllFlag; }
static bool isOriginalFlagGetter(JSNative native, RegExpFlag* mask);