summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartok <martok@martoks-place.de>2022-12-21 18:53:25 +0100
committerMartok <martok@martoks-place.de>2022-12-21 18:53:25 +0100
commit94f456f834273adccd4baa07a9665b61e946f511 (patch)
treefc2dead1c5ac1d6988fd8a26a663ec9bdc00240f
parent37bbb4152dbc1dd02e9c094fdcc9320ad9394ac9 (diff)
downloaduxp-94f456f834273adccd4baa07a9665b61e946f511.tar.gz
Issue #2056 - Fix handling of captures in lookbehinds
- Port unification of CheckNotBackReference* - Port LoadCurrentCharacter - Make RegExpMacroAssembler::CheckAtStart understand cp_offset - Replace magic numbers in ChoiceNode::Emit, Trace::PerformDeferredActions - CheckBacktrackStackLimit - Allow backrefs to resist recursion
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.cpp385
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.h8
-rw-r--r--js/src/irregexp/RegExpAST.h10
-rw-r--r--js/src/irregexp/RegExpEngine.cpp75
-rw-r--r--js/src/irregexp/RegExpEngine.h10
-rw-r--r--js/src/irregexp/RegExpInterpreter.cpp2
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.cpp4
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.h4
8 files changed, 255 insertions, 243 deletions
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
index 2efbf1bf76..41c1951bc2 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
@@ -71,13 +71,13 @@ NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(LifoAlloc* alloc, RegExpS
// Find physical registers for each compiler register.
AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+ temp0 = regs.takeAny();
+ temp1 = regs.takeAny();
+ temp2 = regs.takeAny();
input_end_pointer = regs.takeAny();
current_character = regs.takeAny();
current_position = regs.takeAny();
backtrack_stack_pointer = regs.takeAny();
- temp0 = regs.takeAny();
- temp1 = regs.takeAny();
- temp2 = regs.takeAny();
JitSpew(JitSpew_Codegen,
"Starting RegExp (input_end_pointer %s) (current_character %s)"
@@ -548,23 +548,20 @@ NativeRegExpMacroAssembler::Bind(Label* label)
}
void
-NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start)
-{
- JitSpew(SPEW_PREFIX "CheckAtStart");
-
- Label not_at_start;
-
- // Did we start the match at the start of the string at all?
- Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex));
- masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), &not_at_start);
-
- // If we did, are we still at the start of the input?
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0);
+NativeRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
+ Assembler::Condition cond) {
+ masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0);
Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
- masm.branchPtr(Assembler::Equal, inputStart, temp0, BranchOrBacktrack(on_at_start));
+ masm.branchPtr(cond, inputStart, temp0, BranchOrBacktrack(on_cond));
+}
+
+void
+NativeRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start)
+{
+ JitSpew(SPEW_PREFIX "CheckAtStart");
- masm.bind(&not_at_start);
+ CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal);
}
void
@@ -572,15 +569,7 @@ NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_star
{
JitSpew(SPEW_PREFIX "CheckNotAtStart");
- // Did we start the match at the start of the string at all?
- Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex));
- masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), BranchOrBacktrack(on_not_at_start));
-
- // If we did, are we still at the start of the input?
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0);
-
- Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
- masm.branchPtr(Assembler::NotEqual, inputStart, temp0, BranchOrBacktrack(on_not_at_start));
+ CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual);
}
void
@@ -659,211 +648,204 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
}
void
-NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
+NativeRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, bool read_backward,
+ Label* on_no_match,
+ bool unicode, bool ignore_case)
{
- JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
-
Label fallthrough;
- Label success;
- Label fail;
-
- // Find length of back-referenced capture.
- masm.loadPtr(register_location(start_reg), current_character);
- masm.loadPtr(register_location(start_reg + 1), temp0);
- masm.subPtr(current_character, temp0); // Length to check.
- // Fail on partial or illegal capture (start of capture after end of capture).
- masm.branchPtr(Assembler::LessThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match));
+ // Captures are stored as a sequential pair of registers.
+ // Find the length of the back-referenced capture and load the
+ // capture's start index into current_character_
+ masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
+ masm.loadPtr(register_location(start_reg + 1), temp0); // Index of end of capture
+ masm.subPtr(current_character, temp0); // Length of capture.
- // Succeed on empty capture (including no capture).
+ // If length is zero, either the capture is empty or it is completely
+ // uncaptured. In either case succeed immediately.
masm.branchPtr(Assembler::Equal, temp0, ImmWord(0), &fallthrough);
// Check that there are sufficient characters left in the input.
- masm.movePtr(current_position, temp1);
- masm.addPtr(temp0, temp1);
- masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match));
-
- // Save register to make it available below.
- masm.push(backtrack_stack_pointer);
-
- // Compute pointers to match string and capture string
- masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp1); // Start of match.
- masm.addPtr(input_end_pointer, current_character); // Start of capture.
- masm.computeEffectiveAddress(BaseIndex(temp0, temp1, TimesOne), backtrack_stack_pointer); // End of match.
-
- Label loop;
- masm.bind(&loop);
- if (mode_ == ASCII) {
- masm.load8ZeroExtend(Address(current_character, 0), temp0);
- masm.load8ZeroExtend(Address(temp1, 0), temp2);
+ if (read_backward) {
+ // If start + len > current, there isn't enough room for a
+ // lookbehind backreference.
+ Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
+ masm.loadPtr(inputStart, temp1);
+ masm.subPtr(input_end_pointer, temp1);
+ masm.addPtr(temp0, temp1);
+ masm.branchPtr(Assembler::GreaterThan, temp1, current_position,
+ BranchOrBacktrack(on_no_match));
} else {
- MOZ_ASSERT(mode_ == CHAR16);
- masm.load16ZeroExtend(Address(current_character, 0), temp0);
- masm.load16ZeroExtend(Address(temp1, 0), temp2);
+ // current_position is the negative offset from the end.
+ // If current + len > 0, there isn't enough room for a backreference.
+ masm.movePtr(current_position, temp1);
+ masm.addPtr(temp0, temp1);
+ masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0),
+ BranchOrBacktrack(on_no_match));
}
- masm.branch32(Assembler::NotEqual, temp0, temp2, &fail);
-
- // Increment pointers into capture and match string.
- masm.addPtr(Imm32(char_size()), current_character);
- masm.addPtr(Imm32(char_size()), temp1);
-
- // Check if we have reached end of match area.
- masm.branchPtr(Assembler::Below, temp1, backtrack_stack_pointer, &loop);
- masm.jump(&success);
-
- masm.bind(&fail);
-
- // Restore backtrack stack pointer.
- masm.pop(backtrack_stack_pointer);
- JumpOrBacktrack(on_no_match);
-
- masm.bind(&success);
-
- // Move current character position to position after match.
- masm.movePtr(backtrack_stack_pointer, current_position);
- masm.subPtr(input_end_pointer, current_position);
-
- // Restore backtrack stack pointer.
- masm.pop(backtrack_stack_pointer);
-
- masm.bind(&fallthrough);
-}
-
-void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- Label* on_no_match, bool unicode)
-{
- JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
- Label fallthrough;
+ if (mode_ == CHAR16 && ignore_case) {
+ // We call a helper function for case-insensitive non-latin1 strings.
+ // Save volatile regs. temp1, temp2, and current_character
+ // don't need to be saved. current_position needs to be saved
+ // even if it's non-volatile, because we modify it to use as an argument.
+ LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
+ volatileRegs.addUnchecked(current_position);
+ volatileRegs.takeUnchecked(temp1);
+ volatileRegs.takeUnchecked(temp2);
+ volatileRegs.takeUnchecked(current_character);
+ masm.PushRegsInMask(volatileRegs);
- masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture
- masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture
- masm.subPtr(current_character, temp1); // Length of capture.
+ // Parameters are
+ // Address byte_offset1 - Address captured substring's start.
+ // Address byte_offset2 - Address of current character position.
+ // size_t byte_length - length of capture in bytes(!)
- // The length of a capture should not be negative. This can only happen
- // if the end of the capture is unrecorded, or at a point earlier than
- // the start of the capture.
- masm.branchPtr(Assembler::LessThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match));
+ // Set byte_offset1.
+ // Start of capture, where current_character already holds string-end negative offset.
+ masm.addPtr(input_end_pointer, current_character);
- // If length is zero, either the capture is empty or it is completely
- // uncaptured. In either case succeed immediately.
- masm.branchPtr(Assembler::Equal, temp1, ImmWord(0), &fallthrough);
+ // Set byte_offset2.
+ // Found by adding negative string-end offset of current position
+ // to end of string.
+ masm.addPtr(input_end_pointer, current_position);
+ if (read_backward) {
+ // Offset by length when matching backwards.
+ masm.subPtr(temp1, current_position);
+ }
- // Check that there are sufficient characters left in the input.
- masm.movePtr(current_position, temp0);
- masm.addPtr(temp1, temp0);
- masm.branchPtr(Assembler::GreaterThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match));
+ masm.setupUnalignedABICall(temp1);
+ masm.passABIArg(current_character);
+ masm.passABIArg(current_position);
+ masm.passABIArg(temp0);
+ if (unicode) {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ } else {
+ int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
+ masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ }
+ masm.storeCallInt32Result(temp1);
+ masm.PopRegsInMask(volatileRegs);
+ // Check if function returned non-zero for success or zero for failure.
+ masm.branchTest32(Assembler::Zero, temp1, temp1, BranchOrBacktrack(on_no_match));
- if (mode_ == ASCII) {
- Label success, fail;
+ // On success, advance position by length of capture
+ if (read_backward) {
+ masm.subPtr(temp0, current_position);
+ } else {
+ masm.addPtr(temp0, current_position);
+ }
+ } else {
+ MOZ_ASSERT(mode_ == ASCII || !ignore_case);
// Save register contents to make the registers available below. After
- // this, the temp0, temp2, and current_position registers are available.
+ // this, the temp1, temp2, and current_position registers are available.
masm.push(current_position);
+ // Make offset values into pointers
masm.addPtr(input_end_pointer, current_character); // Start of capture.
masm.addPtr(input_end_pointer, current_position); // Start of text to match against capture.
- masm.addPtr(current_position, temp1); // End of text to match against capture.
-
- Label loop, loop_increment;
- masm.bind(&loop);
- masm.load8ZeroExtend(Address(current_position, 0), temp0);
- masm.load8ZeroExtend(Address(current_character, 0), temp2);
- masm.branch32(Assembler::Equal, temp0, temp2, &loop_increment);
-
- // Mismatch, try case-insensitive match (converting letters to lower-case).
- masm.or32(Imm32(0x20), temp0); // Convert match character to lower-case.
-
- // Is temp0 a lowercase letter?
- Label convert_capture;
- masm.computeEffectiveAddress(Address(temp0, -'a'), temp2);
- masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')),
- &convert_capture);
- // Latin-1: Check for values in range [224,254] but not 247.
- masm.sub32(Imm32(224 - 'a'), temp2);
- masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail);
-
- // Check for 247.
- masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail);
+ if (read_backward) {
+ // Offset by length when matching backwards.
+ masm.subPtr(temp0, current_position);
+ }
- masm.bind(&convert_capture);
+ // End of text to match against capture (temp0 is pointer now)
+ masm.addPtr(current_position, temp0);
- // Also convert capture character.
- masm.load8ZeroExtend(Address(current_character, 0), temp2);
- masm.or32(Imm32(0x20), temp2);
+ Label success, fail, loop;
+ masm.bind(&loop);
- masm.branch32(Assembler::NotEqual, temp0, temp2, &fail);
+ // Load next character from each string.
+ if (mode_ == ASCII) {
+ masm.load8ZeroExtend(Address(current_character, 0), temp1);
+ masm.load8ZeroExtend(Address(current_position, 0), temp2);
+ } else {
+ masm.load16ZeroExtend(Address(current_character, 0), temp1);
+ masm.load16ZeroExtend(Address(current_position, 0), temp2);
+ }
- masm.bind(&loop_increment);
+ if (ignore_case) {
+ MOZ_ASSERT(mode_ == ASCII);
+ Label loop_increment, convert_match;
+
+ // Try exact match.
+ masm.branch32(Assembler::Equal, temp1, temp2, &loop_increment);
+
+ // Mismatch, try case-insensitive match (converting letters to lower-case).
+ masm.or32(Imm32(0x20), temp1); // Convert match character to lower-case.
+
+ // Is temp1 a lowercase letter [a,z]?
+ masm.computeEffectiveAddress(Address(temp1, -'a'), temp2);
+ masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')),
+ &convert_match);
+ // Latin-1: Check for values in range [224,254] but not 247 (U+00F7 DIVISION SIGN).
+ masm.sub32(Imm32(224 - 'a'), temp2);
+ masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail);
+ // Check for 247.
+ masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail);
+
+ // Capture character is lower case. Convert match character to lower case and compare
+ masm.bind(&convert_match);
+ // Reload latin1 character since temp2 was clobbered above
+ masm.load8ZeroExtend(Address(current_position, 0), temp2);
+ masm.or32(Imm32(0x20), temp2);
+ masm.branch32(Assembler::NotEqual, temp1, temp2, &fail);
+
+ masm.bind(&loop_increment);
+ } else {
+ // Fail if characters do not match.
+ masm.branch32(Assembler::NotEqual, temp1, temp2, &fail);
+ }
// Increment pointers into match and capture strings.
- masm.addPtr(Imm32(1), current_character);
- masm.addPtr(Imm32(1), current_position);
+ masm.addPtr(Imm32(char_size()), current_character);
+ masm.addPtr(Imm32(char_size()), current_position);
- // Compare to end of match, and loop if not done.
- masm.branchPtr(Assembler::Below, current_position, temp1, &loop);
+ // Loop if we have not reached the end of the match string.
+ masm.branchPtr(Assembler::Below, current_position, temp0, &loop);
masm.jump(&success);
- masm.bind(&fail);
-
// Restore original values before failing.
+ masm.bind(&fail);
masm.pop(current_position);
JumpOrBacktrack(on_no_match);
masm.bind(&success);
-
// Drop original character position value.
- masm.addToStackPtr(Imm32(sizeof(uintptr_t)));
+ masm.pop(temp0);
- // Compute new value of character position after the matched part.
+ // current_position is a pointer (now at the end of the consumed characters). Convert it back to an offset.
masm.subPtr(input_end_pointer, current_position);
- } else {
- MOZ_ASSERT(mode_ == CHAR16);
-
- // Note: temp1 needs to be saved/restored if it is volatile, as it is used after the call.
- LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
- volatileRegs.takeUnchecked(temp0);
- volatileRegs.takeUnchecked(temp2);
- masm.PushRegsInMask(volatileRegs);
- // Set byte_offset1.
- // Start of capture, where current_character already holds string-end negative offset.
- masm.addPtr(input_end_pointer, current_character);
-
- // Set byte_offset2.
- // Found by adding negative string-end offset of current position
- // to end of string.
- masm.addPtr(input_end_pointer, current_position);
-
- // Parameters are
- // Address byte_offset1 - Address captured substring's start.
- // Address byte_offset2 - Address of current character position.
- // size_t byte_length - length of capture in bytes(!)
- masm.setupUnalignedABICall(temp0);
- masm.passABIArg(current_character);
- masm.passABIArg(current_position);
- masm.passABIArg(temp1);
- if (!unicode) {
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings;
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
- } else {
- int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings;
- masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+ if (read_backward) {
+ // Subtract match length if we matched backward
+ masm.addPtr(register_location(start_reg), current_position);
+ masm.subPtr(register_location(start_reg + 1), current_position);
}
- masm.storeCallInt32Result(temp0);
+ }
- masm.PopRegsInMask(volatileRegs);
+ // Fallthrough if capture length was zero
+ masm.bind(&fallthrough);
+}
- // Check if function returned non-zero for success or zero for failure.
- masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match));
+void
+NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
+{
+ JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
- // On success, increment position by length of capture.
- masm.addPtr(temp1, current_position);
- }
+ CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, /*unicode = */ false, /*ignore_case = */ false);
+}
- masm.bind(&fallthrough);
+void
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ Label* on_no_match, bool unicode)
+{
+ JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
+
+ CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, unicode, /*ignore_case = */ true);
}
void
@@ -961,10 +943,13 @@ NativeRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, Label* on_end_of
{
JitSpew(SPEW_PREFIX "LoadCurrentCharacter(%d, %d)", cp_offset, characters);
- MOZ_ASSERT(cp_offset >= -1); // ^ and \b can look behind one character.
MOZ_ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
if (check_bounds)
- CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ if (cp_offset >= 0) {
+ CheckPosition(cp_offset + characters - 1, on_end_of_input);
+ } else {
+ CheckPosition(cp_offset, on_end_of_input);
+ }
LoadCurrentCharacterUnchecked(cp_offset, characters);
}
@@ -972,9 +957,8 @@ void
NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int characters)
{
JitSpew(SPEW_PREFIX "LoadCurrentCharacterUnchecked(%d, %d)", cp_offset, characters);
-
+ BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * char_size());
if (mode_ == ASCII) {
- BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset);
if (characters == 4) {
masm.load32(address, current_character);
} else if (characters == 2) {
@@ -986,7 +970,6 @@ NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int cha
} else {
MOZ_ASSERT(mode_ == CHAR16);
MOZ_ASSERT(characters <= 2);
- BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * sizeof(char16_t));
if (characters == 2)
masm.load32(address, current_character);
else
@@ -1096,10 +1079,11 @@ NativeRegExpMacroAssembler::CheckBacktrackStackLimit()
masm.moveStackPtrTo(temp2);
masm.call(&stack_overflow_label_);
- masm.bind(&no_stack_overflow);
// Exit with an exception if the call failed.
masm.branchTest32(Assembler::Zero, temp0, temp0, &exit_with_exception_label_);
+
+ masm.bind(&no_stack_overflow);
}
void
@@ -1213,8 +1197,21 @@ void
NativeRegExpMacroAssembler::CheckPosition(int cp_offset, Label* on_outside_input)
{
JitSpew(SPEW_PREFIX "CheckPosition(%d)", cp_offset);
- masm.branchPtr(Assembler::GreaterThanOrEqual, current_position,
- ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input));
+ if (cp_offset >= 0) {
+ // end + current + offset >= end
+ // <=> current + offset >= 0
+ // <=> current >= -offset
+ masm.branchPtr(Assembler::GreaterThanOrEqual, current_position,
+ ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input));
+ } else {
+ // negative cp_offset means we're reading backwards, check against start of string
+ // Compute offset address
+ masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0);
+
+ // Compare to start of input.
+ Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart));
+ masm.branchPtr(Assembler::GreaterThan, inputStart, temp0, BranchOrBacktrack(on_outside_input));
+ }
}
Label*
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h
index 3a04a33cf2..857900cabf 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.h
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.h
@@ -98,7 +98,7 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
void AdvanceRegister(int reg, int by);
void Backtrack();
void Bind(jit::Label* label);
- void CheckAtStart(jit::Label* on_at_start);
+ void CheckAtStart(int cp_offset, jit::Label* on_at_start);
void CheckCharacter(unsigned c, jit::Label* on_equal);
void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal);
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
@@ -203,13 +203,17 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
Vector<LabelPatch, 4, SystemAllocPolicy> labelPatches;
- // See RegExpMacroAssembler.cpp for the meaning of these registers.
+ // See NativeRegExpMacroAssembler.cpp for the meaning of these registers.
jit::Register input_end_pointer;
jit::Register current_character;
jit::Register current_position;
jit::Register backtrack_stack_pointer;
jit::Register temp0, temp1, temp2;
+ void CheckAtStartImpl(int cp_offset, jit::Label* on_cond, jit::Assembler::Condition cond);
+ void CheckNotBackReferenceImpl(int start_reg, bool read_backward, jit::Label* on_no_match,
+ bool unicode, bool ignore_case);
+
// The frame_pointer-relative location of a regexp register.
jit::Address register_location(int register_index) {
checkRegister(register_index);
diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h
index 2cdf71aad8..37c85b7427 100644
--- a/js/src/irregexp/RegExpAST.h
+++ b/js/src/irregexp/RegExpAST.h
@@ -424,13 +424,9 @@ class RegExpBackReference : public RegExpTree
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
virtual int min_match() override { return 0; }
- // The capture may not be completely parsed yet, if the reference occurs
- // before the capture. In the ordinary case, nothing has been captured yet,
- // so the back reference must have the length 0. If the back reference is
- // inside a lookbehind, effectively making it a forward reference, we return
- virtual int max_match() override {
- return capture_->body() ? capture_->max_match() : 0;
- }
+ // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite
+ // recursion, we give up. Ignorance is bliss.
+ int max_match() override { return kInfinity; }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp
index 42787dc0f0..bf95b59dda 100644
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -2010,8 +2010,9 @@ RegExpQuantifier::ToNode(int min,
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start && !compiler->read_backward())
+ if (not_at_start && !compiler->read_backward()) {
alternation->set_not_at_start();
+ }
}
return answer;
}
@@ -2218,9 +2219,7 @@ RegExpCapture::ToNode(RegExpTree* body,
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
if (compiler->read_backward()) {
- // std::swap(start_reg, end_reg);
- start_reg = RegExpCapture::EndRegister(index);
- end_reg = RegExpCapture::StartRegister(index);
+ std::swap(start_reg, end_reg);
}
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
@@ -2604,6 +2603,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
{
// The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
+ static const int kNoStore = INT32_MIN;
// Count pushes performed to force a stack limit check occasionally.
int pushes = 0;
@@ -2620,7 +2620,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
int value = 0;
bool absolute = false;
bool clear = false;
- int store_position = -1;
+ int store_position = kNoStore;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
for (DeferredAction* action = actions_;
@@ -2641,7 +2641,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
// we can set undo_action to IGNORE if we know there is no value to
// restore.
undo_action = DEFER_RESTORE;
- MOZ_ASSERT(store_position == -1);
+ MOZ_ASSERT(store_position == kNoStore);
MOZ_ASSERT(!clear);
break;
}
@@ -2649,14 +2649,14 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
if (!absolute) {
value++;
}
- MOZ_ASSERT(store_position == -1);
+ MOZ_ASSERT(store_position == kNoStore);
MOZ_ASSERT(!clear);
undo_action = DEFER_RESTORE;
break;
case ActionNode::STORE_POSITION: {
Trace::DeferredCapture* pc =
static_cast<Trace::DeferredCapture*>(action);
- if (!clear && store_position == -1) {
+ if (!clear && store_position == kNoStore) {
store_position = pc->cp_offset();
}
@@ -2680,7 +2680,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
// Since we're scanning in reverse order, if we've already
// set the position we have to ignore historically earlier
// clearing operations.
- if (store_position == -1) {
+ if (store_position == kNoStore) {
clear = true;
}
undo_action = DEFER_RESTORE;
@@ -2710,7 +2710,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc,
}
// Perform the chronologically last action (or accumulated increment)
// for the register.
- if (store_position != -1) {
+ if (store_position != kNoStore) {
assembler->WriteCurrentPositionToRegister(reg, store_position);
} else if (clear) {
assembler->ClearRegisters(reg, reg);
@@ -2910,16 +2910,23 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
+ // A positive (> 0) cp_offset means we've already successfully matched a
+ // non-empty-width part of the pattern, and thus cannot be at or before the
+ // start of the subject string. We can thus skip both at-start and
+ // bounds-checks when loading the one-character lookbehind.
+ const bool may_be_at_or_before_subject_string_start = new_trace.cp_offset() <= 0;
+
jit::Label ok;
- if (new_trace.cp_offset() == 0) {
- // The start of input counts as a newline in this context, so skip to
- // ok if we are at the start.
- assembler->CheckAtStart(&ok);
+ if (may_be_at_or_before_subject_string_start) {
+ // The start of input counts as a newline in this context, so skip to ok if
+ // we are at the start.
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
}
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
- assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false);
+ // If we've already checked that we are not at the start of input, it's okay
+ // to load the previous character without bounds checks.
+ const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start;
+ assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), can_skip_bounds_check);
if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
@@ -2944,11 +2951,10 @@ EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trac
new_trace.InvalidateCurrentCharacter();
jit::Label ok;
- if (new_trace.cp_offset() == 0)
- assembler->CheckAtStart(&ok);
+ if (new_trace.cp_offset() <= 0) {
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
+ }
- // We already checked that we are not at the start of input so it must be
- // OK to load the previous character.
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
new_trace.backtrack());
@@ -2972,8 +2978,9 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace*
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
- if (new_trace.cp_offset() == 0)
- assembler->CheckAtStart(&ok);
+ if (new_trace.cp_offset() <= 0) {
+ assembler->CheckAtStart(new_trace.cp_offset(), &ok);
+ }
// First check if next character is a trail surrogate.
assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false);
@@ -3091,10 +3098,10 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through;
jit::Label* word = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack();
- if (new_trace.cp_offset() == 0) {
+ if (new_trace.cp_offset() <= 0) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
- assembler->CheckAtStart(non_word);
+ assembler->CheckAtStart(new_trace.cp_offset(), non_word);
}
// We already checked that we are not at the start of input so it must be
// OK to load the previous character.
@@ -4152,7 +4159,7 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea
RegExpNode*
TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
{
- if (read_backward()) return NULL;
+ if (read_backward()) return nullptr;
if (elements().length() != 1)
return nullptr;
@@ -4362,6 +4369,8 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
for (size_t i = first_normal_choice; i < choice_count; i++) {
+ bool is_last = i == choice_count - 1;
+ bool fall_through_on_failure = !is_last;
GuardedAlternative alternative = alternatives()[i];
AlternativeGeneration* alt_gen = alt_gens.at(i);
alt_gen->quick_check_details.set_characters(preload_characters);
@@ -4377,20 +4386,20 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
alt_gen->expects_preload = preload_is_current;
bool generate_full_check_inline = false;
- if (try_to_emit_quick_check_for_alternative(i) &&
+ if (try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
&new_trace,
preload_has_checked_bounds,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
- i < choice_count - 1)) {
+ fall_through_on_failure)) {
// Quick check was generated for this choice.
preload_is_current = true;
preload_has_checked_bounds = true;
// On the last choice in the ChoiceNode we generated the quick
// check to fall through on possible success. So now we need to
// generate the full check inline.
- if (i == choice_count - 1) {
+ if (!fall_through_on_failure) {
macro_assembler->Bind(&alt_gen->possible_success);
new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
new_trace.set_characters_preloaded(preload_characters);
@@ -4398,7 +4407,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
generate_full_check_inline = true;
}
} else if (alt_gen->quick_check_details.cannot_match()) {
- if (i == choice_count - 1 && !greedy_loop) {
+ if (!fall_through_on_failure && !greedy_loop) {
macro_assembler->JumpOrBacktrack(trace->backtrack());
}
continue;
@@ -4412,7 +4421,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
alt_gen->expects_preload = false;
new_trace.InvalidateCurrentCharacter();
}
- if (i < choice_count - 1) {
+ if (!is_last) {
new_trace.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
@@ -4450,12 +4459,14 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
if (new_trace.actions() != nullptr) {
new_trace.set_flush_budget(new_flush_budget);
}
+ bool next_expects_preload =
+ i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
EmitOutOfLineContinuation(compiler,
&new_trace,
alternatives()[i],
alt_gen,
preload_characters,
- alt_gens.at(i + 1)->expects_preload);
+ next_expects_preload);
}
}
diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h
index 0d760145f9..c665105d7c 100644
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -524,7 +524,7 @@ class RegExpNode
int characters_filled_in,
bool not_at_start) = 0;
- static const int kNodeIsTooComplexForGreedyLoops = -1;
+ static const int kNodeIsTooComplexForGreedyLoops = INT32_MIN;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
@@ -1060,7 +1060,9 @@ class ChoiceNode : public RegExpNode
bool not_at_start() { return not_at_start_; }
void set_not_at_start() { not_at_start_ = true; }
void set_being_calculated(bool b) { being_calculated_ = b; }
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
+ virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
+ return true;
+ }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
virtual bool read_backward() { return false; }
@@ -1114,7 +1116,9 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
// starts by loading enough characters for the alternative that takes fewest
// characters, but on a negative lookahead the negative branch did not take
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
+ bool try_to_emit_quick_check_for_alternative(bool is_first) override {
+ return !is_first;
+ }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
};
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp
index eab428faa9..f53acfb606 100644
--- a/js/src/irregexp/RegExpInterpreter.cpp
+++ b/js/src/irregexp/RegExpInterpreter.cpp
@@ -529,7 +529,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
BYTECODE(CHECK_AT_START)
- if (current == 0)
+ if (current + (insn >> BYTECODE_SHIFT) == 0)
pc = byteCode + Load32Aligned(pc + 4);
else
pc += BC_CHECK_AT_START_LENGTH;
diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp
index acb78203bd..2c4ec67ef5 100644
--- a/js/src/irregexp/RegExpMacroAssembler.cpp
+++ b/js/src/irregexp/RegExpMacroAssembler.cpp
@@ -172,9 +172,9 @@ InterpretedRegExpMacroAssembler::Bind(jit::Label* label)
}
void
-InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start)
+InterpretedRegExpMacroAssembler::CheckAtStart(int cp_offset, jit::Label* on_at_start)
{
- Emit(BC_CHECK_AT_START, 0);
+ Emit(BC_CHECK_AT_START, cp_offset);
EmitOrLink(on_at_start);
}
diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h
index 06784ce853..4fa0ab5630 100644
--- a/js/src/irregexp/RegExpMacroAssembler.h
+++ b/js/src/irregexp/RegExpMacroAssembler.h
@@ -96,7 +96,7 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
virtual void Backtrack() = 0;
virtual void Bind(jit::Label* label) = 0;
- virtual void CheckAtStart(jit::Label* on_at_start) = 0;
+ virtual void CheckAtStart(int cp_offset, jit::Label* on_at_start) = 0;
// Dispatch after looking the current character up in a 2-bits-per-entry
// map. The destinations vector has up to 4 labels.
@@ -238,7 +238,7 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro
void AdvanceRegister(int reg, int by);
void Backtrack();
void Bind(jit::Label* label);
- void CheckAtStart(jit::Label* on_at_start);
+ void CheckAtStart(int cp_offset, jit::Label* on_at_start);
void CheckCharacter(unsigned c, jit::Label* on_equal);
void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal);
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);