diff options
author | Martok <martok@martoks-place.de> | 2022-12-21 18:53:25 +0100 |
---|---|---|
committer | Martok <martok@martoks-place.de> | 2022-12-21 18:53:25 +0100 |
commit | 94f456f834273adccd4baa07a9665b61e946f511 (patch) | |
tree | fc2dead1c5ac1d6988fd8a26a663ec9bdc00240f | |
parent | 37bbb4152dbc1dd02e9c094fdcc9320ad9394ac9 (diff) | |
download | uxp-94f456f834273adccd4baa07a9665b61e946f511.tar.gz |
Issue #2056 - Fix handling of captures in lookbehinds
- Port unification of CheckNotBackReference*
- Port LoadCurrentCharacter
- Make RegExpMacroAssembler::CheckAtStart understand cp_offset
- Replace magic numbers in ChoiceNode::Emit, Trace::PerformDeferredActions
- CheckBacktrackStackLimit
- Allow backrefs to resist recursion
-rw-r--r-- | js/src/irregexp/NativeRegExpMacroAssembler.cpp | 385 | ||||
-rw-r--r-- | js/src/irregexp/NativeRegExpMacroAssembler.h | 8 | ||||
-rw-r--r-- | js/src/irregexp/RegExpAST.h | 10 | ||||
-rw-r--r-- | js/src/irregexp/RegExpEngine.cpp | 75 | ||||
-rw-r--r-- | js/src/irregexp/RegExpEngine.h | 10 | ||||
-rw-r--r-- | js/src/irregexp/RegExpInterpreter.cpp | 2 | ||||
-rw-r--r-- | js/src/irregexp/RegExpMacroAssembler.cpp | 4 | ||||
-rw-r--r-- | js/src/irregexp/RegExpMacroAssembler.h | 4 |
8 files changed, 255 insertions, 243 deletions
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp index 2efbf1bf76..41c1951bc2 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp @@ -71,13 +71,13 @@ NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(LifoAlloc* alloc, RegExpS // Find physical registers for each compiler register. AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + temp0 = regs.takeAny(); + temp1 = regs.takeAny(); + temp2 = regs.takeAny(); input_end_pointer = regs.takeAny(); current_character = regs.takeAny(); current_position = regs.takeAny(); backtrack_stack_pointer = regs.takeAny(); - temp0 = regs.takeAny(); - temp1 = regs.takeAny(); - temp2 = regs.takeAny(); JitSpew(JitSpew_Codegen, "Starting RegExp (input_end_pointer %s) (current_character %s)" @@ -548,23 +548,20 @@ NativeRegExpMacroAssembler::Bind(Label* label) } void -NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start) -{ - JitSpew(SPEW_PREFIX "CheckAtStart"); - - Label not_at_start; - - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), ¬_at_start); - - // If we did, are we still at the start of the input? - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); +NativeRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond, + Assembler::Condition cond) { + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::Equal, inputStart, temp0, BranchOrBacktrack(on_at_start)); + masm.branchPtr(cond, inputStart, temp0, BranchOrBacktrack(on_cond)); +} + +void +NativeRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) +{ + JitSpew(SPEW_PREFIX "CheckAtStart"); - masm.bind(¬_at_start); + CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal); } void @@ -572,15 +569,7 @@ NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_star { JitSpew(SPEW_PREFIX "CheckNotAtStart"); - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), BranchOrBacktrack(on_not_at_start)); - - // If we did, are we still at the start of the input? - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); - - Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::NotEqual, inputStart, temp0, BranchOrBacktrack(on_not_at_start)); + CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual); } void @@ -659,211 +648,204 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio } void -NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +NativeRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, bool read_backward, + Label* on_no_match, + bool unicode, bool ignore_case) { - JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - Label fallthrough; - Label success; - Label fail; - - // Find length of back-referenced capture. - masm.loadPtr(register_location(start_reg), current_character); - masm.loadPtr(register_location(start_reg + 1), temp0); - masm.subPtr(current_character, temp0); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - masm.branchPtr(Assembler::LessThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Captures are stored as a sequential pair of registers. + // Find the length of the back-referenced capture and load the + // capture's start index into current_character_ + masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture + masm.loadPtr(register_location(start_reg + 1), temp0); // Index of end of capture + masm.subPtr(current_character, temp0); // Length of capture. - // Succeed on empty capture (including no capture). + // If length is zero, either the capture is empty or it is completely + // uncaptured. In either case succeed immediately. masm.branchPtr(Assembler::Equal, temp0, ImmWord(0), &fallthrough); // Check that there are sufficient characters left in the input. - masm.movePtr(current_position, temp1); - masm.addPtr(temp0, temp1); - masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); - - // Save register to make it available below. - masm.push(backtrack_stack_pointer); - - // Compute pointers to match string and capture string - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp1); // Start of match. - masm.addPtr(input_end_pointer, current_character); // Start of capture. - masm.computeEffectiveAddress(BaseIndex(temp0, temp1, TimesOne), backtrack_stack_pointer); // End of match. - - Label loop; - masm.bind(&loop); - if (mode_ == ASCII) { - masm.load8ZeroExtend(Address(current_character, 0), temp0); - masm.load8ZeroExtend(Address(temp1, 0), temp2); + if (read_backward) { + // If start + len > current, there isn't enough room for a + // lookbehind backreference. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.loadPtr(inputStart, temp1); + masm.subPtr(input_end_pointer, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, current_position, + BranchOrBacktrack(on_no_match)); } else { - MOZ_ASSERT(mode_ == CHAR16); - masm.load16ZeroExtend(Address(current_character, 0), temp0); - masm.load16ZeroExtend(Address(temp1, 0), temp2); + // current_position is the negative offset from the end. + // If current + len > 0, there isn't enough room for a backreference. + masm.movePtr(current_position, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), + BranchOrBacktrack(on_no_match)); } - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); - - // Increment pointers into capture and match string. - masm.addPtr(Imm32(char_size()), current_character); - masm.addPtr(Imm32(char_size()), temp1); - - // Check if we have reached end of match area. - masm.branchPtr(Assembler::Below, temp1, backtrack_stack_pointer, &loop); - masm.jump(&success); - - masm.bind(&fail); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - JumpOrBacktrack(on_no_match); - - masm.bind(&success); - - // Move current character position to position after match. - masm.movePtr(backtrack_stack_pointer, current_position); - masm.subPtr(input_end_pointer, current_position); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - - masm.bind(&fallthrough); -} - -void -NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - Label* on_no_match, bool unicode) -{ - JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); - Label fallthrough; + if (mode_ == CHAR16 && ignore_case) { + // We call a helper function for case-insensitive non-latin1 strings. + // Save volatile regs. temp1, temp2, and current_character + // don't need to be saved. current_position needs to be saved + // even if it's non-volatile, because we modify it to use as an argument. + LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); + volatileRegs.addUnchecked(current_position); + volatileRegs.takeUnchecked(temp1); + volatileRegs.takeUnchecked(temp2); + volatileRegs.takeUnchecked(current_character); + masm.PushRegsInMask(volatileRegs); - masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture - masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture - masm.subPtr(current_character, temp1); // Length of capture. + // Parameters are + // Address byte_offset1 - Address captured substring's start. + // Address byte_offset2 - Address of current character position. + // size_t byte_length - length of capture in bytes(!) - // The length of a capture should not be negative. This can only happen - // if the end of the capture is unrecorded, or at a point earlier than - // the start of the capture. - masm.branchPtr(Assembler::LessThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Set byte_offset1. + // Start of capture, where current_character already holds string-end negative offset. + masm.addPtr(input_end_pointer, current_character); - // If length is zero, either the capture is empty or it is completely - // uncaptured. In either case succeed immediately. - masm.branchPtr(Assembler::Equal, temp1, ImmWord(0), &fallthrough); + // Set byte_offset2. + // Found by adding negative string-end offset of current position + // to end of string. + masm.addPtr(input_end_pointer, current_position); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp1, current_position); + } - // Check that there are sufficient characters left in the input. - masm.movePtr(current_position, temp0); - masm.addPtr(temp1, temp0); - masm.branchPtr(Assembler::GreaterThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + masm.setupUnalignedABICall(temp1); + masm.passABIArg(current_character); + masm.passABIArg(current_position); + masm.passABIArg(temp0); + if (unicode) { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } else { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } + masm.storeCallInt32Result(temp1); + masm.PopRegsInMask(volatileRegs); + // Check if function returned non-zero for success or zero for failure. + masm.branchTest32(Assembler::Zero, temp1, temp1, BranchOrBacktrack(on_no_match)); - if (mode_ == ASCII) { - Label success, fail; + // On success, advance position by length of capture + if (read_backward) { + masm.subPtr(temp0, current_position); + } else { + masm.addPtr(temp0, current_position); + } + } else { + MOZ_ASSERT(mode_ == ASCII || !ignore_case); // Save register contents to make the registers available below. After - // this, the temp0, temp2, and current_position registers are available. + // this, the temp1, temp2, and current_position registers are available. masm.push(current_position); + // Make offset values into pointers masm.addPtr(input_end_pointer, current_character); // Start of capture. masm.addPtr(input_end_pointer, current_position); // Start of text to match against capture. - masm.addPtr(current_position, temp1); // End of text to match against capture. - - Label loop, loop_increment; - masm.bind(&loop); - masm.load8ZeroExtend(Address(current_position, 0), temp0); - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.branch32(Assembler::Equal, temp0, temp2, &loop_increment); - - // Mismatch, try case-insensitive match (converting letters to lower-case). - masm.or32(Imm32(0x20), temp0); // Convert match character to lower-case. - - // Is temp0 a lowercase letter? - Label convert_capture; - masm.computeEffectiveAddress(Address(temp0, -'a'), temp2); - masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), - &convert_capture); - // Latin-1: Check for values in range [224,254] but not 247. - masm.sub32(Imm32(224 - 'a'), temp2); - masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); - - // Check for 247. - masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp0, current_position); + } - masm.bind(&convert_capture); + // End of text to match against capture (temp0 is pointer now) + masm.addPtr(current_position, temp0); - // Also convert capture character. - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.or32(Imm32(0x20), temp2); + Label success, fail, loop; + masm.bind(&loop); - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); + // Load next character from each string. + if (mode_ == ASCII) { + masm.load8ZeroExtend(Address(current_character, 0), temp1); + masm.load8ZeroExtend(Address(current_position, 0), temp2); + } else { + masm.load16ZeroExtend(Address(current_character, 0), temp1); + masm.load16ZeroExtend(Address(current_position, 0), temp2); + } - masm.bind(&loop_increment); + if (ignore_case) { + MOZ_ASSERT(mode_ == ASCII); + Label loop_increment, convert_match; + + // Try exact match. + masm.branch32(Assembler::Equal, temp1, temp2, &loop_increment); + + // Mismatch, try case-insensitive match (converting letters to lower-case). + masm.or32(Imm32(0x20), temp1); // Convert match character to lower-case. + + // Is temp1 a lowercase letter [a,z]? + masm.computeEffectiveAddress(Address(temp1, -'a'), temp2); + masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), + &convert_match); + // Latin-1: Check for values in range [224,254] but not 247 (U+00F7 DIVISION SIGN). + masm.sub32(Imm32(224 - 'a'), temp2); + masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); + // Check for 247. + masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + + // Capture character is lower case. Convert match character to lower case and compare + masm.bind(&convert_match); + // Reload latin1 character since temp2 was clobbered above + masm.load8ZeroExtend(Address(current_position, 0), temp2); + masm.or32(Imm32(0x20), temp2); + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + + masm.bind(&loop_increment); + } else { + // Fail if characters do not match. + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + } // Increment pointers into match and capture strings. - masm.addPtr(Imm32(1), current_character); - masm.addPtr(Imm32(1), current_position); + masm.addPtr(Imm32(char_size()), current_character); + masm.addPtr(Imm32(char_size()), current_position); - // Compare to end of match, and loop if not done. - masm.branchPtr(Assembler::Below, current_position, temp1, &loop); + // Loop if we have not reached the end of the match string. + masm.branchPtr(Assembler::Below, current_position, temp0, &loop); masm.jump(&success); - masm.bind(&fail); - // Restore original values before failing. + masm.bind(&fail); masm.pop(current_position); JumpOrBacktrack(on_no_match); masm.bind(&success); - // Drop original character position value. - masm.addToStackPtr(Imm32(sizeof(uintptr_t))); + masm.pop(temp0); - // Compute new value of character position after the matched part. + // current_position is a pointer (now at the end of the consumed characters). Convert it back to an offset. masm.subPtr(input_end_pointer, current_position); - } else { - MOZ_ASSERT(mode_ == CHAR16); - - // Note: temp1 needs to be saved/restored if it is volatile, as it is used after the call. - LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); - volatileRegs.takeUnchecked(temp0); - volatileRegs.takeUnchecked(temp2); - masm.PushRegsInMask(volatileRegs); - // Set byte_offset1. - // Start of capture, where current_character already holds string-end negative offset. - masm.addPtr(input_end_pointer, current_character); - - // Set byte_offset2. - // Found by adding negative string-end offset of current position - // to end of string. - masm.addPtr(input_end_pointer, current_position); - - // Parameters are - // Address byte_offset1 - Address captured substring's start. - // Address byte_offset2 - Address of current character position. - // size_t byte_length - length of capture in bytes(!) - masm.setupUnalignedABICall(temp0); - masm.passABIArg(current_character); - masm.passABIArg(current_position); - masm.passABIArg(temp1); - if (!unicode) { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); - } else { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + if (read_backward) { + // Subtract match length if we matched backward + masm.addPtr(register_location(start_reg), current_position); + masm.subPtr(register_location(start_reg + 1), current_position); } - masm.storeCallInt32Result(temp0); + } - masm.PopRegsInMask(volatileRegs); + // Fallthrough if capture length was zero + masm.bind(&fallthrough); +} - // Check if function returned non-zero for success or zero for failure. - masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match)); +void +NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - // On success, increment position by length of capture. - masm.addPtr(temp1, current_position); - } + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, /*unicode = */ false, /*ignore_case = */ false); +} - masm.bind(&fallthrough); +void +NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + Label* on_no_match, bool unicode) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); + + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, unicode, /*ignore_case = */ true); } void @@ -961,10 +943,13 @@ NativeRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, Label* on_end_of { JitSpew(SPEW_PREFIX "LoadCurrentCharacter(%d, %d)", cp_offset, characters); - MOZ_ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. MOZ_ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -972,9 +957,8 @@ void NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int characters) { JitSpew(SPEW_PREFIX "LoadCurrentCharacterUnchecked(%d, %d)", cp_offset, characters); - + BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * char_size()); if (mode_ == ASCII) { - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset); if (characters == 4) { masm.load32(address, current_character); } else if (characters == 2) { @@ -986,7 +970,6 @@ NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int cha } else { MOZ_ASSERT(mode_ == CHAR16); MOZ_ASSERT(characters <= 2); - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * sizeof(char16_t)); if (characters == 2) masm.load32(address, current_character); else @@ -1096,10 +1079,11 @@ NativeRegExpMacroAssembler::CheckBacktrackStackLimit() masm.moveStackPtrTo(temp2); masm.call(&stack_overflow_label_); - masm.bind(&no_stack_overflow); // Exit with an exception if the call failed. masm.branchTest32(Assembler::Zero, temp0, temp0, &exit_with_exception_label_); + + masm.bind(&no_stack_overflow); } void @@ -1213,8 +1197,21 @@ void NativeRegExpMacroAssembler::CheckPosition(int cp_offset, Label* on_outside_input) { JitSpew(SPEW_PREFIX "CheckPosition(%d)", cp_offset); - masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, - ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + if (cp_offset >= 0) { + // end + current + offset >= end + // <=> current + offset >= 0 + // <=> current >= -offset + masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, + ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + } else { + // negative cp_offset means we're reading backwards, check against start of string + // Compute offset address + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); + + // Compare to start of input. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.branchPtr(Assembler::GreaterThan, inputStart, temp0, BranchOrBacktrack(on_outside_input)); + } } Label* diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h index 3a04a33cf2..857900cabf 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.h +++ b/js/src/irregexp/NativeRegExpMacroAssembler.h @@ -98,7 +98,7 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem void AdvanceRegister(int reg, int by); void Backtrack(); void Bind(jit::Label* label); - void CheckAtStart(jit::Label* on_at_start); + void CheckAtStart(int cp_offset, jit::Label* on_at_start); void CheckCharacter(unsigned c, jit::Label* on_equal); void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal); void CheckCharacterGT(char16_t limit, jit::Label* on_greater); @@ -203,13 +203,17 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem Vector<LabelPatch, 4, SystemAllocPolicy> labelPatches; - // See RegExpMacroAssembler.cpp for the meaning of these registers. + // See NativeRegExpMacroAssembler.cpp for the meaning of these registers. jit::Register input_end_pointer; jit::Register current_character; jit::Register current_position; jit::Register backtrack_stack_pointer; jit::Register temp0, temp1, temp2; + void CheckAtStartImpl(int cp_offset, jit::Label* on_cond, jit::Assembler::Condition cond); + void CheckNotBackReferenceImpl(int start_reg, bool read_backward, jit::Label* on_no_match, + bool unicode, bool ignore_case); + // The frame_pointer-relative location of a regexp register. jit::Address register_location(int register_index) { checkRegister(register_index); diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h index 2cdf71aad8..37c85b7427 100644 --- a/js/src/irregexp/RegExpAST.h +++ b/js/src/irregexp/RegExpAST.h @@ -424,13 +424,9 @@ class RegExpBackReference : public RegExpTree virtual RegExpBackReference* AsBackReference(); virtual bool IsBackReference(); virtual int min_match() override { return 0; } - // The capture may not be completely parsed yet, if the reference occurs - // before the capture. In the ordinary case, nothing has been captured yet, - // so the back reference must have the length 0. If the back reference is - // inside a lookbehind, effectively making it a forward reference, we return - virtual int max_match() override { - return capture_->body() ? capture_->max_match() : 0; - } + // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite + // recursion, we give up. Ignorance is bliss. + int max_match() override { return kInfinity; } int index() { return capture_->index(); } RegExpCapture* capture() { return capture_; } private: diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp index 42787dc0f0..bf95b59dda 100644 --- a/js/src/irregexp/RegExpEngine.cpp +++ b/js/src/irregexp/RegExpEngine.cpp @@ -2010,8 +2010,9 @@ RegExpQuantifier::ToNode(int min, alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer))); } answer = alternation; - if (not_at_start && !compiler->read_backward()) + if (not_at_start && !compiler->read_backward()) { alternation->set_not_at_start(); + } } return answer; } @@ -2218,9 +2219,7 @@ RegExpCapture::ToNode(RegExpTree* body, int start_reg = RegExpCapture::StartRegister(index); int end_reg = RegExpCapture::EndRegister(index); if (compiler->read_backward()) { - // std::swap(start_reg, end_reg); - start_reg = RegExpCapture::EndRegister(index); - end_reg = RegExpCapture::StartRegister(index); + std::swap(start_reg, end_reg); } RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success); RegExpNode* body_node = body->ToNode(compiler, store_end); @@ -2604,6 +2603,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, { // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1. const int push_limit = (assembler->stack_limit_slack() + 1) / 2; + static const int kNoStore = INT32_MIN; // Count pushes performed to force a stack limit check occasionally. int pushes = 0; @@ -2620,7 +2620,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, int value = 0; bool absolute = false; bool clear = false; - int store_position = -1; + int store_position = kNoStore; // This is a little tricky because we are scanning the actions in reverse // historical order (newest first). for (DeferredAction* action = actions_; @@ -2641,7 +2641,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, // we can set undo_action to IGNORE if we know there is no value to // restore. undo_action = DEFER_RESTORE; - MOZ_ASSERT(store_position == -1); + MOZ_ASSERT(store_position == kNoStore); MOZ_ASSERT(!clear); break; } @@ -2649,14 +2649,14 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, if (!absolute) { value++; } - MOZ_ASSERT(store_position == -1); + MOZ_ASSERT(store_position == kNoStore); MOZ_ASSERT(!clear); undo_action = DEFER_RESTORE; break; case ActionNode::STORE_POSITION: { Trace::DeferredCapture* pc = static_cast<Trace::DeferredCapture*>(action); - if (!clear && store_position == -1) { + if (!clear && store_position == kNoStore) { store_position = pc->cp_offset(); } @@ -2680,7 +2680,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, // Since we're scanning in reverse order, if we've already // set the position we have to ignore historically earlier // clearing operations. - if (store_position == -1) { + if (store_position == kNoStore) { clear = true; } undo_action = DEFER_RESTORE; @@ -2710,7 +2710,7 @@ Trace::PerformDeferredActions(LifoAlloc* alloc, } // Perform the chronologically last action (or accumulated increment) // for the register. - if (store_position != -1) { + if (store_position != kNoStore) { assembler->WriteCurrentPositionToRegister(reg, store_position); } else if (clear) { assembler->ClearRegisters(reg, reg); @@ -2910,16 +2910,23 @@ EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace) Trace new_trace(*trace); new_trace.InvalidateCurrentCharacter(); + // A positive (> 0) cp_offset means we've already successfully matched a + // non-empty-width part of the pattern, and thus cannot be at or before the + // start of the subject string. We can thus skip both at-start and + // bounds-checks when loading the one-character lookbehind. + const bool may_be_at_or_before_subject_string_start = new_trace.cp_offset() <= 0; + jit::Label ok; - if (new_trace.cp_offset() == 0) { - // The start of input counts as a newline in this context, so skip to - // ok if we are at the start. - assembler->CheckAtStart(&ok); + if (may_be_at_or_before_subject_string_start) { + // The start of input counts as a newline in this context, so skip to ok if + // we are at the start. + assembler->CheckAtStart(new_trace.cp_offset(), &ok); } - // We already checked that we are not at the start of input so it must be - // OK to load the previous character. - assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false); + // If we've already checked that we are not at the start of input, it's okay + // to load the previous character without bounds checks. + const bool can_skip_bounds_check = !may_be_at_or_before_subject_string_start; + assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), can_skip_bounds_check); if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) { // Newline means \n, \r, 0x2028 or 0x2029. @@ -2944,11 +2951,10 @@ EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trac new_trace.InvalidateCurrentCharacter(); jit::Label ok; - if (new_trace.cp_offset() == 0) - assembler->CheckAtStart(&ok); + if (new_trace.cp_offset() <= 0) { + assembler->CheckAtStart(new_trace.cp_offset(), &ok); + } - // We already checked that we are not at the start of input so it must be - // OK to load the previous character. assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false); assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax, new_trace.backtrack()); @@ -2972,8 +2978,9 @@ EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace* Trace new_trace(*trace); new_trace.InvalidateCurrentCharacter(); - if (new_trace.cp_offset() == 0) - assembler->CheckAtStart(&ok); + if (new_trace.cp_offset() <= 0) { + assembler->CheckAtStart(new_trace.cp_offset(), &ok); + } // First check if next character is a trail surrogate. assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false); @@ -3091,10 +3098,10 @@ AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler, jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through; jit::Label* word = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack(); - if (new_trace.cp_offset() == 0) { + if (new_trace.cp_offset() <= 0) { // The start of input counts as a non-word character, so the question is // decided if we are at the start. - assembler->CheckAtStart(non_word); + assembler->CheckAtStart(new_trace.cp_offset(), non_word); } // We already checked that we are not at the start of input so it must be // OK to load the previous character. @@ -4152,7 +4159,7 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler) { - if (read_backward()) return NULL; + if (read_backward()) return nullptr; if (elements().length() != 1) return nullptr; @@ -4362,6 +4369,8 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) // For now we just call all choices one after the other. The idea ultimately // is to use the Dispatch table to try only the relevant ones. for (size_t i = first_normal_choice; i < choice_count; i++) { + bool is_last = i == choice_count - 1; + bool fall_through_on_failure = !is_last; GuardedAlternative alternative = alternatives()[i]; AlternativeGeneration* alt_gen = alt_gens.at(i); alt_gen->quick_check_details.set_characters(preload_characters); @@ -4377,20 +4386,20 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE); alt_gen->expects_preload = preload_is_current; bool generate_full_check_inline = false; - if (try_to_emit_quick_check_for_alternative(i) && + if (try_to_emit_quick_check_for_alternative(i == 0) && alternative.node()->EmitQuickCheck(compiler, &new_trace, preload_has_checked_bounds, &alt_gen->possible_success, &alt_gen->quick_check_details, - i < choice_count - 1)) { + fall_through_on_failure)) { // Quick check was generated for this choice. preload_is_current = true; preload_has_checked_bounds = true; // On the last choice in the ChoiceNode we generated the quick // check to fall through on possible success. So now we need to // generate the full check inline. - if (i == choice_count - 1) { + if (!fall_through_on_failure) { macro_assembler->Bind(&alt_gen->possible_success); new_trace.set_quick_check_performed(&alt_gen->quick_check_details); new_trace.set_characters_preloaded(preload_characters); @@ -4398,7 +4407,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) generate_full_check_inline = true; } } else if (alt_gen->quick_check_details.cannot_match()) { - if (i == choice_count - 1 && !greedy_loop) { + if (!fall_through_on_failure && !greedy_loop) { macro_assembler->JumpOrBacktrack(trace->backtrack()); } continue; @@ -4412,7 +4421,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) alt_gen->expects_preload = false; new_trace.InvalidateCurrentCharacter(); } - if (i < choice_count - 1) { + if (!is_last) { new_trace.set_backtrack(&alt_gen->after); } generate_full_check_inline = true; @@ -4450,12 +4459,14 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) if (new_trace.actions() != nullptr) { new_trace.set_flush_budget(new_flush_budget); } + bool next_expects_preload = + i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload; EmitOutOfLineContinuation(compiler, &new_trace, alternatives()[i], alt_gen, preload_characters, - alt_gens.at(i + 1)->expects_preload); + next_expects_preload); } } diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h index 0d760145f9..c665105d7c 100644 --- a/js/src/irregexp/RegExpEngine.h +++ b/js/src/irregexp/RegExpEngine.h @@ -524,7 +524,7 @@ class RegExpNode int characters_filled_in, bool not_at_start) = 0; - static const int kNodeIsTooComplexForGreedyLoops = -1; + static const int kNodeIsTooComplexForGreedyLoops = INT32_MIN; virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } @@ -1060,7 +1060,9 @@ class ChoiceNode : public RegExpNode bool not_at_start() { return not_at_start_; } void set_not_at_start() { not_at_start_ = true; } void set_being_calculated(bool b) { being_calculated_ = b; } - virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } + virtual bool try_to_emit_quick_check_for_alternative(bool is_first) { + return true; + } virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); virtual bool read_backward() { return false; } @@ -1114,7 +1116,9 @@ class NegativeLookaheadChoiceNode : public ChoiceNode // starts by loading enough characters for the alternative that takes fewest // characters, but on a negative lookahead the negative branch did not take // part in that calculation (EatsAtLeast) so the assumptions don't hold. - virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; } + bool try_to_emit_quick_check_for_alternative(bool is_first) override { + return !is_first; + } virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode); }; diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp index eab428faa9..f53acfb606 100644 --- a/js/src/irregexp/RegExpInterpreter.cpp +++ b/js/src/irregexp/RegExpInterpreter.cpp @@ -529,7 +529,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha } BYTECODE(CHECK_AT_START) - if (current == 0) + if (current + (insn >> BYTECODE_SHIFT) == 0) pc = byteCode + Load32Aligned(pc + 4); else pc += BC_CHECK_AT_START_LENGTH; diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp index acb78203bd..2c4ec67ef5 100644 --- a/js/src/irregexp/RegExpMacroAssembler.cpp +++ b/js/src/irregexp/RegExpMacroAssembler.cpp @@ -172,9 +172,9 @@ InterpretedRegExpMacroAssembler::Bind(jit::Label* label) } void -InterpretedRegExpMacroAssembler::CheckAtStart(jit::Label* on_at_start) +InterpretedRegExpMacroAssembler::CheckAtStart(int cp_offset, jit::Label* on_at_start) { - Emit(BC_CHECK_AT_START, 0); + Emit(BC_CHECK_AT_START, cp_offset); EmitOrLink(on_at_start); } diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h index 06784ce853..4fa0ab5630 100644 --- a/js/src/irregexp/RegExpMacroAssembler.h +++ b/js/src/irregexp/RegExpMacroAssembler.h @@ -96,7 +96,7 @@ class MOZ_STACK_CLASS RegExpMacroAssembler virtual void Backtrack() = 0; virtual void Bind(jit::Label* label) = 0; - virtual void CheckAtStart(jit::Label* on_at_start) = 0; + virtual void CheckAtStart(int cp_offset, jit::Label* on_at_start) = 0; // Dispatch after looking the current character up in a 2-bits-per-entry // map. The destinations vector has up to 4 labels. @@ -238,7 +238,7 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro void AdvanceRegister(int reg, int by); void Backtrack(); void Bind(jit::Label* label); - void CheckAtStart(jit::Label* on_at_start); + void CheckAtStart(int cp_offset, jit::Label* on_at_start); void CheckCharacter(unsigned c, jit::Label* on_equal); void CheckCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_equal); void CheckCharacterGT(char16_t limit, jit::Label* on_greater); |