diff options
author | Martok <martok@martoks-place.de> | 2022-12-21 18:53:25 +0100 |
---|---|---|
committer | Martok <martok@martoks-place.de> | 2022-12-21 18:53:25 +0100 |
commit | 94f456f834273adccd4baa07a9665b61e946f511 (patch) | |
tree | fc2dead1c5ac1d6988fd8a26a663ec9bdc00240f /js/src/irregexp/NativeRegExpMacroAssembler.cpp | |
parent | 37bbb4152dbc1dd02e9c094fdcc9320ad9394ac9 (diff) | |
download | uxp-94f456f834273adccd4baa07a9665b61e946f511.tar.gz |
Issue #2056 - Fix handling of captures in lookbehinds
- Port unification of CheckNotBackReference*
- Port LoadCurrentCharacter
- Make RegExpMacroAssembler::CheckAtStart understand cp_offset
- Replace magic numbers in ChoiceNode::Emit, Trace::PerformDeferredActions
- CheckBacktrackStackLimit
- Allow backrefs to resist recursion
Diffstat (limited to 'js/src/irregexp/NativeRegExpMacroAssembler.cpp')
-rw-r--r-- | js/src/irregexp/NativeRegExpMacroAssembler.cpp | 385 |
1 files changed, 191 insertions, 194 deletions
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp index 2efbf1bf76..41c1951bc2 100644 --- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp +++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp @@ -71,13 +71,13 @@ NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(LifoAlloc* alloc, RegExpS // Find physical registers for each compiler register. AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All()); + temp0 = regs.takeAny(); + temp1 = regs.takeAny(); + temp2 = regs.takeAny(); input_end_pointer = regs.takeAny(); current_character = regs.takeAny(); current_position = regs.takeAny(); backtrack_stack_pointer = regs.takeAny(); - temp0 = regs.takeAny(); - temp1 = regs.takeAny(); - temp2 = regs.takeAny(); JitSpew(JitSpew_Codegen, "Starting RegExp (input_end_pointer %s) (current_character %s)" @@ -548,23 +548,20 @@ NativeRegExpMacroAssembler::Bind(Label* label) } void -NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start) -{ - JitSpew(SPEW_PREFIX "CheckAtStart"); - - Label not_at_start; - - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), ¬_at_start); - - // If we did, are we still at the start of the input? - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); +NativeRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond, + Assembler::Condition cond) { + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::Equal, inputStart, temp0, BranchOrBacktrack(on_at_start)); + masm.branchPtr(cond, inputStart, temp0, BranchOrBacktrack(on_cond)); +} + +void +NativeRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) +{ + JitSpew(SPEW_PREFIX "CheckAtStart"); - masm.bind(¬_at_start); + CheckAtStartImpl(cp_offset, on_at_start, Assembler::Equal); } void @@ -572,15 +569,7 @@ NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_star { JitSpew(SPEW_PREFIX "CheckNotAtStart"); - // Did we start the match at the start of the string at all? - Address startIndex(masm.getStackPointer(), offsetof(FrameData, startIndex)); - masm.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0), BranchOrBacktrack(on_not_at_start)); - - // If we did, are we still at the start of the input? - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp0); - - Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); - masm.branchPtr(Assembler::NotEqual, inputStart, temp0, BranchOrBacktrack(on_not_at_start)); + CheckAtStartImpl(cp_offset, on_not_at_start, Assembler::NotEqual); } void @@ -659,211 +648,204 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio } void -NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +NativeRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg, bool read_backward, + Label* on_no_match, + bool unicode, bool ignore_case) { - JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - Label fallthrough; - Label success; - Label fail; - - // Find length of back-referenced capture. - masm.loadPtr(register_location(start_reg), current_character); - masm.loadPtr(register_location(start_reg + 1), temp0); - masm.subPtr(current_character, temp0); // Length to check. - // Fail on partial or illegal capture (start of capture after end of capture). - masm.branchPtr(Assembler::LessThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Captures are stored as a sequential pair of registers. + // Find the length of the back-referenced capture and load the + // capture's start index into current_character_ + masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture + masm.loadPtr(register_location(start_reg + 1), temp0); // Index of end of capture + masm.subPtr(current_character, temp0); // Length of capture. - // Succeed on empty capture (including no capture). + // If length is zero, either the capture is empty or it is completely + // uncaptured. In either case succeed immediately. masm.branchPtr(Assembler::Equal, temp0, ImmWord(0), &fallthrough); // Check that there are sufficient characters left in the input. - masm.movePtr(current_position, temp1); - masm.addPtr(temp0, temp1); - masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); - - // Save register to make it available below. - masm.push(backtrack_stack_pointer); - - // Compute pointers to match string and capture string - masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne), temp1); // Start of match. - masm.addPtr(input_end_pointer, current_character); // Start of capture. - masm.computeEffectiveAddress(BaseIndex(temp0, temp1, TimesOne), backtrack_stack_pointer); // End of match. - - Label loop; - masm.bind(&loop); - if (mode_ == ASCII) { - masm.load8ZeroExtend(Address(current_character, 0), temp0); - masm.load8ZeroExtend(Address(temp1, 0), temp2); + if (read_backward) { + // If start + len > current, there isn't enough room for a + // lookbehind backreference. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.loadPtr(inputStart, temp1); + masm.subPtr(input_end_pointer, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, current_position, + BranchOrBacktrack(on_no_match)); } else { - MOZ_ASSERT(mode_ == CHAR16); - masm.load16ZeroExtend(Address(current_character, 0), temp0); - masm.load16ZeroExtend(Address(temp1, 0), temp2); + // current_position is the negative offset from the end. + // If current + len > 0, there isn't enough room for a backreference. + masm.movePtr(current_position, temp1); + masm.addPtr(temp0, temp1); + masm.branchPtr(Assembler::GreaterThan, temp1, ImmWord(0), + BranchOrBacktrack(on_no_match)); } - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); - - // Increment pointers into capture and match string. - masm.addPtr(Imm32(char_size()), current_character); - masm.addPtr(Imm32(char_size()), temp1); - - // Check if we have reached end of match area. - masm.branchPtr(Assembler::Below, temp1, backtrack_stack_pointer, &loop); - masm.jump(&success); - - masm.bind(&fail); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - JumpOrBacktrack(on_no_match); - - masm.bind(&success); - - // Move current character position to position after match. - masm.movePtr(backtrack_stack_pointer, current_position); - masm.subPtr(input_end_pointer, current_position); - - // Restore backtrack stack pointer. - masm.pop(backtrack_stack_pointer); - - masm.bind(&fallthrough); -} - -void -NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, - Label* on_no_match, bool unicode) -{ - JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); - Label fallthrough; + if (mode_ == CHAR16 && ignore_case) { + // We call a helper function for case-insensitive non-latin1 strings. + // Save volatile regs. temp1, temp2, and current_character + // don't need to be saved. current_position needs to be saved + // even if it's non-volatile, because we modify it to use as an argument. + LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); + volatileRegs.addUnchecked(current_position); + volatileRegs.takeUnchecked(temp1); + volatileRegs.takeUnchecked(temp2); + volatileRegs.takeUnchecked(current_character); + masm.PushRegsInMask(volatileRegs); - masm.loadPtr(register_location(start_reg), current_character); // Index of start of capture - masm.loadPtr(register_location(start_reg + 1), temp1); // Index of end of capture - masm.subPtr(current_character, temp1); // Length of capture. + // Parameters are + // Address byte_offset1 - Address captured substring's start. + // Address byte_offset2 - Address of current character position. + // size_t byte_length - length of capture in bytes(!) - // The length of a capture should not be negative. This can only happen - // if the end of the capture is unrecorded, or at a point earlier than - // the start of the capture. - masm.branchPtr(Assembler::LessThan, temp1, ImmWord(0), BranchOrBacktrack(on_no_match)); + // Set byte_offset1. + // Start of capture, where current_character already holds string-end negative offset. + masm.addPtr(input_end_pointer, current_character); - // If length is zero, either the capture is empty or it is completely - // uncaptured. In either case succeed immediately. - masm.branchPtr(Assembler::Equal, temp1, ImmWord(0), &fallthrough); + // Set byte_offset2. + // Found by adding negative string-end offset of current position + // to end of string. + masm.addPtr(input_end_pointer, current_position); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp1, current_position); + } - // Check that there are sufficient characters left in the input. - masm.movePtr(current_position, temp0); - masm.addPtr(temp1, temp0); - masm.branchPtr(Assembler::GreaterThan, temp0, ImmWord(0), BranchOrBacktrack(on_no_match)); + masm.setupUnalignedABICall(temp1); + masm.passABIArg(current_character); + masm.passABIArg(current_position); + masm.passABIArg(temp0); + if (unicode) { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } else { + int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; + masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + } + masm.storeCallInt32Result(temp1); + masm.PopRegsInMask(volatileRegs); + // Check if function returned non-zero for success or zero for failure. + masm.branchTest32(Assembler::Zero, temp1, temp1, BranchOrBacktrack(on_no_match)); - if (mode_ == ASCII) { - Label success, fail; + // On success, advance position by length of capture + if (read_backward) { + masm.subPtr(temp0, current_position); + } else { + masm.addPtr(temp0, current_position); + } + } else { + MOZ_ASSERT(mode_ == ASCII || !ignore_case); // Save register contents to make the registers available below. After - // this, the temp0, temp2, and current_position registers are available. + // this, the temp1, temp2, and current_position registers are available. masm.push(current_position); + // Make offset values into pointers masm.addPtr(input_end_pointer, current_character); // Start of capture. masm.addPtr(input_end_pointer, current_position); // Start of text to match against capture. - masm.addPtr(current_position, temp1); // End of text to match against capture. - - Label loop, loop_increment; - masm.bind(&loop); - masm.load8ZeroExtend(Address(current_position, 0), temp0); - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.branch32(Assembler::Equal, temp0, temp2, &loop_increment); - - // Mismatch, try case-insensitive match (converting letters to lower-case). - masm.or32(Imm32(0x20), temp0); // Convert match character to lower-case. - - // Is temp0 a lowercase letter? - Label convert_capture; - masm.computeEffectiveAddress(Address(temp0, -'a'), temp2); - masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), - &convert_capture); - // Latin-1: Check for values in range [224,254] but not 247. - masm.sub32(Imm32(224 - 'a'), temp2); - masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); - - // Check for 247. - masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + if (read_backward) { + // Offset by length when matching backwards. + masm.subPtr(temp0, current_position); + } - masm.bind(&convert_capture); + // End of text to match against capture (temp0 is pointer now) + masm.addPtr(current_position, temp0); - // Also convert capture character. - masm.load8ZeroExtend(Address(current_character, 0), temp2); - masm.or32(Imm32(0x20), temp2); + Label success, fail, loop; + masm.bind(&loop); - masm.branch32(Assembler::NotEqual, temp0, temp2, &fail); + // Load next character from each string. + if (mode_ == ASCII) { + masm.load8ZeroExtend(Address(current_character, 0), temp1); + masm.load8ZeroExtend(Address(current_position, 0), temp2); + } else { + masm.load16ZeroExtend(Address(current_character, 0), temp1); + masm.load16ZeroExtend(Address(current_position, 0), temp2); + } - masm.bind(&loop_increment); + if (ignore_case) { + MOZ_ASSERT(mode_ == ASCII); + Label loop_increment, convert_match; + + // Try exact match. + masm.branch32(Assembler::Equal, temp1, temp2, &loop_increment); + + // Mismatch, try case-insensitive match (converting letters to lower-case). + masm.or32(Imm32(0x20), temp1); // Convert match character to lower-case. + + // Is temp1 a lowercase letter [a,z]? + masm.computeEffectiveAddress(Address(temp1, -'a'), temp2); + masm.branch32(Assembler::BelowOrEqual, temp2, Imm32(static_cast<int32_t>('z' - 'a')), + &convert_match); + // Latin-1: Check for values in range [224,254] but not 247 (U+00F7 DIVISION SIGN). + masm.sub32(Imm32(224 - 'a'), temp2); + masm.branch32(Assembler::Above, temp2, Imm32(254 - 224), &fail); + // Check for 247. + masm.branch32(Assembler::Equal, temp2, Imm32(247 - 224), &fail); + + // Capture character is lower case. Convert match character to lower case and compare + masm.bind(&convert_match); + // Reload latin1 character since temp2 was clobbered above + masm.load8ZeroExtend(Address(current_position, 0), temp2); + masm.or32(Imm32(0x20), temp2); + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + + masm.bind(&loop_increment); + } else { + // Fail if characters do not match. + masm.branch32(Assembler::NotEqual, temp1, temp2, &fail); + } // Increment pointers into match and capture strings. - masm.addPtr(Imm32(1), current_character); - masm.addPtr(Imm32(1), current_position); + masm.addPtr(Imm32(char_size()), current_character); + masm.addPtr(Imm32(char_size()), current_position); - // Compare to end of match, and loop if not done. - masm.branchPtr(Assembler::Below, current_position, temp1, &loop); + // Loop if we have not reached the end of the match string. + masm.branchPtr(Assembler::Below, current_position, temp0, &loop); masm.jump(&success); - masm.bind(&fail); - // Restore original values before failing. + masm.bind(&fail); masm.pop(current_position); JumpOrBacktrack(on_no_match); masm.bind(&success); - // Drop original character position value. - masm.addToStackPtr(Imm32(sizeof(uintptr_t))); + masm.pop(temp0); - // Compute new value of character position after the matched part. + // current_position is a pointer (now at the end of the consumed characters). Convert it back to an offset. masm.subPtr(input_end_pointer, current_position); - } else { - MOZ_ASSERT(mode_ == CHAR16); - - // Note: temp1 needs to be saved/restored if it is volatile, as it is used after the call. - LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile()); - volatileRegs.takeUnchecked(temp0); - volatileRegs.takeUnchecked(temp2); - masm.PushRegsInMask(volatileRegs); - // Set byte_offset1. - // Start of capture, where current_character already holds string-end negative offset. - masm.addPtr(input_end_pointer, current_character); - - // Set byte_offset2. - // Found by adding negative string-end offset of current position - // to end of string. - masm.addPtr(input_end_pointer, current_position); - - // Parameters are - // Address byte_offset1 - Address captured substring's start. - // Address byte_offset2 - Address of current character position. - // size_t byte_length - length of capture in bytes(!) - masm.setupUnalignedABICall(temp0); - masm.passABIArg(current_character); - masm.passABIArg(current_position); - masm.passABIArg(temp1); - if (!unicode) { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); - } else { - int (*fun)(const char16_t*, const char16_t*, size_t) = CaseInsensitiveCompareUCStrings; - masm.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun)); + if (read_backward) { + // Subtract match length if we matched backward + masm.addPtr(register_location(start_reg), current_position); + masm.subPtr(register_location(start_reg + 1), current_position); } - masm.storeCallInt32Result(temp0); + } - masm.PopRegsInMask(volatileRegs); + // Fallthrough if capture length was zero + masm.bind(&fallthrough); +} - // Check if function returned non-zero for success or zero for failure. - masm.branchTest32(Assembler::Zero, temp0, temp0, BranchOrBacktrack(on_no_match)); +void +NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg); - // On success, increment position by length of capture. - masm.addPtr(temp1, current_position); - } + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, /*unicode = */ false, /*ignore_case = */ false); +} - masm.bind(&fallthrough); +void +NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward, + Label* on_no_match, bool unicode) +{ + JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode); + + CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, unicode, /*ignore_case = */ true); } void @@ -961,10 +943,13 @@ NativeRegExpMacroAssembler::LoadCurrentCharacter(int cp_offset, Label* on_end_of { JitSpew(SPEW_PREFIX "LoadCurrentCharacter(%d, %d)", cp_offset, characters); - MOZ_ASSERT(cp_offset >= -1); // ^ and \b can look behind one character. MOZ_ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) if (check_bounds) - CheckPosition(cp_offset + characters - 1, on_end_of_input); + if (cp_offset >= 0) { + CheckPosition(cp_offset + characters - 1, on_end_of_input); + } else { + CheckPosition(cp_offset, on_end_of_input); + } LoadCurrentCharacterUnchecked(cp_offset, characters); } @@ -972,9 +957,8 @@ void NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int characters) { JitSpew(SPEW_PREFIX "LoadCurrentCharacterUnchecked(%d, %d)", cp_offset, characters); - + BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * char_size()); if (mode_ == ASCII) { - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset); if (characters == 4) { masm.load32(address, current_character); } else if (characters == 2) { @@ -986,7 +970,6 @@ NativeRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset, int cha } else { MOZ_ASSERT(mode_ == CHAR16); MOZ_ASSERT(characters <= 2); - BaseIndex address(input_end_pointer, current_position, TimesOne, cp_offset * sizeof(char16_t)); if (characters == 2) masm.load32(address, current_character); else @@ -1096,10 +1079,11 @@ NativeRegExpMacroAssembler::CheckBacktrackStackLimit() masm.moveStackPtrTo(temp2); masm.call(&stack_overflow_label_); - masm.bind(&no_stack_overflow); // Exit with an exception if the call failed. masm.branchTest32(Assembler::Zero, temp0, temp0, &exit_with_exception_label_); + + masm.bind(&no_stack_overflow); } void @@ -1213,8 +1197,21 @@ void NativeRegExpMacroAssembler::CheckPosition(int cp_offset, Label* on_outside_input) { JitSpew(SPEW_PREFIX "CheckPosition(%d)", cp_offset); - masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, - ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + if (cp_offset >= 0) { + // end + current + offset >= end + // <=> current + offset >= 0 + // <=> current >= -offset + masm.branchPtr(Assembler::GreaterThanOrEqual, current_position, + ImmWord(-cp_offset * char_size()), BranchOrBacktrack(on_outside_input)); + } else { + // negative cp_offset means we're reading backwards, check against start of string + // Compute offset address + masm.computeEffectiveAddress(BaseIndex(input_end_pointer, current_position, TimesOne, cp_offset * char_size()), temp0); + + // Compare to start of input. + Address inputStart(masm.getStackPointer(), offsetof(FrameData, inputStart)); + masm.branchPtr(Assembler::GreaterThan, inputStart, temp0, BranchOrBacktrack(on_outside_input)); + } } Label* |