summaryrefslogtreecommitdiff
path: root/js/src/new-regexp/regexp-native-macro-assembler.cc
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/new-regexp/regexp-native-macro-assembler.cc')
-rw-r--r--js/src/new-regexp/regexp-native-macro-assembler.cc1213
1 files changed, 0 insertions, 1213 deletions
diff --git a/js/src/new-regexp/regexp-native-macro-assembler.cc b/js/src/new-regexp/regexp-native-macro-assembler.cc
deleted file mode 100644
index 01453a937..000000000
--- a/js/src/new-regexp/regexp-native-macro-assembler.cc
+++ /dev/null
@@ -1,1213 +0,0 @@
-/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
- * vim: set ts=8 sts=2 et sw=2 tw=80:
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-// Copyright 2020 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "jit/Linker.h"
-#include "gc/Zone.h"
-#include "new-regexp/regexp-macro-assembler-arch.h"
-#include "new-regexp/regexp-stack.h"
-#include "vm/MatchPairs.h"
-
-#include "jit/MacroAssembler-inl.h"
-
-using namespace js;
-using namespace js::irregexp;
-using namespace js::jit;
-
-namespace v8 {
-namespace internal {
-
-using js::MatchPairs;
-using js::jit::AbsoluteAddress;
-using js::jit::Address;
-using js::jit::AllocatableGeneralRegisterSet;
-using js::jit::Assembler;
-using js::jit::BaseIndex;
-using js::jit::CodeLocationLabel;
-using js::jit::GeneralRegisterBackwardIterator;
-using js::jit::GeneralRegisterForwardIterator;
-using js::jit::GeneralRegisterSet;
-using js::jit::Imm32;
-using js::jit::ImmPtr;
-using js::jit::ImmWord;
-using js::jit::JitCode;
-using js::jit::Linker;
-using js::jit::LiveGeneralRegisterSet;
-using js::jit::Register;
-using js::jit::Registers;
-using js::jit::StackMacroAssembler;
-
-SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,  // Constructor: stores its arguments, picks the fixed registers, and emits the opening jump.
- StackMacroAssembler& masm,
- Zone* zone, Mode mode,
- uint32_t num_capture_registers)
- : NativeRegExpMacroAssembler(isolate, zone),
- cx_(cx),
- masm_(masm),
- mode_(mode),
- num_registers_(num_capture_registers),  // Starts out equal to the capture register count.
- num_capture_registers_(num_capture_registers) {
- // Each capture has a start and an end register, so the count must be even.
- MOZ_ASSERT(num_capture_registers_ % 2 == 0);
-
- AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());  // Pool from which the fixed registers below are taken.
-
- temp0_ = regs.takeAny();
- temp1_ = regs.takeAny();
- temp2_ = regs.takeAny();
- input_end_pointer_ = regs.takeAny();
- current_character_ = regs.takeAny();
- current_position_ = regs.takeAny();
- backtrack_stack_pointer_ = regs.takeAny();
- savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs);  // Derived from what is left in |regs|; createStackFrame() pushes this set on entry.
-
- masm_.jump(&entry_label_); // We'll generate the entry code later (GetCode() binds entry_label_)
- masm_.bind(&start_label_); // and continue from here (the entry code jumps back to start_label_).
-}
-
-// Reports the slack constant of the regexp backtrack stack
-// (RegExpStack::kStackLimitSlack).
-int SMRegExpMacroAssembler::stack_limit_slack() {
-  const int slack = RegExpStack::kStackLimitSlack;
-  return slack;
-}
-
-// Emits code that moves current_position_ by |by| characters (scaled to
-// bytes with char_size()). Nothing is emitted when |by| is zero.
-void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) {
-  if (by == 0) {
-    return;
-  }
-  const Imm32 byteDelta(by * char_size());
-  masm_.addPtr(byteDelta, current_position_);
-}
-
-// Emits code that adds |by| to the value stored in regexp register |reg|.
-// Nothing is emitted when |by| is zero.
-void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) {
-  MOZ_ASSERT(reg >= 0 && reg < num_registers_);
-  if (by == 0) {
-    return;
-  }
-  masm_.addPtr(Imm32(by), register_location(reg));
-}
-
-// Emits a backtrack: the code address on top of the backtrack stack is
-// popped into temp0_ and then jumped to.
-void SMRegExpMacroAssembler::Backtrack() {
-  const Register target = temp0_;
-  Pop(target);
-  masm_.jump(target);
-}
-
-// Binds |label| at the current emission point. If a patch offset has been
-// recorded on the label (PushBacktrack() does so), the pair of patch offset
-// and label position is handed to AddLabelPatch().
-void SMRegExpMacroAssembler::Bind(Label* label) {
-  masm_.bind(label->inner());
-  if (!label->patchOffset_.bound()) {
-    return;
-  }
-  AddLabelPatch(label->patchOffset_, label->pos());
-}
-
-// Emits a comparison between the saved input start and
-// current_position_ + cp_offset characters. Branches to the target chosen
-// by LabelOrBacktrack(on_cond) when |cond| holds for (inputStart, position).
-void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
-                                              Assembler::Condition cond) {
-  const int byteOffset = cp_offset * char_size();
-  masm_.computeEffectiveAddress(Address(current_position_, byteOffset),
-                                temp0_);
-
-  js::jit::Label* target = LabelOrBacktrack(on_cond);
-  masm_.branchPtr(cond, inputStart(), temp0_, target);
-}
-
-// Branches to |on_at_start| if current_position_ + cp_offset is the input
-// start (equality form of CheckAtStartImpl).
-void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) {
-  const Assembler::Condition atStart = Assembler::Equal;
-  CheckAtStartImpl(cp_offset, on_at_start, atStart);
-}
-
-// Branches to |on_not_at_start| if current_position_ + cp_offset is not the
-// input start (inequality form of CheckAtStartImpl).
-void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset,
-                                             Label* on_not_at_start) {
-  const Assembler::Condition notAtStart = Assembler::NotEqual;
-  CheckAtStartImpl(cp_offset, on_not_at_start, notAtStart);
-}
-
-// Emits a 32-bit comparison of current_character_ against |c| and branches
-// to the target chosen by LabelOrBacktrack(on_cond) if |cond| holds.
-void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond,
-                                                Assembler::Condition cond) {
-  js::jit::Label* target = LabelOrBacktrack(on_cond);
-  masm_.branch32(cond, current_character_, c, target);
-}
-
-// Branches to |on_equal| if the current character equals |c|.
-void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) {
-  const Imm32 expected(c);
-  CheckCharacterImpl(expected, on_equal, Assembler::Equal);
-}
-
-// Branches to |on_not_equal| if the current character differs from |c|.
-void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
-                                               Label* on_not_equal) {
-  const Imm32 expected(c);
-  CheckCharacterImpl(expected, on_not_equal, Assembler::NotEqual);
-}
-
-// Branches to |on_greater| if the current character is greater than |c|
-// (emitted with the Assembler::GreaterThan condition).
-void SMRegExpMacroAssembler::CheckCharacterGT(uc16 c, Label* on_greater) {
-  const Imm32 limit(c);
-  CheckCharacterImpl(limit, on_greater, Assembler::GreaterThan);
-}
-
-// Branches to |on_less| if the current character is less than |c|
-// (emitted with the Assembler::LessThan condition).
-void SMRegExpMacroAssembler::CheckCharacterLT(uc16 c, Label* on_less) {
-  const Imm32 limit(c);
-  CheckCharacterImpl(limit, on_less, Assembler::LessThan);
-}
-
-// Emits a test of (current_character_ & mask) against |c|. With |is_not|
-// false the branch to |on_cond| is taken on equality, with |is_not| true on
-// inequality.
-void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c,
-                                                        uint32_t mask,
-                                                        Label* on_cond,
-                                                        bool is_not) {
-  if (c != 0) {
-    // General case: materialize the masked character in temp0_, then
-    // compare it with |c|.
-    masm_.move32(Imm32(mask), temp0_);
-    masm_.and32(current_character_, temp0_);
-    masm_.branch32(is_not ? Assembler::NotEqual : Assembler::Equal, temp0_,
-                   Imm32(c), LabelOrBacktrack(on_cond));
-    return;
-  }
-
-  // Comparing against zero needs no scratch register: a bit test of the
-  // character against the mask is enough.
-  masm_.branchTest32(is_not ? Assembler::NonZero : Assembler::Zero,
-                     current_character_, Imm32(mask),
-                     LabelOrBacktrack(on_cond));
-}
-
-// Branches to |on_equal| if (current character & mask) == c.
-void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
-                                                    uint32_t mask,
-                                                    Label* on_equal) {
-  const bool is_not = false;
-  CheckCharacterAfterAndImpl(c, mask, on_equal, is_not);
-}
-
-// Branches to |on_not_equal| if (current character & mask) != c.
-void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c,
-                                                       uint32_t mask,
-                                                       Label* on_not_equal) {
-  const bool is_not = true;
-  CheckCharacterAfterAndImpl(c, mask, on_not_equal, is_not);
-}
-
-
-// Emits: temp0_ = current_character_ - minus; then branches to
-// |on_not_equal| if (temp0_ & mask) != c.
-void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
-    uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
-  const Address shifted(current_character_, -minus);
-  masm_.computeEffectiveAddress(shifted, temp0_);
-
-  if (c != 0) {
-    // Mask in place, then compare against the expected value.
-    masm_.and32(Imm32(mask), temp0_);
-    masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c),
-                   LabelOrBacktrack(on_not_equal));
-    return;
-  }
-
-  // Expected value is zero: a bit test against the mask suffices.
-  masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask),
-                     LabelOrBacktrack(on_not_equal));
-}
-
-// Emits a check of the top backtrack-stack entry against current_position_.
-// When they are equal the entry is popped and control goes to |on_equal|
-// (or backtracks); otherwise execution falls through with the stack intact.
-void SMRegExpMacroAssembler::CheckGreedyLoop(Label* on_equal) {
-  js::jit::Label positions_differ;
-  const Address stackTop(backtrack_stack_pointer_, 0);
-  masm_.branchPtr(Assembler::NotEqual, stackTop, current_position_,
-                  &positions_differ);
-
-  // Pop one pointer-sized entry.
-  masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_);
-  JumpOrBacktrack(on_equal);
-
-  masm_.bind(&positions_differ);
-}
-
-// Emits a range test on the current character. temp0_ receives
-// (current_character_ - from), which is then compared against (to - from)
-// under |cond|; callers pass an unsigned condition so that
-// "x in [from, to]" becomes "unsigned(x - from) <= to - from".
-void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
-    uc16 from, uc16 to, Label* on_cond, Assembler::Condition cond) {
-  const Address shifted(current_character_, -from);
-  masm_.computeEffectiveAddress(shifted, temp0_);
-
-  js::jit::Label* target = LabelOrBacktrack(on_cond);
-  masm_.branch32(cond, temp0_, Imm32(to - from), target);
-}
-
-// Branches to |on_in_range| if the current character lies in [from, to].
-void SMRegExpMacroAssembler::CheckCharacterInRange(uc16 from, uc16 to,
-                                                   Label* on_in_range) {
-  const Assembler::Condition inside = Assembler::BelowOrEqual;
-  CheckCharacterInRangeImpl(from, to, on_in_range, inside);
-}
-
-// Branches to |on_not_in_range| if the current character lies outside
-// [from, to].
-void SMRegExpMacroAssembler::CheckCharacterNotInRange(uc16 from, uc16 to,
-                                                      Label* on_not_in_range) {
-  const Assembler::Condition outside = Assembler::Above;
-  CheckCharacterInRangeImpl(from, to, on_not_in_range, outside);
-}
-
-// Emits a lookup of (current character & kTableMask) in the byte table
-// |table| and branches to |on_bit_set| (via LabelOrBacktrack) if the entry
-// is non-zero.
-void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
-                                             Label* on_bit_set) {
-  // ByteArrays live on the C++ heap. Take this one over from the current
-  // HandleScope now; it is handed to the |tables_| vector at the end of
-  // this function and is (eventually) owned by the RegExpShared.
-  PseudoHandle<ByteArrayData> rawTable = table->takeOwnership(isolate());
-
-  // temp0_ = table base address, temp1_ = masked character (the index).
-  masm_.movePtr(ImmPtr(rawTable->data()), temp0_);
-  masm_.move32(Imm32(kTableMask), temp1_);
-  masm_.and32(current_character_, temp1_);
-
-  // Fetch the table byte and branch if it is set.
-  const BaseIndex entry(temp0_, temp1_, js::jit::TimesOne);
-  masm_.load8ZeroExtend(entry, temp0_);
-  js::jit::Label* target = LabelOrBacktrack(on_bit_set);
-  masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, target);
-
-  AddTable(std::move(rawTable));
-}
-
-// Emits code that compares the input at the current position against the
-// text captured in regexp registers |start_reg| (start) and |start_reg + 1|
-// (end). On a mismatch, or when too little input is available, control goes
-// to |on_no_match| (or backtracks). On a match, current_position_ is moved
-// past the matched text (towards the input start if |read_backward|). A
-// zero-length capture always matches and emits no comparison.
-// With |ignore_case| set the comparison is case-insensitive: UC16 input is
-// compared by a C++ helper, LATIN1 input is folded inline.
-void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg,
-                                                       bool read_backward,
-                                                       Label* on_no_match,
-                                                       bool ignore_case) {
-  js::jit::Label fallthrough;
-
-  // Captures are stored as a sequential pair of registers.
-  // Find the length of the back-referenced capture and load the
-  // capture's start index into current_character_.
-  masm_.loadPtr(register_location(start_reg),  // index of start
-                current_character_);
-  masm_.loadPtr(register_location(start_reg + 1), temp0_);  // index of end
-  masm_.subPtr(current_character_, temp0_);  // length of capture
-
-  // Capture registers are either both set or both cleared.
-  // If the capture length is zero, then the capture is either empty or cleared.
-  // Fall through in both cases.
-  masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough);
-
-  // Check that there are sufficient characters left in the input.
-  if (read_backward) {
-    // If start + len > current, there isn't enough room for a
-    // lookbehind backreference.
-    masm_.loadPtr(inputStart(), temp1_);
-    masm_.addPtr(temp0_, temp1_);
-    masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_,
-                    LabelOrBacktrack(on_no_match));
-  } else {
-    // current_position_ is the negative offset from the end.
-    // If current + len > 0, there isn't enough room for a backreference.
-    masm_.movePtr(current_position_, temp1_);
-    masm_.addPtr(temp0_, temp1_);
-    masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0),
-                    LabelOrBacktrack(on_no_match));
-  }
-
-  if (mode_ == UC16 && ignore_case) {
-    // We call a helper function for case-insensitive non-latin1 strings.
-
-    // Save volatile regs. temp1_ and temp2_ don't need to be saved.
-    // NOTE(review): current_position_ is turned into a pointer below and is
-    // only converted back by PopRegsInMask. That works only if
-    // current_position_ is part of the volatile set on every platform;
-    // confirm, or add it to |volatileRegs| explicitly.
-    LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
-    volatileRegs.takeUnchecked(temp1_);
-    volatileRegs.takeUnchecked(temp2_);
-    masm_.PushRegsInMask(volatileRegs);
-
-    // Parameters are
-    //   Address captured - Address of captured substring's start.
-    //   Address current - Address of current character position.
-    //   size_t byte_length - length of capture (in bytes)
-
-    // Compute |captured|
-    masm_.addPtr(input_end_pointer_, current_character_);
-
-    // Compute |current|
-    masm_.addPtr(input_end_pointer_, current_position_);
-    if (read_backward) {
-      // Offset by length when matching backwards.
-      masm_.subPtr(temp0_, current_position_);
-    }
-
-    masm_.setupUnalignedABICall(temp1_);
-    masm_.passABIArg(current_character_);
-    masm_.passABIArg(current_position_);
-    masm_.passABIArg(temp0_);
-
-    bool unicode = true;  // TODO: Fix V8 bug
-    if (unicode) {
-      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
-          CaseInsensitiveCompareUCStrings;
-      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-    } else {
-      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
-          CaseInsensitiveCompareStrings;
-      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
-    }
-    masm_.storeCallInt32Result(temp1_);
-    masm_.PopRegsInMask(volatileRegs);
-    // A zero result from the helper means the strings did not match.
-    masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
-                       LabelOrBacktrack(on_no_match));
-
-    // On success, advance position by length of capture
-    if (read_backward) {
-      masm_.subPtr(temp0_, current_position_);
-    } else {
-      masm_.addPtr(temp0_, current_position_);
-    }
-
-    masm_.bind(&fallthrough);
-    return;
-  }
-
-  // We will be modifying current_position_. Save it in case the match fails.
-  masm_.push(current_position_);
-
-  // Compute start of capture string
-  masm_.addPtr(input_end_pointer_, current_character_);
-
-  // Compute start of match string
-  masm_.addPtr(input_end_pointer_, current_position_);
-  if (read_backward) {
-    // Offset by length when matching backwards.
-    masm_.subPtr(temp0_, current_position_);
-  }
-
-  // Compute end of match string
-  masm_.addPtr(current_position_, temp0_);
-
-  js::jit::Label success;
-  js::jit::Label fail;
-  js::jit::Label loop;
-  masm_.bind(&loop);
-
-  // Load next character from each string:
-  // temp1_ = capture character, temp2_ = match (input) character.
-  if (mode_ == LATIN1) {
-    masm_.load8ZeroExtend(Address(current_character_, 0), temp1_);
-    masm_.load8ZeroExtend(Address(current_position_, 0), temp2_);
-  } else {
-    masm_.load16ZeroExtend(Address(current_character_, 0), temp1_);
-    masm_.load16ZeroExtend(Address(current_position_, 0), temp2_);
-  }
-
-  if (ignore_case) {
-    MOZ_ASSERT(mode_ == LATIN1);
-    // Try exact match.
-    js::jit::Label loop_increment;
-    masm_.branch32(Assembler::Equal, temp1_, temp2_, &loop_increment);
-
-    // Mismatch. Try case-insensitive match.
-    // Force the capture character to lower case (by setting bit 0x20)
-    // then check to see if it is a letter.
-    js::jit::Label convert_match;
-    masm_.or32(Imm32(0x20), temp1_);
-
-    // Check if it is in [a,z].
-    masm_.computeEffectiveAddress(Address(temp1_, -'a'), temp2_);
-    masm_.branch32(Assembler::BelowOrEqual, temp2_, Imm32('z' - 'a'),
-                   &convert_match);
-    // Check for values in range [224,254].
-    // Exclude 247 (U+00F7 DIVISION SIGN).
-    masm_.sub32(Imm32(224 - 'a'), temp2_);
-    masm_.branch32(Assembler::Above, temp2_, Imm32(254 - 224), &fail);
-    masm_.branch32(Assembler::Equal, temp2_, Imm32(247 - 224), &fail);
-
-    // The capture character is a lower-case letter. Convert the match
-    // character to lower case and compare. temp2_ was clobbered by the range
-    // checks above, so the match character must be reloaded from
-    // current_position_. Reloading from current_character_ would compare the
-    // capture character with itself and accept any input character.
-    masm_.bind(&convert_match);
-    masm_.load8ZeroExtend(Address(current_position_, 0), temp2_);
-    masm_.or32(Imm32(0x20), temp2_);
-    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
-
-    masm_.bind(&loop_increment);
-  } else {
-    // Fail if characters do not match.
-    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
-  }
-
-  // Increment pointers into match and capture strings.
-  masm_.addPtr(Imm32(char_size()), current_character_);
-  masm_.addPtr(Imm32(char_size()), current_position_);
-
-  // Loop if we have not reached the end of the match string.
-  masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop);
-  masm_.jump(&success);
-
-  // If we fail, restore current_position_ and branch.
-  masm_.bind(&fail);
-  masm_.pop(current_position_);
-  JumpOrBacktrack(on_no_match);
-
-  masm_.bind(&success);
-
-  // current_position_ is a pointer. Convert it back to an offset.
-  masm_.subPtr(input_end_pointer_, current_position_);
-  if (read_backward) {
-    // Subtract match length if we matched backward
-    masm_.addPtr(register_location(start_reg), current_position_);
-    masm_.subPtr(register_location(start_reg + 1), current_position_);
-  }
-
-  // Drop saved value of current_position_
-  masm_.addToStackPtr(Imm32(sizeof(uintptr_t)));
-
-  masm_.bind(&fallthrough);
-}
-
-// Case-sensitive back-reference check: control goes to |on_no_match| when
-// the input does not repeat the capture stored at |start_reg|.
-void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg,
-                                                   bool read_backward,
-                                                   Label* on_no_match) {
-  const bool ignore_case = false;
-  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match,
-                            ignore_case);
-}
-
-// Case-insensitive back-reference check: control goes to |on_no_match| when
-// the input does not repeat the capture stored at |start_reg|.
-void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
-    int start_reg, bool read_backward, Label* on_no_match) {
-  const bool ignore_case = true;
-  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match,
-                            ignore_case);
-}
-
-// Checks whether the given offset from the current position is
-// inside the input string. Control goes to |on_outside_input| (or
-// backtracks) when current_position_ + cp_offset characters lies at or
-// beyond the input end (cp_offset >= 0) or strictly before the input start
-// (cp_offset < 0).
-void SMRegExpMacroAssembler::CheckPosition(int cp_offset,
-                                           Label* on_outside_input) {
-  // Note: current_position_ is a (negative) byte offset relative to
-  // the end of the input string.
-  if (cp_offset >= 0) {
-    //      end + current + offset >= end
-    // <=>        current + offset >= 0
-    // <=>        current          >= -offset
-    masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
-                    ImmWord(-cp_offset * char_size()),
-                    LabelOrBacktrack(on_outside_input));
-  } else {
-    // Compute offset position
-    masm_.computeEffectiveAddress(
-        Address(current_position_, cp_offset * char_size()), temp0_);
-
-    // Compare to start of input. inputStart() holds the offset of the first
-    // character itself, so a position equal to it is still inside the
-    // input: only positions strictly before the start are outside.
-    masm_.branchPtr(Assembler::GreaterThan, inputStart(), temp0_,
-                    LabelOrBacktrack(on_outside_input));
-  }
-}
-
-// This function attempts to generate special case code for character classes.
-// Returns true if a special case is generated; the emitted code branches to |on_no_match| (via LabelOrBacktrack) when the current character is not in class |type|.
-// Otherwise returns false and generates no code.
-bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
- Label* on_no_match) {
- js::jit::Label* no_match = LabelOrBacktrack(on_no_match);
-
- // Note: throughout this function, range checks (c in [min, max])
- // are implemented by an unsigned (c - min) <= (max - min) check.
- switch (type) {
- case 's': {
- // Match space-characters (inline code is only generated for LATIN1 input)
- if (mode_ != LATIN1) {
- return false;
- }
- js::jit::Label success;
- // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP).
-
- // Check ' '
- masm_.branch32(Assembler::Equal, current_character_, Imm32(' '),
- &success);
-
- // Check '\t'..'\r'
- masm_.computeEffectiveAddress(Address(current_character_, -'\t'),
- temp0_);
- masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'),
- &success);
-
- // Check \u00a0. temp0_ still holds (character - '\t'), hence the adjusted constant.
- masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'),
- no_match);
-
- masm_.bind(&success);
- return true;
- }
- case 'S':
- // The emitted code for generic character classes is good enough.
- return false;
- case 'd':
- // Match latin1 digits ('0'-'9')
- masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
- masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match);
- return true;
- case 'D':
- // Match anything except latin1 digits ('0'-'9')
- masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
- masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'),
- no_match);
- return true;
- case '.':
- // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d),
- // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR.
- // See https://tc39.es/ecma262/#prod-LineTerminator
-
- // To test for 0x0a and 0x0d efficiently, we XOR the input with 1.
- // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to
- // test for the contiguous range 0x0b..0x0c.
- masm_.move32(current_character_, temp0_);
- masm_.xor32(Imm32(0x01), temp0_);
- masm_.sub32(Imm32(0x0b), temp0_);
- masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
- no_match);
-
- if (mode_ == UC16) {
- // Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
- masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028),
- no_match);
- }
- return true;
- case 'w':
- // \w matches the set of 63 characters defined in Runtime Semantics:
- // WordCharacters. We use a static lookup table, which is defined in
- // regexp-macro-assembler.cc.
- // Note: if both Unicode and IgnoreCase are true, \w matches a
- // larger set of characters. That case is handled elsewhere.
- if (mode_ != LATIN1) {
- masm_.branch32(Assembler::Above, current_character_, Imm32('z'),  // Characters above 'z' are rejected before the table lookup.
- no_match);
- }
- static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar,
- "regex: arraysize(word_character_map) > unibrow::Latin1::kMaxChar");
- masm_.movePtr(ImmPtr(word_character_map), temp0_);
- masm_.load8ZeroExtend(
- BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
- masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match);  // A zero table entry means "not a word character".
- return true;
- case 'W': {
- // See 'w' above. The test is inverted: word characters go to no_match.
- js::jit::Label done;
- if (mode_ != LATIN1) {
- masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done);  // Above 'z' is accepted without a table lookup.
- }
- static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar,
- "regex: arraysize(word_character_map) > unibrow::Latin1::kMaxChar");
- masm_.movePtr(ImmPtr(word_character_map), temp0_);
- masm_.load8ZeroExtend(
- BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
- masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match);
- if (mode_ != LATIN1) {
- masm_.bind(&done);
- }
- return true;
- }
- ////////////////////////////////////////////////////////////////////////
- // Non-standard classes (with no syntactic shorthand) used internally //
- ////////////////////////////////////////////////////////////////////////
- case '*':
- // Match any character; no code is needed.
- return true;
- case 'n':
- // Match newlines. The opposite of '.'. See '.' above.
- masm_.move32(current_character_, temp0_);
- masm_.xor32(Imm32(0x01), temp0_);
- masm_.sub32(Imm32(0x0b), temp0_);
- if (mode_ == LATIN1) {
- masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match);
- } else {
- MOZ_ASSERT(mode_ == UC16);
- js::jit::Label done;
- masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),  // '\n' or '\r': matched, skip the U+2028/U+2029 test.
- &done);
-
- // Compare original value to 0x2028 and 0x2029, using the already
- // computed (current_char ^ 0x01 - 0x0b). I.e., check for
- // 0x201d (0x2028 - 0x0b) or 0x201e.
- masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
- masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
- no_match);
- masm_.bind(&done);
- }
- return true;
-
- // No custom implementation
- default:
- return false;
- }
-}
-
-// Emits the "no match" exit: temp0_ receives
-// RegExpRunStatus_Success_NotFound and control jumps to exit_label_.
-void SMRegExpMacroAssembler::Fail() {
-  const ImmWord notFound(js::RegExpRunStatus_Success_NotFound);
-  masm_.movePtr(notFound, temp0_);
-  masm_.jump(&exit_label_);
-}
-
-// Emits an unconditional jump to the target chosen by LabelOrBacktrack(to).
-void SMRegExpMacroAssembler::GoTo(Label* to) {
-  js::jit::Label* target = LabelOrBacktrack(to);
-  masm_.jump(target);
-}
-
-// Branches to |if_ge| if the value in regexp register |reg| is greater than
-// or equal to |comparand|.
-void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
-                                          Label* if_ge) {
-  const ImmWord bound(comparand);
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg),
-                  bound, LabelOrBacktrack(if_ge));
-}
-
-// Branches to |if_lt| if the value in regexp register |reg| is less than
-// |comparand|.
-void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
-                                          Label* if_lt) {
-  const ImmWord bound(comparand);
-  masm_.branchPtr(Assembler::LessThan, register_location(reg), bound,
-                  LabelOrBacktrack(if_lt));
-}
-
-// Branches to |if_eq| if the value in regexp register |reg| equals
-// current_position_.
-void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
-  js::jit::Label* target = LabelOrBacktrack(if_eq);
-  masm_.branchPtr(Assembler::Equal, register_location(reg),
-                  current_position_, target);
-}
-
-// Optionally bounds-checks, then loads |characters| characters at
-// |cp_offset| into current_character_.
-//
-// This body is a verbatim copy of the V8 code, which V8 itself repeats in
-// at least nine places (one per supported architecture), differing only in
-// comments and formatting. It belongs in the superclass; once upstream
-// hoists it there, this copy can go away.
-void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
-                                                      Label* on_end_of_input,
-                                                      bool check_bounds,
-                                                      int characters,
-                                                      int eats_at_least) {
-  // Preloading a few characters is fine when every success path consumes
-  // many characters, but the reverse is not.
-  MOZ_ASSERT(eats_at_least >= characters);
-  MOZ_ASSERT(cp_offset < (1 << 30));  // Be sane! (And ensure negation works)
-
-  if (check_bounds) {
-    // Looking forward, the last character that will be eaten must be in
-    // range; looking backward, only the offset itself is checked.
-    const int checked_offset =
-        cp_offset >= 0 ? cp_offset + eats_at_least - 1 : cp_offset;
-    CheckPosition(checked_offset, on_end_of_input);
-  }
-  LoadCurrentCharacterUnchecked(cp_offset, characters);
-}
-
-// Emits an unchecked load of |characters| characters, located |cp_offset|
-// characters from the current position, into current_character_. Loads
-// narrower than 32 bits are zero-extended.
-void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
-                                                           int characters) {
-  BaseIndex address(input_end_pointer_, current_position_, js::jit::TimesOne,
-                    cp_offset * char_size());
-
-  if (mode_ != LATIN1) {
-    MOZ_ASSERT(mode_ == UC16);
-    // Two-byte characters: one or two of them per load.
-    if (characters == 2) {
-      masm_.load32(address, current_character_);
-    } else {
-      MOZ_ASSERT(characters == 1);
-      masm_.load16ZeroExtend(address, current_character_);
-    }
-    return;
-  }
-
-  // One-byte characters: one, two or four of them per load.
-  switch (characters) {
-    case 4:
-      masm_.load32(address, current_character_);
-      break;
-    case 2:
-      masm_.load16ZeroExtend(address, current_character_);
-      break;
-    default:
-      MOZ_ASSERT(characters == 1);
-      masm_.load8ZeroExtend(address, current_character_);
-      break;
-  }
-}
-
-// Emits a pop of the top backtrack-stack entry into current_position_.
-void SMRegExpMacroAssembler::PopCurrentPosition() {
-  Pop(current_position_);
-}
-
-// Emits a pop of the top backtrack-stack entry into regexp register
-// |register_index|, going through temp0_.
-void SMRegExpMacroAssembler::PopRegister(int register_index) {
-  const Register scratch = temp0_;
-  Pop(scratch);
-  masm_.storePtr(scratch, register_location(register_index));
-}
-
-// Emits a push of |label|'s (not yet known) code address onto the backtrack
-// stack. A patchable move of nullptr is emitted and its offset remembered
-// in label->patchOffset_; GetCode() later patches recorded offsets with
-// real addresses. The label must be unbound and must not already carry a
-// patch offset.
-void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
-  MOZ_ASSERT(!label->is_bound());
-  MOZ_ASSERT(!label->patchOffset_.bound());
-
-  const Register scratch = temp0_;
-  label->patchOffset_ = masm_.movWithPatch(ImmPtr(nullptr), scratch);
-  MOZ_ASSERT(label->patchOffset_.bound());
-
-  Push(scratch);
-  CheckBacktrackStackLimit();
-}
-
-// Emits a push of current_position_ onto the backtrack stack.
-void SMRegExpMacroAssembler::PushCurrentPosition() {
-  Push(current_position_);
-}
-
-// Emits a push of regexp register |register_index| onto the backtrack
-// stack (via temp0_). A stack-limit check follows only when
-// |check_stack_limit| is set.
-void SMRegExpMacroAssembler::PushRegister(int register_index,
-                                          StackCheckFlag check_stack_limit) {
-  const Register scratch = temp0_;
-  masm_.loadPtr(register_location(register_index), scratch);
-  Push(scratch);
-
-  if (!check_stack_limit) {
-    return;
-  }
-  CheckBacktrackStackLimit();
-}
-
-// Emits a load of regexp register |reg| into current_position_.
-void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
-  const Register dest = current_position_;
-  masm_.loadPtr(register_location(reg), dest);
-}
-
-// Emits a store of (current_position_ + cp_offset characters) into regexp
-// register |reg|. A zero offset stores current_position_ directly; any
-// other offset is first computed into temp0_.
-void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
-                                                            int cp_offset) {
-  if (cp_offset != 0) {
-    const Address adjusted(current_position_, cp_offset * char_size());
-    masm_.computeEffectiveAddress(adjusted, temp0_);
-    masm_.storePtr(temp0_, register_location(reg));
-    return;
-  }
-  masm_.storePtr(current_position_, register_location(reg));
-}
-
-// Restores backtrack_stack_pointer_ from regexp register |reg|.
-// The register holds an offset relative to the backtrack stack base rather
-// than a raw pointer, so that the saved value stays valid if the backtrack
-// stack grows (and therefore moves). The base is added back here.
-void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
-  const Register bsp = backtrack_stack_pointer_;
-  masm_.loadPtr(register_location(reg), bsp);
-  masm_.addPtr(backtrackStackBase(), bsp);
-}
-// Saves backtrack_stack_pointer_ into regexp register |reg| as an offset:
-// the backtrack stack base is subtracted (in temp0_) before the store.
-void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
-  const Register offset = temp0_;
-  masm_.movePtr(backtrack_stack_pointer_, offset);
-  masm_.subPtr(backtrackStackBase(), offset);
-  masm_.storePtr(offset, register_location(reg));
-}
-
-// Used for regexps anchored at the end, to skip the beginning of long
-// strings: when the longest possible match is shorter than the string, the
-// first len - max_len bytes cannot take part in a match. If
-// current_position_ is further than |by| characters from the input end, it
-// is moved to exactly |by| characters before the end.
-void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
-  js::jit::Label after_position;
-  const ImmWord newPosition(-by * char_size());
-
-  // Already at or past the new position: leave everything untouched.
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
-                  newPosition, &after_position);
-  masm_.movePtr(newPosition, current_position_);
-
-  // RegExp entry code (where this operation is used) expects the character
-  // before the current position to be loaded already. The position has
-  // just been advanced, so reading one character backwards is safe.
-  LoadCurrentCharacterUnchecked(-1, 1);
-
-  masm_.bind(&after_position);
-}
-
-// Emits a store of the constant |to| into regexp register
-// |register_index|, which must not be a capture register.
-void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
-  MOZ_ASSERT(register_index >= num_capture_registers_);
-  const ImmWord value(to);
-  masm_.storePtr(value, register_location(register_index));
-}
-
-// Emits a jump to the success handler. The return value says whether a
-// match can be restarted (i.e. the regexp is global); nothing uses it, but
-// it is provided to be safe.
-bool SMRegExpMacroAssembler::Succeed() {
-  masm_.jump(&success_label_);
-  const bool restartable = global();
-  return restartable;
-}
-
-// Emits stores that reset regexp registers |reg_from|..|reg_to| (inclusive)
-// to the cleared-capture value: the input start minus one character, i.e.
-// input[-1].
-void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
-  MOZ_ASSERT(reg_from <= reg_to);
-
-  // temp0_ = inputStart - char_size()
-  masm_.loadPtr(inputStart(), temp0_);
-  masm_.subPtr(Imm32(char_size()), temp0_);
-
-  int reg = reg_from;
-  while (reg <= reg_to) {
-    masm_.storePtr(temp0_, register_location(reg));
-    reg++;
-  }
-}
-
-// Emits a push of |source| onto the backtrack stack: the stack pointer is
-// lowered by one pointer-sized slot and |source| is stored at the new top.
-void SMRegExpMacroAssembler::Push(Register source) {
-  MOZ_ASSERT(source != backtrack_stack_pointer_);
-
-  const Register bsp = backtrack_stack_pointer_;
-  masm_.subPtr(Imm32(sizeof(void*)), bsp);
-  masm_.storePtr(source, Address(bsp, 0));
-}
-
-// Emits a pop from the backtrack stack into |target|: the top slot is
-// loaded and the stack pointer is raised by one pointer-sized slot.
-void SMRegExpMacroAssembler::Pop(Register target) {
-  MOZ_ASSERT(target != backtrack_stack_pointer_);
-
-  const Register bsp = backtrack_stack_pointer_;
-  masm_.loadPtr(Address(bsp, 0), target);
-  masm_.addPtr(Imm32(sizeof(void*)), bsp);
-}
-
-// Emits a jump to |to|, or a backtrack when |to| is null.
-void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
-  if (!to) {
-    Backtrack();
-    return;
-  }
-  masm_.jump(to->inner());
-}
-
-// Emits a cheap inline check for backtrack stack overflow. When the stack
-// pointer has gone below the limit, the out-of-line handler at
-// stack_overflow_label_ is called to try to grow the stack; if temp0_ is
-// zero after that call, control goes to exit_with_exception_label_.
-void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
-  js::jit::Label no_stack_overflow;
-  const AbsoluteAddress limitAddr(
-      isolate()->regexp_stack()->limit_address_address());
-
-  // Fast path: limit <= backtrack stack pointer.
-  masm_.branchPtr(Assembler::BelowOrEqual, limitAddr,
-                  backtrack_stack_pointer_, &no_stack_overflow);
-
-  masm_.call(&stack_overflow_label_);
-
-  // Exit with an exception if the call failed
-  masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
-                     &exit_with_exception_label_);
-
-  masm_.bind(&no_stack_overflow);
-}
-
-// Placeholder result used to smuggle an OOM through the V8 layer: a handle
-// built from JS::UndefinedHandleValue.
-static Handle<HeapObject> DummyCode() {
-  Handle<HeapObject> placeholder =
-      Handle<HeapObject>::fromHandleValue(JS::UndefinedHandleValue);
-  return placeholder;
-}
-
-// Finalize code. This is called last, so that we know how many
-// registers we need. Returns DummyCode() if the JIT compartment cannot be created or code allocation fails; |source| is not used in this function.
-Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source) {
- if (!cx_->compartment()->ensureJitCompartmentExists(cx_)) {
- return DummyCode();
- }
-
- masm_.bind(&entry_label_);  // Target of the jump emitted by the constructor.
-
- createStackFrame();
- initFrameAndRegs();
-
- masm_.jump(&start_label_);  // Continue with the code emitted after the constructor bound start_label_.
-
- successHandler();
- exitHandler();
- backtrackHandler();
- stackOverflowHandler();
-
- Linker linker(masm_);
- JitCode* code = linker.newCode<NoGC>(cx_, REGEXP_CODE);
- if (!code) {
- ReportOutOfMemory(cx_);
- return DummyCode();
- }
-
- for (LabelPatch& lp : labelPatches_) {  // Replace each nullptr placeholder with the absolute address of its label.
- Assembler::PatchDataWithValueCheck(CodeLocationLabel(code, lp.patchOffset_),
- ImmPtr(code->raw() + lp.labelOffset_),
- ImmPtr(nullptr));
- }
-
- return Handle<HeapObject>(JS::PrivateGCThingValue(code), isolate());
-}
-
-/*
- * Emits the function prologue. The stack will have the following structure:
- * sp-> - FrameData
- * - inputStart
- * - backtrack stack base
- * - matches
- * - numMatches
- * - Registers
- * - Capture positions
- * - Scratch registers
- * --- frame alignment ---
- * - Saved register area
- * - Return address
- */
-void SMRegExpMacroAssembler::createStackFrame() {
-#ifdef JS_CODEGEN_ARM64
- // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
- // addressing. The register we use for PSP may however also be used by
- // calling code, and it is nonvolatile, so save it. Do this as a special
- // case first because the generic save/restore code needs the PSP to be
- // initialized already.
- MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
- masm_.Str(js::jit::PseudoStackPointer64,
- vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));
-
- // Initialize the PSP from the SP.
- masm_.initPseudoStackPtr();
-#endif
-
- // Push non-volatile registers which might be modified by jitcode.
- size_t pushedNonVolatileRegisters = 0;  // Count of pushes; used below to locate the x86 stack argument.
- for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more();
- ++iter) {
- masm_.Push(*iter);
- pushedNonVolatileRegisters++;
- }
-
- // The pointer to InputOutputData is passed as the first argument.
- // On x86 we have to load it off the stack into temp0_.
- // On other platforms it is already in a register.
-#ifdef JS_CODEGEN_X86
- Address ioDataAddr(masm_.getStackPointer(),
- (pushedNonVolatileRegisters + 1) * sizeof(void*));  // The +1 presumably skips the return address — confirm against the x86 calling convention.
- masm_.loadPtr(ioDataAddr, temp0_);
-#else
- if (js::jit::IntArgReg0 != temp0_) {
- masm_.movePtr(js::jit::IntArgReg0, temp0_);
- }
-#endif
-
- // Start a new stack frame.
- size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);  // FrameData plus one pointer-sized slot per regexp register.
- frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
- masm_.framePushed(), frameBytes);
- masm_.reserveStack(frameSize_);
- masm_.checkStackAlignment();
-
- // Check if we have space on the stack. Use the *NoInterrupt stack limit to
- // avoid failing repeatedly when the regex code is called from Ion JIT code.
- // (See bug 1208819)
- js::jit::Label stack_ok;
- AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt());
- masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok);
-
- // There is not enough space on the stack. Exit with an exception.
- masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);  // temp0_ carries the status to the code at exit_label_.
- masm_.jump(&exit_label_);
-
- masm_.bind(&stack_ok);
-}
-
-void SMRegExpMacroAssembler::initFrameAndRegs() {
-  // On entry an uninitialized stack frame already exists and temp0_ holds the
-  // address of the InputOutputData.
-  Register ioData = temp0_;
-
-  // Fetch the MatchPairs pointer out of the InputOutputData.
-  Register matchPairs = temp1_;
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, matches)),
-                matchPairs);
-
-  // Copy the output pair array pointer and the pair count into the frame.
-  masm_.loadPtr(Address(matchPairs, MatchPairs::offsetOfPairs()), temp2_);
-  masm_.storePtr(temp2_, matches());
-  masm_.load32(Address(matchPairs, MatchPairs::offsetOfPairCount()), temp2_);
-  masm_.store32(temp2_, numMatches());
-
-#ifdef DEBUG
-  // Debug-only bounds check on the pair count just loaded into temp2_: it
-  // must be at least num_capture_registers_ / 2.
-  js::jit::Label pairCountOk;
-  masm_.branchPtr(Assembler::GreaterThanOrEqual, temp2_,
-                  ImmWord(num_capture_registers_ / 2), &pairCountOk);
-  masm_.assumeUnreachable("Not enough output pairs for RegExp");
-  masm_.bind(&pairCountOk);
-#endif
-
-  // current_position_ <- input start pointer.
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, inputStart)),
-                current_position_);
-
-  // input_end_pointer_ <- input end pointer.
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, inputEnd)),
-                input_end_pointer_);
-
-  // Positions are kept as negative offsets from the end of the string.
-  masm_.subPtr(input_end_pointer_, current_position_);
-
-  // Remember that offset in the frame as inputStart.
-  masm_.storePtr(current_position_, inputStart());
-
-  // Advance the current position by the start index, scaled by factor().
-  // temp1_ is reused here; the MatchPairs pointer is no longer needed.
-  Register startIndex = temp1_;
-  masm_.loadPtr(Address(ioData, offsetof(InputOutputData, startIndex)),
-                startIndex);
-  masm_.computeEffectiveAddress(
-      BaseIndex(current_position_, startIndex, factor()), current_position_);
-
-  // Seed current_character_: a newline when matching starts at index 0,
-  // otherwise the character just before the start position.
-  js::jit::Label beginMatching;
-  js::jit::Label loadPrevChar;
-  masm_.branchPtr(Assembler::NotEqual, startIndex, ImmWord(0),
-                  &loadPrevChar);
-  masm_.movePtr(ImmWord('\n'), current_character_);
-  masm_.jump(&beginMatching);
-
-  masm_.bind(&loadPrevChar);
-  LoadCurrentCharacterUnchecked(-1, 1);
-  masm_.bind(&beginMatching);
-
-  // Fill every capture register with inputStart minus one character.
-  MOZ_ASSERT(num_capture_registers_ > 0);
-  Register initialCapture = temp2_;
-  masm_.loadPtr(inputStart(), initialCapture);
-  masm_.subPtr(Imm32(char_size()), initialCapture);
-  if (num_capture_registers_ <= 8) {
-    // Few capture registers: emit one store per register.
-    for (int reg = 0; reg < num_capture_registers_; reg++) {
-      masm_.storePtr(initialCapture, register_location(reg));
-    }
-  } else {
-    // Many capture registers: emit a loop that steps temp1_ through the
-    // stack-pointer-relative byte offsets of the capture slots.
-    Register slotOffset = temp1_;
-    masm_.movePtr(ImmWord(register_offset(0)), slotOffset);
-    js::jit::Label fillNext;
-    masm_.bind(&fillNext);
-    masm_.storePtr(initialCapture, BaseIndex(masm_.getStackPointer(),
-                                             slotOffset, js::jit::TimesOne));
-    masm_.addPtr(ImmWord(sizeof(void*)), slotOffset);
-    masm_.branchPtr(Assembler::LessThan, slotOffset,
-                    ImmWord(register_offset(num_capture_registers_)),
-                    &fillNext);
-  }
-
-  // Load the top of the regexp stack as the backtrack stack pointer and
-  // record it in the frame as the backtrack stack base.
-  masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()),
-                backtrack_stack_pointer_);
-  masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
-}
-
-void SMRegExpMacroAssembler::successHandler() {
-  MOZ_ASSERT(success_label_.used());
-  masm_.bind(&success_label_);
-
-  // Write the captures out to the MatchPairs referenced by the
-  // InputOutputData. Capture registers hold positions, i.e. negative byte
-  // offsets from the end of the string, which have to be turned into
-  // character indices:
-  //
-  // Index:        [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
-  // Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ]  // IS = -6
-  // Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ]  // IS = -12
-  //
-  // index = (position - InputStart) / char_size.
-  Register pairsOut = temp1_;
-  masm_.loadPtr(matches(), pairsOut);
-
-  Register inputStartPos = temp2_;
-  masm_.loadPtr(inputStart(), inputStartPos);
-
-  // Two-byte strings need the byte difference halved.
-  const bool isTwoByte = (mode_ == UC16);
-  Register scratch = temp0_;
-  for (int reg = 0; reg < num_capture_registers_; reg++) {
-    masm_.loadPtr(register_location(reg), scratch);
-    masm_.subPtr(inputStartPos, scratch);
-    if (isTwoByte) {
-      masm_.rshiftPtrArithmetic(Imm32(1), scratch);
-    }
-    masm_.store32(scratch, Address(pairsOut, reg * sizeof(int32_t)));
-  }
-
-  // Leave the success status in temp0_; control then falls through to the
-  // exit handler.
-  masm_.movePtr(ImmWord(js::RegExpRunStatus_Success), temp0_);
-}
-
-void SMRegExpMacroAssembler::exitHandler() {
-  masm_.bind(&exit_label_);
-
-  // The run status arrives in temp0_; move it to the return register when
-  // the two are different registers.
-  if (temp0_ != js::jit::ReturnReg) {
-    masm_.movePtr(temp0_, js::jit::ReturnReg);
-  }
-
-  // Release the frameSize_ bytes of frame.
-  masm_.freeStack(frameSize_);
-
-  // Pop the registers that were saved on entry, iterating backwards.
-  GeneralRegisterBackwardIterator restoreIter(savedRegisters_);
-  while (restoreIter.more()) {
-    masm_.Pop(*restoreIter);
-    ++restoreIter;
-  }
-
-#ifdef JS_CODEGEN_ARM64
-  // Restore the value the PSP register held on entry, then return.
-
-  // Recover the real SP from the PSP.
-  masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);
-
-  // Reload the saved PSP register value. This is whatever the caller kept in
-  // that register, not an SP value, and nothing may overwrite it afterwards.
-  masm_.Ldr(js::jit::PseudoStackPointer64,
-            vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));
-
-  // Use a plain Ret(): abiret() would move SP <- PSP, which is wrong here.
-  masm_.Ret(vixl::lr);
-#else
-  masm_.abiret();
-#endif
-
-  // The exception exit is only emitted when some code jumps to it.
-  if (!exit_with_exception_label_.used()) {
-    return;
-  }
-  masm_.bind(&exit_with_exception_label_);
-
-  // Signal the thrown exception with an error status and rejoin the normal
-  // exit path.
-  masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
-  masm_.jump(&exit_label_);
-}
-
-void SMRegExpMacroAssembler::backtrackHandler() {
-  // Emit the shared backtrack stub only when some code refers to its label.
-  if (backtrack_label_.used()) {
-    masm_.bind(&backtrack_label_);
-    Backtrack();
-  }
-}
-
-void SMRegExpMacroAssembler::stackOverflowHandler() {
-  // Nothing to emit if no code refers to the overflow label.
-  if (!stack_overflow_label_.used()) {
-    return;
-  }
-
-  // Called if the backtrack-stack limit has been hit.
-  // NOTE: depending on architecture, the call may have
-  // changed the stack pointer. We adjust for that below.
-  masm_.bind(&stack_overflow_label_);
-
-  // Load the argument for GrowBacktrackStack: the RegExpStack to grow.
-  masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);
-
-  // Volatile registers have to be preserved across the C++ call.
-  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
-
-#ifdef JS_USE_LINK_REGISTER
-  // On link-register targets the return address is pushed explicitly. This
-  // must go through masm_, the assembler member used everywhere else in this
-  // class.
-  masm_.pushReturnAddress();
-#endif
-
-  // Adjust for the return address on the stack.
-  size_t frameOffset = sizeof(void*);
-
-  // temp0_ receives the call result and temp1_ carries the argument, so
-  // neither is saved and restored around the call.
-  volatileRegs.takeUnchecked(temp0_);
-  volatileRegs.takeUnchecked(temp1_);
-  masm_.PushRegsInMask(volatileRegs);
-
-  masm_.setupUnalignedABICall(temp0_);
-  masm_.passABIArg(temp1_);
-  masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, GrowBacktrackStack));
-  masm_.storeCallBoolResult(temp0_);
-
-  masm_.PopRegsInMask(volatileRegs);
-
-  // If GrowBacktrackStack returned false, we have failed to grow the
-  // stack, and must exit with a stack-overflow exception. Do this in
-  // the caller so that the stack is adjusted by our return instruction.
-  js::jit::Label overflow_return;
-  masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);
-
-  // Otherwise, store the new backtrack stack base and recompute the new
-  // top of the stack: subtract the old base from the backtrack stack pointer,
-  // then add the new base back in.
-  Address bsbAddress(masm_.getStackPointer(),
-                     offsetof(FrameData, backtrackStackBase) + frameOffset);
-  masm_.subPtr(bsbAddress, backtrack_stack_pointer_);
-
-  masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()), temp1_);
-  masm_.storePtr(temp1_, bsbAddress);
-  masm_.addPtr(temp1_, backtrack_stack_pointer_);
-
-  // Resume execution in calling code.
-  masm_.bind(&overflow_return);
-  masm_.ret();
-}
-
-// Only tracing code consults this value, so which implementation kind is
-// reported here does not matter.
-RegExpMacroAssembler::IrregexpImplementation
-SMRegExpMacroAssembler::Implementation() {
-  const RegExpMacroAssembler::IrregexpImplementation reportedKind =
-      kBytecodeImplementation;
-  return reportedKind;
-}
-
-/* static */
-// Compares two UTF-16 buffers of |byteLength| bytes each, treating code
-// units as equal when they match exactly or after js::unicode::ToUpperCase.
-// Returns 1 if every pair of code units matches, 0 otherwise.
-uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareStrings(
-    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
-  JS::AutoCheckCannotGC nogc;
-
-  // The length is given in bytes and must cover whole code units.
-  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
-  const size_t numUnits = byteLength / sizeof(char16_t);
-
-  for (size_t idx = 0; idx < numUnits; idx++) {
-    const char16_t left = substring1[idx];
-    const char16_t right = substring2[idx];
-    if (left == right) {
-      continue;
-    }
-
-    // Only pay for the case conversion when the raw units differ.
-    const char16_t leftUpper = js::unicode::ToUpperCase(left);
-    const char16_t rightUpper = js::unicode::ToUpperCase(right);
-    if (leftUpper != rightUpper) {
-      return 0;
-    }
-  }
-
-  return 1;
-}
-
-/* static */
-// Compares two UTF-16 buffers of |byteLength| bytes each, treating code
-// units as equal when they match exactly or after js::unicode::FoldCase.
-// Returns 1 if every pair of code units matches, 0 otherwise.
-uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUCStrings(
-    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
-  JS::AutoCheckCannotGC nogc;
-
-  // The length is given in bytes and must cover whole code units.
-  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
-  const size_t numUnits = byteLength / sizeof(char16_t);
-
-  for (size_t idx = 0; idx < numUnits; idx++) {
-    const char16_t left = substring1[idx];
-    const char16_t right = substring2[idx];
-    if (left == right) {
-      continue;
-    }
-
-    // Only pay for the case folding when the raw units differ.
-    const char16_t leftFolded = js::unicode::FoldCase(left);
-    const char16_t rightFolded = js::unicode::FoldCase(right);
-    if (leftFolded != rightFolded) {
-      return 0;
-    }
-  }
-
-  return 1;
-}
-
-/* static */
-// Asks |regexp_stack| to ensure twice its current capacity. Returns true
-// when EnsureCapacity yields a non-zero result, false otherwise.
-bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) {
-  JS::AutoCheckCannotGC nogc;
-  size_t currentCapacity = regexp_stack->stack_capacity();
-  const bool grown = !!regexp_stack->EnsureCapacity(currentCapacity * 2);
-  return grown;
-}
-
-} // namespace internal
-} // namespace v8