summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:44:26 -0500
committer Matt A. Tobin <email@mattatobin.com> 2020-11-09 20:44:26 -0500
commit c732425e1a76ffbff74b42d777e3e00bc89be56e (patch)
tree 0b856fa77d052715e1da6ea14efa6194ade39dd7
parent 51468e998c8e7191ddecacec3944c806b29dd590 (diff)
download uxp-c732425e1a76ffbff74b42d777e3e00bc89be56e.tar.gz
Issue #1677 - Part 6: Implement NativeRegExpMacroAssembler for new regexp import based on irregexp/NativeRegExpMacroAssembler.cpp
-rw-r--r-- js/src/regexp/RegExpTypes.h 51
-rw-r--r-- js/src/regexp/moz.build 1
-rw-r--r-- js/src/regexp/regexp-macro-assembler-arch.h 279
-rw-r--r-- js/src/regexp/regexp-native-macro-assembler.cc 1215
-rw-r--r-- js/src/regexp/regexp-shim.cc 7
-rw-r--r-- js/src/regexp/regexp-shim.h 32
6 files changed, 1572 insertions, 13 deletions
diff --git a/js/src/regexp/RegExpTypes.h b/js/src/regexp/RegExpTypes.h
new file mode 100644
index 0000000000..e260b5bb6d
--- /dev/null
+++ b/js/src/regexp/RegExpTypes.h
@@ -0,0 +1,51 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file forward-defines Irregexp classes that need to be visible
+// to the rest of Spidermonkey and re-exports them into js::irregexp.
+
+#ifndef regexp_RegExpTypes_h
+#define regexp_RegExpTypes_h
+
+namespace js {
+class MatchPairs;
+}
+
+namespace v8 {
+namespace internal {
+
+// Arguments of a single regexp execution: the bounds of the input text,
+// the index at which matching begins, and the MatchPairs handed in by the
+// caller.
+struct InputOutputData {
+  // Bounds of the input. The constructor accepts pointers to any character
+  // type and stores them type-erased as const void*.
+  const void* inputStart;
+  const void* inputEnd;
+
+  // Index into inputStart (in chars) at which to begin matching.
+  size_t startIndex;
+
+  js::MatchPairs* matches;
+
+  template <typename CharT>
+  InputOutputData(const CharT* start, const CharT* end, size_t index,
+                  js::MatchPairs* pairs)
+      : inputStart(start), inputEnd(end), startIndex(index), matches(pairs) {}
+};
+
+} // namespace internal
+} // namespace v8
+
+
+namespace js {
+namespace irregexp {
+
+using InputOutputData = v8::internal::InputOutputData;
+
+} // namespace irregexp
+} // namespace js
+
+#endif // regexp_RegExpTypes_h
diff --git a/js/src/regexp/moz.build b/js/src/regexp/moz.build
index 1ef280b7fa..4caa4589c0 100644
--- a/js/src/regexp/moz.build
+++ b/js/src/regexp/moz.build
@@ -22,6 +22,7 @@ SOURCES += [
'regexp-interpreter.cc',
'regexp-macro-assembler-tracer.cc',
'regexp-macro-assembler.cc',
+ 'regexp-native-macro-assembler.cc',
'regexp-parser.cc',
'regexp-shim.cc',
'regexp-stack.cc',
diff --git a/js/src/regexp/regexp-macro-assembler-arch.h b/js/src/regexp/regexp-macro-assembler-arch.h
index 60b5c94de4..1baa5ddd52 100644
--- a/js/src/regexp/regexp-macro-assembler-arch.h
+++ b/js/src/regexp/regexp-macro-assembler-arch.h
@@ -4,6 +4,10 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
// This file implements the NativeRegExpMacroAssembler interface for
// SpiderMonkey. It provides the same interface as each of V8's
// architecture-specific implementations.
@@ -11,6 +15,277 @@
#ifndef RegexpMacroAssemblerArch_h
#define RegexpMacroAssemblerArch_h
-#include "regexp/regexp-shim.h"
+#include "jit/MacroAssembler.h"
+#include "regexp/regexp-macro-assembler.h"
+
+namespace v8 {
+namespace internal {
+
+// Fixed-layout record that SMRegExpMacroAssembler addresses relative to
+// the native stack pointer (each field via offsetof(FrameData, ...)).
+// The regexp's virtual registers are addressed starting at offset
+// sizeof(FrameData) from the same base (see register_offset).
+struct FrameData {
+ // Character position at the start of the input, stored as a
+ // negative offset from the end of the string (input_end_pointer_).
+ size_t inputStart;
+
+ // The backtrack_stack_pointer_ register points to the top of the stack.
+ // This points to the bottom of the backtrack stack.
+ void* backtrackStackBase;
+
+ // Copy of the input MatchPairs.
+ int32_t* matches; // pointer to capture array
+ int32_t numMatches; // size of capture array
+};
+
+// SpiderMonkey's implementation of irregexp's NativeRegExpMacroAssembler.
+// Each public method emits machine code for one regexp operation through
+// the js::jit::StackMacroAssembler passed to the constructor. Wherever a
+// method takes a Label*, a null label means "backtrack" (see
+// LabelOrBacktrack below).
+class SMRegExpMacroAssembler final : public NativeRegExpMacroAssembler {
+ public:
+ SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
+ js::jit::StackMacroAssembler& masm, Zone* zone,
+ Mode mode, uint32_t num_capture_registers);
+ virtual ~SMRegExpMacroAssembler() {} // Nothing to do here
+
+ virtual int stack_limit_slack();
+ virtual IrregexpImplementation Implementation();
+
+ virtual bool Succeed();
+ virtual void Fail();
+
+ // Operations on the current input position.
+ virtual void AdvanceCurrentPosition(int by);
+ virtual void PopCurrentPosition();
+ virtual void PushCurrentPosition();
+ virtual void SetCurrentPositionFromEnd(int by);
+
+ // Control flow and the backtrack stack.
+ virtual void Backtrack();
+ virtual void Bind(Label* label);
+ virtual void GoTo(Label* label);
+ virtual void PushBacktrack(Label* label);
+
+ // Conditional branches on the current character or position.
+ virtual void CheckCharacter(uint32_t c, Label* on_equal);
+ virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+ virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
+ virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+ virtual void CheckCharacterAfterAnd(uint32_t c, uint32_t mask,
+ Label* on_equal);
+ virtual void CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask,
+ Label* on_not_equal);
+ virtual void CheckNotCharacterAfterMinusAnd(uc16 c, uc16 minus, uc16 mask,
+ Label* on_not_equal);
+ virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
+ virtual void CheckCharacterInRange(uc16 from, uc16 to, Label* on_in_range);
+ virtual void CheckCharacterNotInRange(uc16 from, uc16 to,
+ Label* on_not_in_range);
+ virtual void CheckAtStart(int cp_offset, Label* on_at_start);
+ virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
+ virtual void CheckPosition(int cp_offset, Label* on_outside_input);
+ virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
+ virtual bool CheckSpecialCharacterClass(uc16 type, Label* on_no_match);
+ virtual void CheckNotBackReference(int start_reg, bool read_backward,
+ Label* on_no_match);
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
+ Label* on_no_match);
+
+ virtual void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
+ bool check_bounds, int characters,
+ int eats_at_least);
+
+ // Operations on irregexp's virtual registers (see register_location).
+ virtual void AdvanceRegister(int reg, int by);
+ virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
+ virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
+ virtual void IfRegisterEqPos(int reg, Label* if_eq);
+ virtual void PopRegister(int register_index);
+ virtual void PushRegister(int register_index,
+ StackCheckFlag check_stack_limit);
+ virtual void ReadCurrentPositionFromRegister(int reg);
+ virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
+ virtual void ReadStackPointerFromRegister(int reg);
+ virtual void WriteStackPointerToRegister(int reg);
+ virtual void SetRegister(int register_index, int to);
+ virtual void ClearRegisters(int reg_from, int reg_to);
+
+ virtual Handle<HeapObject> GetCode(Handle<String> source);
+
+ private:
+ size_t frameSize_ = 0;
+
+ void createStackFrame();
+ void initFrameAndRegs();
+ void successHandler();
+ void exitHandler();
+ void backtrackHandler();
+ void stackOverflowHandler();
+
+ // Push a register on the backtrack stack.
+ void Push(js::jit::Register value);
+
+ // Pop a value from the backtrack stack.
+ void Pop(js::jit::Register target);
+
+ // Shared implementations behind the public Check* pairs; the condition
+ // or flag argument selects which member of the pair is emitted.
+ void CheckAtStartImpl(int cp_offset, Label* on_cond,
+ js::jit::Assembler::Condition cond);
+ void CheckCharacterImpl(js::jit::Imm32 c, Label* on_cond,
+ js::jit::Assembler::Condition cond);
+ void CheckCharacterAfterAndImpl(uint32_t c, uint32_t and_with, Label* on_cond,
+ bool negate);
+ void CheckCharacterInRangeImpl(uc16 from, uc16 to, Label* on_cond,
+ js::jit::Assembler::Condition cond);
+ void CheckNotBackReferenceImpl(int start_reg, bool read_backward,
+ Label* on_no_match, bool ignore_case);
+
+ void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
+
+ void JumpOrBacktrack(Label* to);
+
+ // MacroAssembler methods that take a Label can be called with a
+ // null label, which means that we should backtrack if we would jump
+ // to that label. This is a helper to avoid writing out the same
+ // logic a dozen times.
+ inline js::jit::Label* LabelOrBacktrack(Label* to) {
+ return to ? to->inner() : &backtrack_label_;
+ }
+
+ void CheckBacktrackStackLimit();
+
+ static bool GrowBacktrackStack(RegExpStack* regexp_stack);
+
+ static uint32_t CaseInsensitiveCompareStrings(const char16_t* substring1,
+ const char16_t* substring2,
+ size_t byteLength);
+ static uint32_t CaseInsensitiveCompareUCStrings(const char16_t* substring1,
+ const char16_t* substring2,
+ size_t byteLength);
+
+ // NOTE(review): char_size() is the numeric value of mode_, so it relies
+ // on the Mode enumerators being equal to the character width in bytes;
+ // confirm against the Mode declaration.
+ inline int char_size() { return static_cast<int>(mode_); }
+ inline js::jit::Scale factor() {
+ return mode_ == UC16 ? js::jit::TimesTwo : js::jit::TimesOne;
+ }
+
+ // Stack-pointer-relative addresses of the individual FrameData fields.
+ js::jit::Address inputStart() {
+ return js::jit::Address(masm_.getStackPointer(),
+ offsetof(FrameData, inputStart));
+ }
+ js::jit::Address backtrackStackBase() {
+ return js::jit::Address(masm_.getStackPointer(),
+ offsetof(FrameData, backtrackStackBase));
+ }
+ js::jit::Address matches() {
+ return js::jit::Address(masm_.getStackPointer(),
+ offsetof(FrameData, matches));
+ }
+ js::jit::Address numMatches() {
+ return js::jit::Address(masm_.getStackPointer(),
+ offsetof(FrameData, numMatches));
+ }
+
+ // The stack-pointer-relative location of a regexp register.
+ js::jit::Address register_location(int register_index) {
+ return js::jit::Address(masm_.getStackPointer(),
+ register_offset(register_index));
+ }
+
+ // Byte offset of a regexp register from the stack pointer. Registers are
+ // laid out after FrameData. As a side effect this grows num_registers_
+ // so that it always covers the highest index requested so far.
+ int32_t register_offset(int register_index) {
+ MOZ_ASSERT(register_index >= 0 && register_index <= kMaxRegister);
+ if (num_registers_ <= register_index) {
+ num_registers_ = register_index + 1;
+ }
+ static_assert(alignof(uintptr_t) <= alignof(FrameData));
+ // NOTE(review): the slot size is spelled sizeof(uintptr_t*) while the
+ // assertion above names uintptr_t; the two are presumably the same size
+ // on every supported target, but sizeof(uintptr_t) would be consistent.
+ return sizeof(FrameData) + register_index * sizeof(uintptr_t*);
+ }
+
+ JSContext* cx_;
+ js::jit::StackMacroAssembler& masm_;
+
+ /*
+ * This assembler uses the following registers:
+ *
+ * - current_character_:
+ * Contains the character (or characters) currently being examined.
+ * Must be loaded using LoadCurrentCharacter before using any of the
+ * dispatch methods. After a matching pass for a global regexp,
+ * temporarily stores the index of capture start.
+ * - current_position_:
+ * Current position in input *as negative byte offset from end of string*.
+ * - input_end_pointer_:
+ * Points to byte after last character in the input. current_position_ is
+ * relative to this.
+ * - backtrack_stack_pointer_:
+ * Points to tip of the (heap-allocated) backtrack stack. The stack grows
+ * downward (like the native stack).
+ * - temp0_, temp1_, temp2_:
+ * Scratch registers.
+ *
+ * The native stack pointer is used to access arguments (InputOutputData),
+ * local variables (FrameData), and irregexp's internal virtual registers
+ * (see register_location).
+ */
+
+ js::jit::Register current_character_;
+ js::jit::Register current_position_;
+ js::jit::Register input_end_pointer_;
+ js::jit::Register backtrack_stack_pointer_;
+ js::jit::Register temp0_, temp1_, temp2_;
+
+ js::jit::Label entry_label_;
+ js::jit::Label start_label_;
+ js::jit::Label backtrack_label_;
+ js::jit::Label success_label_;
+ js::jit::Label exit_label_;
+ js::jit::Label stack_overflow_label_;
+ js::jit::Label exit_with_exception_label_;
+
+ // When we generate the code to push a backtrack label's address
+ // onto the backtrack stack, we don't know its final address. We
+ // have to patch it after linking. This is slightly delicate, as the
+ // Label itself (which is allocated on the stack) may not exist by
+ // the time we link. The approach is as follows:
+ //
+ // 1. When we push a label on the backtrack stack (PushBacktrack),
+ // we bind the label's patchOffset_ field to the offset within
+ // the code that should be overwritten. This works because each
+ // label is only pushed by a single instruction.
+ //
+ // 2. When we bind a label (Bind), we check to see if it has a
+ // bound patchOffset_. If it does, we create a LabelPatch mapping
+ // its patch offset to the offset of the label itself.
+ //
+ // 3. While linking the code, we walk the list of label patches
+ // and patch the code accordingly.
+ class LabelPatch {
+ public:
+ LabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset)
+ : patchOffset_(patchOffset), labelOffset_(labelOffset) {}
+
+ js::jit::CodeOffset patchOffset_;
+ size_t labelOffset_ = 0;
+ };
+
+ js::Vector<LabelPatch, 4, js::SystemAllocPolicy> labelPatches_;
+ // Records one patch; a failed append is treated as fatal (crash).
+ void AddLabelPatch(js::jit::CodeOffset patchOffset, size_t labelOffset) {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ if (!labelPatches_.emplaceBack(patchOffset, labelOffset)) {
+ oomUnsafe.crash("Irregexp label patch");
+ }
+ }
+
+ Mode mode_;
+ int num_registers_;
+ int num_capture_registers_;
+ js::jit::LiveGeneralRegisterSet savedRegisters_;
+
+ public:
+ using TableVector =
+ js::Vector<PseudoHandle<ByteArrayData>, 4, js::SystemAllocPolicy>;
+ TableVector& tables() { return tables_; }
+
+ private:
+ TableVector tables_;
+ // Takes ownership of |table|; a failed append is treated as fatal (crash).
+ void AddTable(PseudoHandle<ByteArrayData> table) {
+ js::AutoEnterOOMUnsafeRegion oomUnsafe;
+ if (!tables_.append(std::move(table))) {
+ oomUnsafe.crash("Irregexp table append");
+ }
+ }
+};
+
+} // namespace internal
+} // namespace v8
-#endif // RegexpMacroAssemblerArch_h
+#endif // RegexpMacroAssemblerArch_h
diff --git a/js/src/regexp/regexp-native-macro-assembler.cc b/js/src/regexp/regexp-native-macro-assembler.cc
new file mode 100644
index 0000000000..15182ad713
--- /dev/null
+++ b/js/src/regexp/regexp-native-macro-assembler.cc
@@ -0,0 +1,1215 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
+ * vim: set ts=8 sts=2 et sw=2 tw=80:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// Copyright 2020 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "jit/Linker.h"
+#include "regexp/regexp-macro-assembler-arch.h"
+#include "regexp/regexp-stack.h"
+#include "vm/MatchPairs.h"
+
+#include "jit/MacroAssembler-inl.h"
+
+namespace v8 {
+namespace internal {
+
+using js::MatchPairs;
+using js::jit::AbsoluteAddress;
+using js::jit::Address;
+using js::jit::AllocatableGeneralRegisterSet;
+using js::jit::Assembler;
+using js::jit::BaseIndex;
+using js::jit::CodeLocationLabel;
+using js::jit::GeneralRegisterBackwardIterator;
+using js::jit::GeneralRegisterForwardIterator;
+using js::jit::GeneralRegisterSet;
+using js::jit::Imm32;
+using js::jit::ImmPtr;
+using js::jit::ImmWord;
+using js::jit::JitCode;
+using js::jit::Linker;
+using js::jit::LiveGeneralRegisterSet;
+using js::jit::Register;
+using js::jit::Registers;
+using js::jit::StackMacroAssembler;
+
+// Sets up the assembler state, picks the machine registers used by the
+// generated code, and emits the initial jump to the (not yet generated)
+// entry code.
+SMRegExpMacroAssembler::SMRegExpMacroAssembler(JSContext* cx, Isolate* isolate,
+ StackMacroAssembler& masm,
+ Zone* zone, Mode mode,
+ uint32_t num_capture_registers)
+ : NativeRegExpMacroAssembler(isolate, zone),
+ cx_(cx),
+ masm_(masm),
+ mode_(mode),
+ num_registers_(num_capture_registers),
+ num_capture_registers_(num_capture_registers) {
+ // Each capture has a start and an end register
+ MOZ_ASSERT(num_capture_registers_ % 2 == 0);
+
+ AllocatableGeneralRegisterSet regs(GeneralRegisterSet::All());
+
+ // The order of these takeAny() calls determines which machine register
+ // each role receives; do not reorder them.
+ temp0_ = regs.takeAny();
+ temp1_ = regs.takeAny();
+ temp2_ = regs.takeAny();
+ input_end_pointer_ = regs.takeAny();
+ current_character_ = regs.takeAny();
+ current_position_ = regs.takeAny();
+ backtrack_stack_pointer_ = regs.takeAny();
+ // NOTE(review): presumably the non-volatile registers among those taken
+ // above, i.e. the ones the generated code must preserve - confirm
+ // against SavedNonVolatileRegisters.
+ savedRegisters_ = js::jit::SavedNonVolatileRegisters(regs);
+
+ masm_.jump(&entry_label_); // We'll generate the entry code later
+ masm_.bind(&start_label_); // and continue from here.
+}
+
+// Returns RegExpStack::kStackLimitSlack.
+int SMRegExpMacroAssembler::stack_limit_slack() {
+  const int slack = RegExpStack::kStackLimitSlack;
+  return slack;
+}
+
+// Moves current_position_ by |by| characters (scaled to bytes with
+// char_size()). Emits nothing when |by| is zero.
+void SMRegExpMacroAssembler::AdvanceCurrentPosition(int by) {
+  if (by == 0) {
+    return;
+  }
+  const int byteDelta = by * char_size();
+  masm_.addPtr(Imm32(byteDelta), current_position_);
+}
+
+// Adds |by| to the regexp register |reg|. Emits nothing when |by| is zero.
+void SMRegExpMacroAssembler::AdvanceRegister(int reg, int by) {
+  MOZ_ASSERT(reg >= 0 && reg < num_registers_);
+  if (by == 0) {
+    return;
+  }
+  masm_.addPtr(Imm32(by), register_location(reg));
+}
+
+void SMRegExpMacroAssembler::Backtrack() {
+ // Check for an interrupt. We have to restart from the beginning if we
+ // are interrupted, so we only check for urgent interrupts.
+ js::jit::Label noInterrupt;
+ masm_.branchTest32(
+ Assembler::Zero, AbsoluteAddress(cx_->addressOfInterruptBits()),
+ Imm32(uint32_t(js::InterruptReason::CallbackUrgent)), &noInterrupt);
+ masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
+ masm_.jump(&exit_label_);
+ masm_.bind(&noInterrupt);
+
+ // Pop code location from backtrack stack and jump to location.
+ Pop(temp0_);
+ masm_.jump(temp0_);
+}
+
+// Binds |label| at the current code position. If the label already has a
+// bound patch offset, also records a label patch mapping that offset to
+// the label's position.
+void SMRegExpMacroAssembler::Bind(Label* label) {
+  masm_.bind(label->inner());
+
+  const bool needsPatch = label->patchOffset_.bound();
+  if (!needsPatch) {
+    return;
+  }
+  AddLabelPatch(label->patchOffset_, label->pos());
+}
+
+// Compares inputStart() against current_position_ + cp_offset (in chars)
+// using |cond| and branches to |on_cond|, or backtracks when |on_cond| is
+// null. Clobbers temp0_.
+void SMRegExpMacroAssembler::CheckAtStartImpl(int cp_offset, Label* on_cond,
+                                              Assembler::Condition cond) {
+  js::jit::Label* target = LabelOrBacktrack(on_cond);
+
+  // temp0_ = current_position_ + cp_offset, in bytes.
+  masm_.computeEffectiveAddress(
+      Address(current_position_, cp_offset * char_size()), temp0_);
+
+  masm_.branchPtr(cond, inputStart(), temp0_, target);
+}
+
+// Branches to |on_at_start| when current_position_ + cp_offset is the
+// start of the input.
+void SMRegExpMacroAssembler::CheckAtStart(int cp_offset, Label* on_at_start) {
+  const Assembler::Condition atStart = Assembler::Equal;
+  CheckAtStartImpl(cp_offset, on_at_start, atStart);
+}
+
+// Branches to |on_not_at_start| when current_position_ + cp_offset is not
+// the start of the input.
+void SMRegExpMacroAssembler::CheckNotAtStart(int cp_offset,
+                                             Label* on_not_at_start) {
+  const Assembler::Condition notAtStart = Assembler::NotEqual;
+  CheckAtStartImpl(cp_offset, on_not_at_start, notAtStart);
+}
+
+// Compares current_character_ with |c| using |cond| and branches to
+// |on_cond|, or backtracks when |on_cond| is null.
+void SMRegExpMacroAssembler::CheckCharacterImpl(Imm32 c, Label* on_cond,
+                                                Assembler::Condition cond) {
+  js::jit::Label* target = LabelOrBacktrack(on_cond);
+  masm_.branch32(cond, current_character_, c, target);
+}
+
+// Branches to |on_equal| when the current character equals |c|.
+void SMRegExpMacroAssembler::CheckCharacter(uint32_t c, Label* on_equal) {
+  const Imm32 expected(c);
+  CheckCharacterImpl(expected, on_equal, Assembler::Equal);
+}
+
+// Branches to |on_not_equal| when the current character differs from |c|.
+void SMRegExpMacroAssembler::CheckNotCharacter(uint32_t c,
+                                               Label* on_not_equal) {
+  const Imm32 expected(c);
+  CheckCharacterImpl(expected, on_not_equal, Assembler::NotEqual);
+}
+
+// Branches to |on_greater| when the current character is greater than |c|.
+void SMRegExpMacroAssembler::CheckCharacterGT(uc16 c, Label* on_greater) {
+  const Imm32 limit(c);
+  CheckCharacterImpl(limit, on_greater, Assembler::GreaterThan);
+}
+
+// Branches to |on_less| when the current character is less than |c|.
+void SMRegExpMacroAssembler::CheckCharacterLT(uc16 c, Label* on_less) {
+  const Imm32 limit(c);
+  CheckCharacterImpl(limit, on_less, Assembler::LessThan);
+}
+
+// Masks the current character with |mask| and compares the result with
+// |c|. Branches to |on_cond| (or backtracks when it is null) on a match,
+// or on a mismatch when |is_not| is set. May clobber temp0_.
+void SMRegExpMacroAssembler::CheckCharacterAfterAndImpl(uint32_t c,
+                                                        uint32_t mask,
+                                                        Label* on_cond,
+                                                        bool is_not) {
+  js::jit::Label* target = LabelOrBacktrack(on_cond);
+
+  if (c != 0) {
+    // General case: build (current_character_ & mask) in temp0_ and
+    // compare it with |c|.
+    Assembler::Condition cond = Assembler::Equal;
+    if (is_not) {
+      cond = Assembler::NotEqual;
+    }
+    masm_.move32(Imm32(mask), temp0_);
+    masm_.and32(current_character_, temp0_);
+    masm_.branch32(cond, temp0_, Imm32(c), target);
+    return;
+  }
+
+  // Comparing against zero: a single test-and-branch is enough.
+  Assembler::Condition cond = Assembler::Zero;
+  if (is_not) {
+    cond = Assembler::NonZero;
+  }
+  masm_.branchTest32(cond, current_character_, Imm32(mask), target);
+}
+
+// Branches to |on_equal| when (current character & mask) equals |c|.
+void SMRegExpMacroAssembler::CheckCharacterAfterAnd(uint32_t c,
+                                                    uint32_t mask,
+                                                    Label* on_equal) {
+  const bool negate = false;
+  CheckCharacterAfterAndImpl(c, mask, on_equal, negate);
+}
+
+// Branches to |on_not_equal| when (current character & mask) differs
+// from |c|.
+void SMRegExpMacroAssembler::CheckNotCharacterAfterAnd(uint32_t c,
+                                                       uint32_t mask,
+                                                       Label* on_not_equal) {
+  const bool negate = true;
+  CheckCharacterAfterAndImpl(c, mask, on_not_equal, negate);
+}
+
+
+// Computes ((current character - minus) & mask) and branches to
+// |on_not_equal| (or backtracks when it is null) if the result differs
+// from |c|. Clobbers temp0_.
+void SMRegExpMacroAssembler::CheckNotCharacterAfterMinusAnd(
+    uc16 c, uc16 minus, uc16 mask, Label* on_not_equal) {
+  js::jit::Label* target = LabelOrBacktrack(on_not_equal);
+
+  // temp0_ = current_character_ - minus
+  masm_.computeEffectiveAddress(Address(current_character_, -minus), temp0_);
+
+  if (c != 0) {
+    masm_.and32(Imm32(mask), temp0_);
+    masm_.branch32(Assembler::NotEqual, temp0_, Imm32(c), target);
+    return;
+  }
+
+  // Comparing against zero: test the masked bits directly.
+  masm_.branchTest32(Assembler::NonZero, temp0_, Imm32(mask), target);
+}
+
+// When the value on top of the backtrack stack equals the current
+// position, pops that entry and jumps to |on_equal| (or backtracks when
+// it is null); otherwise falls through with the stack untouched.
+void SMRegExpMacroAssembler::CheckGreedyLoop(Label* on_equal) {
+  const Address stackTop(backtrack_stack_pointer_, 0);
+
+  js::jit::Label positionsDiffer;
+  masm_.branchPtr(Assembler::NotEqual, stackTop, current_position_,
+                  &positionsDiffer);
+
+  // Discard the top entry of the backtrack stack.
+  masm_.addPtr(Imm32(sizeof(void*)), backtrack_stack_pointer_);
+  JumpOrBacktrack(on_equal);
+
+  masm_.bind(&positionsDiffer);
+}
+
+// Range test on the current character. The character is biased by -from
+// so that membership in [from,to] becomes the single unsigned comparison
+// (x - from) <= (to - from); |cond| selects which outcome branches to
+// |on_cond| (backtrack when null). Clobbers temp0_.
+void SMRegExpMacroAssembler::CheckCharacterInRangeImpl(
+    uc16 from, uc16 to, Label* on_cond, Assembler::Condition cond) {
+  js::jit::Label* target = LabelOrBacktrack(on_cond);
+
+  const Address biased(current_character_, -from);
+  masm_.computeEffectiveAddress(biased, temp0_);
+  masm_.branch32(cond, temp0_, Imm32(to - from), target);
+}
+
+// Branches to |on_in_range| when the current character is in [from,to].
+void SMRegExpMacroAssembler::CheckCharacterInRange(uc16 from, uc16 to,
+                                                   Label* on_in_range) {
+  const Assembler::Condition inside = Assembler::BelowOrEqual;
+  CheckCharacterInRangeImpl(from, to, on_in_range, inside);
+}
+
+// Branches to |on_not_in_range| when the current character is outside
+// [from,to].
+void SMRegExpMacroAssembler::CheckCharacterNotInRange(uc16 from, uc16 to,
+                                                      Label* on_not_in_range) {
+  const Assembler::Condition outside = Assembler::Above;
+  CheckCharacterInRangeImpl(from, to, on_not_in_range, outside);
+}
+
+// Looks up the byte at index (current character & kTableMask) in |table|
+// and branches to |on_bit_set| (or backtracks when it is null) if that
+// byte is non-zero. Clobbers temp0_ and temp1_.
+void SMRegExpMacroAssembler::CheckBitInTable(Handle<ByteArray> table,
+                                             Label* on_bit_set) {
+  // Claim ownership of the ByteArray from the current HandleScope.
+  // ByteArrays are allocated on the C++ heap and are (eventually)
+  // owned by the RegExpShared.
+  PseudoHandle<ByteArrayData> ownedTable = table->takeOwnership(isolate());
+
+  // temp0_ = address of the table data.
+  masm_.movePtr(ImmPtr(ownedTable->data()), temp0_);
+
+  // temp1_ = index into the table.
+  masm_.move32(Imm32(kTableMask), temp1_);
+  masm_.and32(current_character_, temp1_);
+
+  // Load the table entry and branch if it is set.
+  const BaseIndex entry(temp0_, temp1_, js::jit::TimesOne);
+  masm_.load8ZeroExtend(entry, temp0_);
+  masm_.branchTest32(Assembler::NonZero, temp0_, temp0_,
+                     LabelOrBacktrack(on_bit_set));
+
+  // The emitted code embeds a pointer into the table, so hand the table
+  // over to the |tables_| vector.
+  AddTable(std::move(ownedTable));
+}
+
+// Emits code that compares the input at the current position with the
+// capture stored in regexp registers |start_reg| (start index) and
+// |start_reg + 1| (end index).
+//
+// - If the capture is empty or unset, falls through without consuming
+//   input.
+// - If too little input remains, or the text differs, jumps to
+//   |on_no_match| (or backtracks when it is null) with current_position_
+//   unchanged.
+// - Otherwise moves current_position_ past the matched text (backwards
+//   when |read_backward| is set) and falls through.
+//
+// |ignore_case| selects a case-insensitive comparison: inline code for
+// Latin-1 input, a call to a C++ helper for two-byte input.
+// Clobbers current_character_, temp0_, temp1_ and temp2_.
+void SMRegExpMacroAssembler::CheckNotBackReferenceImpl(int start_reg,
+                                                       bool read_backward,
+                                                       Label* on_no_match,
+                                                       bool ignore_case) {
+  js::jit::Label fallthrough;
+
+  // Captures are stored as a sequential pair of registers.
+  // Find the length of the back-referenced capture and load the
+  // capture's start index into current_character_.
+  masm_.loadPtr(register_location(start_reg),  // index of start
+                current_character_);
+  masm_.loadPtr(register_location(start_reg + 1), temp0_);  // index of end
+  masm_.subPtr(current_character_, temp0_);  // length of capture
+
+  // Capture registers are either both set or both cleared.
+  // If the capture length is zero, then the capture is either empty or cleared.
+  // Fall through in both cases.
+  masm_.branchPtr(Assembler::Equal, temp0_, ImmWord(0), &fallthrough);
+
+  // Check that there are sufficient characters left in the input.
+  if (read_backward) {
+    // If start + len > current, there isn't enough room for a
+    // lookbehind backreference.
+    masm_.loadPtr(inputStart(), temp1_);
+    masm_.addPtr(temp0_, temp1_);
+    masm_.branchPtr(Assembler::GreaterThan, temp1_, current_position_,
+                    LabelOrBacktrack(on_no_match));
+  } else {
+    // current_position_ is the negative offset from the end.
+    // If current + len > 0, there isn't enough room for a backreference.
+    masm_.movePtr(current_position_, temp1_);
+    masm_.addPtr(temp0_, temp1_);
+    masm_.branchPtr(Assembler::GreaterThan, temp1_, ImmWord(0),
+                    LabelOrBacktrack(on_no_match));
+  }
+
+  if (mode_ == UC16 && ignore_case) {
+    // We call a helper function for case-insensitive non-latin1 strings.
+
+    // Save volatile regs. temp1_ and temp2_ don't need to be saved.
+    // current_position_ is overwritten below to serve as a call argument,
+    // so it has to be saved and restored even when it is a non-volatile
+    // register; otherwise it would still hold a pointer (not an offset)
+    // after the call.
+    LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
+    volatileRegs.addUnchecked(current_position_);
+    volatileRegs.takeUnchecked(temp1_);
+    volatileRegs.takeUnchecked(temp2_);
+    masm_.PushRegsInMask(volatileRegs);
+
+    // Parameters are
+    //   Address captured - Address of captured substring's start.
+    //   Address current - Address of current character position.
+    //   size_t byte_length - length of capture (in bytes)
+
+    // Compute |captured|
+    masm_.addPtr(input_end_pointer_, current_character_);
+
+    // Compute |current|
+    masm_.addPtr(input_end_pointer_, current_position_);
+    if (read_backward) {
+      // Offset by length when matching backwards.
+      masm_.subPtr(temp0_, current_position_);
+    }
+
+    masm_.setupUnalignedABICall(temp1_);
+    masm_.passABIArg(current_character_);
+    masm_.passABIArg(current_position_);
+    masm_.passABIArg(temp0_);
+
+    bool unicode = true;  // TODO: Fix V8 bug
+    if (unicode) {
+      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
+          CaseInsensitiveCompareUCStrings;
+      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+    } else {
+      uint32_t (*fun)(const char16_t*, const char16_t*, size_t) =
+          CaseInsensitiveCompareStrings;
+      masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, fun));
+    }
+    // Keep the result in temp1_, which is not part of the restored set.
+    masm_.storeCallInt32Result(temp1_);
+    masm_.PopRegsInMask(volatileRegs);
+    masm_.branchTest32(Assembler::Zero, temp1_, temp1_,
+                       LabelOrBacktrack(on_no_match));
+
+    // On success, advance position by length of capture
+    if (read_backward) {
+      masm_.subPtr(temp0_, current_position_);
+    } else {
+      masm_.addPtr(temp0_, current_position_);
+    }
+
+    masm_.bind(&fallthrough);
+    return;
+  }
+
+  // We will be modifying current_position_. Save it in case the match fails.
+  masm_.push(current_position_);
+
+  // Compute start of capture string
+  masm_.addPtr(input_end_pointer_, current_character_);
+
+  // Compute start of match string
+  masm_.addPtr(input_end_pointer_, current_position_);
+  if (read_backward) {
+    // Offset by length when matching backwards.
+    masm_.subPtr(temp0_, current_position_);
+  }
+
+  // Compute end of match string
+  masm_.addPtr(current_position_, temp0_);
+
+  js::jit::Label success;
+  js::jit::Label fail;
+  js::jit::Label loop;
+  masm_.bind(&loop);
+
+  // Load next character from each string.
+  if (mode_ == LATIN1) {
+    masm_.load8ZeroExtend(Address(current_character_, 0), temp1_);
+    masm_.load8ZeroExtend(Address(current_position_, 0), temp2_);
+  } else {
+    masm_.load16ZeroExtend(Address(current_character_, 0), temp1_);
+    masm_.load16ZeroExtend(Address(current_position_, 0), temp2_);
+  }
+
+  if (ignore_case) {
+    MOZ_ASSERT(mode_ == LATIN1);
+    // Try exact match.
+    js::jit::Label loop_increment;
+    masm_.branch32(Assembler::Equal, temp1_, temp2_, &loop_increment);
+
+    // Mismatch. Try case-insensitive match.
+    // Force the match character to lower case (by setting bit 0x20)
+    // then check to see if it is a letter.
+    js::jit::Label convert_capture;
+    masm_.or32(Imm32(0x20), temp1_);
+
+    // Check if it is in [a,z].
+    masm_.computeEffectiveAddress(Address(temp1_, -'a'), temp2_);
+    masm_.branch32(Assembler::BelowOrEqual, temp2_, Imm32('z' - 'a'),
+                   &convert_capture);
+    // Check for values in range [224,254].
+    // Exclude 247 (U+00F7 DIVISION SIGN).
+    masm_.sub32(Imm32(224 - 'a'), temp2_);
+    masm_.branch32(Assembler::Above, temp2_, Imm32(254 - 224), &fail);
+    masm_.branch32(Assembler::Equal, temp2_, Imm32(247 - 224), &fail);
+
+    // Match character is lower case. Convert capture character
+    // to lower case and compare.
+    masm_.bind(&convert_capture);
+    masm_.load8ZeroExtend(Address(current_character_, 0), temp2_);
+    masm_.or32(Imm32(0x20), temp2_);
+    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
+
+    masm_.bind(&loop_increment);
+  } else {
+    // Fail if characters do not match.
+    masm_.branch32(Assembler::NotEqual, temp1_, temp2_, &fail);
+  }
+
+  // Increment pointers into match and capture strings.
+  masm_.addPtr(Imm32(char_size()), current_character_);
+  masm_.addPtr(Imm32(char_size()), current_position_);
+
+  // Loop if we have not reached the end of the match string.
+  masm_.branchPtr(Assembler::Below, current_position_, temp0_, &loop);
+  masm_.jump(&success);
+
+  // If we fail, restore current_position_ and branch.
+  masm_.bind(&fail);
+  masm_.pop(current_position_);
+  JumpOrBacktrack(on_no_match);
+
+  masm_.bind(&success);
+
+  // current_position_ is a pointer. Convert it back to an offset.
+  masm_.subPtr(input_end_pointer_, current_position_);
+  if (read_backward) {
+    // Subtract match length if we matched backward
+    masm_.addPtr(register_location(start_reg), current_position_);
+    masm_.subPtr(register_location(start_reg + 1), current_position_);
+  }
+
+  // Drop saved value of current_position_
+  masm_.addToStackPtr(Imm32(sizeof(uintptr_t)));
+
+  masm_.bind(&fallthrough);
+}
+
+// Case-sensitive back-reference check: branches to |on_no_match| if the
+// input at the current position does not match the capture that starts
+// at register |start_reg|.
+void SMRegExpMacroAssembler::CheckNotBackReference(int start_reg,
+                                                   bool read_backward,
+                                                   Label* on_no_match) {
+  const bool ignoreCase = false;
+  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, ignoreCase);
+}
+
+// Case-insensitive variant of CheckNotBackReference.
+void SMRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(
+    int start_reg, bool read_backward, Label* on_no_match) {
+  const bool ignoreCase = true;
+  CheckNotBackReferenceImpl(start_reg, read_backward, on_no_match, ignoreCase);
+}
+
+// Checks whether the given offset from the current position is
+// inside the input string. Branches to |on_outside_input| (or backtracks
+// when it is null) if it is not. May clobber temp0_.
+void SMRegExpMacroAssembler::CheckPosition(int cp_offset,
+                                           Label* on_outside_input) {
+  // Note: current_position_ is a (negative) byte offset relative to
+  // the end of the input string.
+  if (cp_offset >= 0) {
+    //      end + current + offset >= end
+    // <=>        current + offset >= 0
+    // <=>        current          >= -offset
+    masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
+                    ImmWord(-cp_offset * char_size()),
+                    LabelOrBacktrack(on_outside_input));
+  } else {
+    // Compute offset position
+    masm_.computeEffectiveAddress(
+        Address(current_position_, cp_offset * char_size()), temp0_);
+
+    // Compare to start of input. inputStart() is the position of the
+    // first character (CheckAtStartImpl treats equality with it as being
+    // at the start), so a position equal to inputStart() is still inside
+    // the input; only positions strictly before it are outside.
+    masm_.branchPtr(Assembler::GreaterThan, inputStart(), temp0_,
+                    LabelOrBacktrack(on_outside_input));
+  }
+}
+
+// This function attempts to generate special case code for character classes.
+// Returns true if a special case is generated.
+// Otherwise returns false and generates no code.
+//
+// |type| is the character-class code dispatched on by the switch below
+// ('s', 'S', 'd', 'D', '.', 'w', 'W', '*' or 'n'). The generated code tests
+// current_character_ and branches to the label obtained from
+// LabelOrBacktrack(on_no_match) when the character is not in the class;
+// when it is in the class, execution falls through. temp0_ is clobbered.
+bool SMRegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
+ Label* on_no_match) {
+ js::jit::Label* no_match = LabelOrBacktrack(on_no_match);
+
+ // Note: throughout this function, range checks (c in [min, max])
+ // are implemented by an unsigned (c - min) <= (max - min) check.
+ switch (type) {
+ case 's': {
+ // Match space-characters
+ // Only the Latin1 space set is special-cased; other modes fall back
+ // to the generic character-class code.
+ if (mode_ != LATIN1) {
+ return false;
+ }
+ js::jit::Label success;
+ // One byte space characters are ' ', '\t'..'\r', and '\u00a0' (NBSP).
+
+ // Check ' '
+ masm_.branch32(Assembler::Equal, current_character_, Imm32(' '),
+ &success);
+
+ // Check '\t'..'\r'
+ masm_.computeEffectiveAddress(Address(current_character_, -'\t'),
+ temp0_);
+ masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('\r' - '\t'),
+ &success);
+
+ // Check \u00a0.
+ // temp0_ still holds (c - '\t'), so compare against the biased constant.
+ masm_.branch32(Assembler::NotEqual, temp0_, Imm32(0x00a0 - '\t'),
+ no_match);
+
+ masm_.bind(&success);
+ return true;
+ }
+ case 'S':
+ // The emitted code for generic character classes is good enough.
+ return false;
+ case 'd':
+ // Match latin1 digits ('0'-'9')
+ // Unsigned (c - '0') > 9 means c lies outside '0'..'9'.
+ masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
+ masm_.branch32(Assembler::Above, temp0_, Imm32('9' - '0'), no_match);
+ return true;
+ case 'D':
+ // Match anything except latin1 digits ('0'-'9')
+ masm_.computeEffectiveAddress(Address(current_character_, -'0'), temp0_);
+ masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32('9' - '0'),
+ no_match);
+ return true;
+ case '.':
+ // Match non-newlines. This excludes '\n' (0x0a), '\r' (0x0d),
+ // U+2028 LINE SEPARATOR, and U+2029 PARAGRAPH SEPARATOR.
+ // See https://tc39.es/ecma262/#prod-LineTerminator
+
+ // To test for 0x0a and 0x0d efficiently, we XOR the input with 1.
+ // This converts 0x0a to 0x0b, and 0x0d to 0x0c, allowing us to
+ // test for the contiguous range 0x0b..0x0c.
+ masm_.move32(current_character_, temp0_);
+ masm_.xor32(Imm32(0x01), temp0_);
+ masm_.sub32(Imm32(0x0b), temp0_);
+ masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
+ no_match);
+
+ if (mode_ == UC16) {
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed (current_char ^ 0x01 - 0x0b). I.e., check for
+ // 0x201d (0x2028 - 0x0b) or 0x201e.
+ masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
+ masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x2029 - 0x2028),
+ no_match);
+ }
+ return true;
+ case 'w':
+ // \w matches the set of 63 characters defined in Runtime Semantics:
+ // WordCharacters. We use a static lookup table, which is defined in
+ // regexp-macro-assembler.cc.
+ // Note: if both Unicode and IgnoreCase are true, \w matches a
+ // larger set of characters. That case is handled elsewhere.
+ if (mode_ != LATIN1) {
+ // Anything above 'z' is rejected up front, so the table lookup below
+ // is only reached with an index of at most 'z'.
+ masm_.branch32(Assembler::Above, current_character_, Imm32('z'),
+ no_match);
+ }
+ static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
+ masm_.movePtr(ImmPtr(word_character_map), temp0_);
+ masm_.load8ZeroExtend(
+ BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
+ // A zero table entry means "not a word character".
+ masm_.branchTest32(Assembler::Zero, temp0_, temp0_, no_match);
+ return true;
+ case 'W': {
+ // See 'w' above.
+ js::jit::Label done;
+ if (mode_ != LATIN1) {
+ // Characters above 'z' skip the table lookup and count as a match.
+ masm_.branch32(Assembler::Above, current_character_, Imm32('z'), &done);
+ }
+ static_assert(arraysize(word_character_map) > unibrow::Latin1::kMaxChar);
+ masm_.movePtr(ImmPtr(word_character_map), temp0_);
+ masm_.load8ZeroExtend(
+ BaseIndex(temp0_, current_character_, js::jit::TimesOne), temp0_);
+ masm_.branchTest32(Assembler::NonZero, temp0_, temp0_, no_match);
+ if (mode_ != LATIN1) {
+ masm_.bind(&done);
+ }
+ return true;
+ }
+ ////////////////////////////////////////////////////////////////////////
+ // Non-standard classes (with no syntactic shorthand) used internally //
+ ////////////////////////////////////////////////////////////////////////
+ case '*':
+ // Match any character
+ return true;
+ case 'n':
+ // Match newlines. The opposite of '.'. See '.' above.
+ masm_.move32(current_character_, temp0_);
+ masm_.xor32(Imm32(0x01), temp0_);
+ masm_.sub32(Imm32(0x0b), temp0_);
+ if (mode_ == LATIN1) {
+ masm_.branch32(Assembler::Above, temp0_, Imm32(0x0c - 0x0b), no_match);
+ } else {
+ MOZ_ASSERT(mode_ == UC16);
+ js::jit::Label done;
+ // '\n' or '\r': already a match, skip the U+2028/U+2029 test.
+ masm_.branch32(Assembler::BelowOrEqual, temp0_, Imm32(0x0c - 0x0b),
+ &done);
+
+ // Compare original value to 0x2028 and 0x2029, using the already
+ // computed (current_char ^ 0x01 - 0x0b). I.e., check for
+ // 0x201d (0x2028 - 0x0b) or 0x201e.
+ masm_.sub32(Imm32(0x2028 - 0x0b), temp0_);
+ masm_.branch32(Assembler::Above, temp0_, Imm32(0x2029 - 0x2028),
+ no_match);
+ masm_.bind(&done);
+ }
+ return true;
+
+ // No custom implementation
+ default:
+ return false;
+ }
+}
+
+// Abandon the match attempt: place the "no match found" status in temp0_
+// and leave through the shared exit label.
+void SMRegExpMacroAssembler::Fail() {
+  const ImmWord notFound(js::RegExpRunStatus_Success_NotFound);
+  masm_.movePtr(notFound, temp0_);
+  masm_.jump(&exit_label_);
+}
+
+// Emit an unconditional jump to the label LabelOrBacktrack() selects for
+// |to|.
+void SMRegExpMacroAssembler::GoTo(Label* to) {
+  js::jit::Label* target = LabelOrBacktrack(to);
+  masm_.jump(target);
+}
+
+// Branch to |if_ge| when regexp register |reg| holds a value that is
+// (signed) greater than or equal to |comparand|.
+void SMRegExpMacroAssembler::IfRegisterGE(int reg, int comparand,
+                                          Label* if_ge) {
+  js::jit::Label* target = LabelOrBacktrack(if_ge);
+  masm_.branchPtr(Assembler::GreaterThanOrEqual, register_location(reg),
+                  ImmWord(comparand), target);
+}
+
+// Branch to |if_lt| when regexp register |reg| holds a value that is
+// (signed) less than |comparand|.
+void SMRegExpMacroAssembler::IfRegisterLT(int reg, int comparand,
+                                          Label* if_lt) {
+  js::jit::Label* target = LabelOrBacktrack(if_lt);
+  masm_.branchPtr(Assembler::LessThan, register_location(reg),
+                  ImmWord(comparand), target);
+}
+
+// Branch to |if_eq| when regexp register |reg| equals the current input
+// position.
+void SMRegExpMacroAssembler::IfRegisterEqPos(int reg, Label* if_eq) {
+  js::jit::Label* target = LabelOrBacktrack(if_eq);
+  masm_.branchPtr(Assembler::Equal, register_location(reg), current_position_,
+                  target);
+}
+
+// This is a word-for-word identical copy of the V8 code, which is
+// duplicated in at least nine different places in V8 (one per
+// supported architecture) with no differences outside of comments and
+// formatting. It should be hoisted into the superclass. Once that is
+// done upstream, this version can be deleted.
+//
+// Loads |characters| characters starting |cp_offset| characters from the
+// current position. When |check_bounds| is set, a CheckPosition() test
+// against |on_end_of_input| is emitted first; |eats_at_least| widens that
+// test for non-negative offsets.
+void SMRegExpMacroAssembler::LoadCurrentCharacterImpl(int cp_offset,
+ Label* on_end_of_input,
+ bool check_bounds,
+ int characters,
+ int eats_at_least) {
+ // It's possible to preload a small number of characters when each success
+ // path requires a large number of characters, but not the reverse.
+ MOZ_ASSERT(eats_at_least >= characters);
+ MOZ_ASSERT(cp_offset < (1 << 30)); // Be sane! (And ensure negation works)
+
+ if (check_bounds) {
+ if (cp_offset >= 0) {
+ // Forward read: check the last character the success path will consume.
+ CheckPosition(cp_offset + eats_at_least - 1, on_end_of_input);
+ } else {
+ // Backward read: only the requested offset itself is checked.
+ CheckPosition(cp_offset, on_end_of_input);
+ }
+ }
+ LoadCurrentCharacterUnchecked(cp_offset, characters);
+}
+
+// Emit an unchecked load of |characters| consecutive code units, located
+// |cp_offset| characters away from the current position, into
+// current_character_. Loads narrower than 32 bits are zero-extended.
+void SMRegExpMacroAssembler::LoadCurrentCharacterUnchecked(int cp_offset,
+                                                           int characters) {
+  // The source is addressed as input_end_pointer_ + current_position_,
+  // displaced by the requested number of characters.
+  BaseIndex src(input_end_pointer_, current_position_, js::jit::TimesOne,
+                cp_offset * char_size());
+
+  if (mode_ != LATIN1) {
+    MOZ_ASSERT(mode_ == UC16);
+    // Two-byte units: a pair fills 32 bits, a single unit needs 16.
+    if (characters != 2) {
+      MOZ_ASSERT(characters == 1);
+      masm_.load16ZeroExtend(src, current_character_);
+    } else {
+      masm_.load32(src, current_character_);
+    }
+    return;
+  }
+
+  // One-byte units: 4, 2 or 1 characters map to 32-, 16- and 8-bit loads.
+  switch (characters) {
+    case 4:
+      masm_.load32(src, current_character_);
+      break;
+    case 2:
+      masm_.load16ZeroExtend(src, current_character_);
+      break;
+    default:
+      MOZ_ASSERT(characters == 1);
+      masm_.load8ZeroExtend(src, current_character_);
+      break;
+  }
+}
+
+// Pop the top backtrack-stack entry into current_position_.
+void SMRegExpMacroAssembler::PopCurrentPosition() { Pop(current_position_); }
+
+// Pop the top backtrack-stack entry into regexp register |register_index|.
+// The value travels through temp0_, which is clobbered.
+void SMRegExpMacroAssembler::PopRegister(int register_index) {
+  Register scratch = temp0_;
+  Pop(scratch);
+  masm_.storePtr(scratch, register_location(register_index));
+}
+
+// Push a backtrack target for |label| onto the backtrack stack.
+//
+// The label is not bound yet, so a patchable move of a nullptr placeholder
+// is emitted and its code offset is remembered in the label's patchOffset_.
+// The placeholder is presumably replaced with the real address once the
+// code is finalized.
+void SMRegExpMacroAssembler::PushBacktrack(Label* label) {
+  js::jit::CodeOffset& patchSite = label->patchOffset_;
+  MOZ_ASSERT(!label->is_bound());
+  MOZ_ASSERT(!patchSite.bound());
+
+  patchSite = masm_.movWithPatch(ImmPtr(nullptr), temp0_);
+  MOZ_ASSERT(patchSite.bound());
+
+  Push(temp0_);
+
+  // The push may have exhausted the backtrack stack.
+  CheckBacktrackStackLimit();
+}
+
+// Push current_position_ onto the backtrack stack. No stack-limit check is
+// emitted here.
+void SMRegExpMacroAssembler::PushCurrentPosition() { Push(current_position_); }
+
+// Push the value of regexp register |register_index| onto the backtrack
+// stack (via temp0_). A backtrack-stack limit check follows only when
+// |check_stack_limit| is set.
+void SMRegExpMacroAssembler::PushRegister(int register_index,
+                                          StackCheckFlag check_stack_limit) {
+  Register scratch = temp0_;
+  masm_.loadPtr(register_location(register_index), scratch);
+  Push(scratch);
+
+  if (!check_stack_limit) {
+    return;
+  }
+  CheckBacktrackStackLimit();
+}
+
+// Load current_position_ from regexp register |reg|.
+void SMRegExpMacroAssembler::ReadCurrentPositionFromRegister(int reg) {
+ masm_.loadPtr(register_location(reg), current_position_);
+}
+
+// Store the current position, displaced by |cp_offset| characters, into
+// regexp register |reg|. temp0_ is clobbered only when the offset is
+// non-zero.
+void SMRegExpMacroAssembler::WriteCurrentPositionToRegister(int reg,
+                                                            int cp_offset) {
+  if (cp_offset == 0) {
+    // No displacement: store the position register directly.
+    masm_.storePtr(current_position_, register_location(reg));
+    return;
+  }
+
+  Register adjusted = temp0_;
+  Address displaced(current_position_, cp_offset * char_size());
+  masm_.computeEffectiveAddress(displaced, adjusted);
+  masm_.storePtr(adjusted, register_location(reg));
+}
+
+// Note: The backtrack stack pointer is stored in a register as an
+// offset from the stack top, not as a bare pointer, so that it is not
+// corrupted if the backtrack stack grows (and therefore moves).
+//
+// Restore backtrack_stack_pointer_ from regexp register |reg|: load the
+// saved offset, then add the backtrack stack base from the frame to turn
+// it back into a pointer.
+void SMRegExpMacroAssembler::ReadStackPointerFromRegister(int reg) {
+ masm_.loadPtr(register_location(reg), backtrack_stack_pointer_);
+ masm_.addPtr(backtrackStackBase(), backtrack_stack_pointer_);
+}
+// Save the backtrack stack pointer into regexp register |reg| as an offset
+// from the backtrack stack base held in the frame. temp0_ is clobbered.
+void SMRegExpMacroAssembler::WriteStackPointerToRegister(int reg) {
+  Register offsetFromBase = temp0_;
+  masm_.movePtr(backtrack_stack_pointer_, offsetFromBase);
+  masm_.subPtr(backtrackStackBase(), offsetFromBase);
+  masm_.storePtr(offsetFromBase, register_location(reg));
+}
+
+// Used for regexps anchored at the end: when the longest possible match is
+// shorter than the input, the leading part of the input cannot take part
+// in a match, so matching may start |by| characters before the end.
+void SMRegExpMacroAssembler::SetCurrentPositionFromEnd(int by) {
+  // Positions are negative offsets from the end of the input.
+  const auto earliest = -by * char_size();
+
+  js::jit::Label already_past;
+  masm_.branchPtr(Assembler::GreaterThanOrEqual, current_position_,
+                  ImmWord(earliest), &already_past);
+  masm_.movePtr(ImmWord(earliest), current_position_);
+
+  // This operation is used on RegExp code entry, where the character before
+  // the current position is expected to be loaded already. The position
+  // was just advanced, so reading one character backwards is safe.
+  LoadCurrentCharacterUnchecked(-1, 1);
+  masm_.bind(&already_past);
+}
+
+// Store the constant |to| into regexp register |register_index|. Only
+// non-capture registers may be written this way.
+void SMRegExpMacroAssembler::SetRegister(int register_index, int to) {
+  MOZ_ASSERT(register_index >= num_capture_registers_);
+  const ImmWord value(to);
+  masm_.storePtr(value, register_location(register_index));
+}
+
+// Emit a jump to the success handler.
+// Returns true if the match can be restarted (i.e. the regexp is global).
+// Nothing consumes the return value, but it is implemented to be safe.
+bool SMRegExpMacroAssembler::Succeed() {
+  masm_.jump(&success_label_);
+
+  const bool restartable = global();
+  return restartable;
+}
+
+// Reset regexp registers |reg_from|..|reg_to| (inclusive) to the value
+// capture registers are initialized with: the input start position minus
+// one character, i.e. input[-1]. temp0_ is clobbered.
+void SMRegExpMacroAssembler::ClearRegisters(int reg_from, int reg_to) {
+  MOZ_ASSERT(reg_from <= reg_to);
+
+  Register clearedValue = temp0_;
+  masm_.loadPtr(inputStart(), clearedValue);
+  masm_.subPtr(Imm32(char_size()), clearedValue);
+
+  int reg = reg_from;
+  while (reg <= reg_to) {
+    masm_.storePtr(clearedValue, register_location(reg));
+    reg++;
+  }
+}
+
+// Push |source| onto the backtrack stack: move the backtrack stack pointer
+// down by one pointer-sized slot, then store |source| there.
+void SMRegExpMacroAssembler::Push(Register source) {
+  MOZ_ASSERT(source != backtrack_stack_pointer_);
+
+  const Imm32 slotSize(sizeof(void*));
+  masm_.subPtr(slotSize, backtrack_stack_pointer_);
+
+  Address top(backtrack_stack_pointer_, 0);
+  masm_.storePtr(source, top);
+}
+
+// Pop the top of the backtrack stack into |target|: load the slot at the
+// backtrack stack pointer, then move the pointer up by one pointer-sized
+// slot.
+void SMRegExpMacroAssembler::Pop(Register target) {
+  MOZ_ASSERT(target != backtrack_stack_pointer_);
+
+  Address top(backtrack_stack_pointer_, 0);
+  masm_.loadPtr(top, target);
+
+  const Imm32 slotSize(sizeof(void*));
+  masm_.addPtr(slotSize, backtrack_stack_pointer_);
+}
+
+// Jump to |to| if a label was supplied; a null label means "backtrack".
+void SMRegExpMacroAssembler::JumpOrBacktrack(Label* to) {
+  if (!to) {
+    Backtrack();
+    return;
+  }
+  masm_.jump(to->inner());
+}
+
+// Emit a quick inline check for backtrack-stack overflow. When the check
+// fails, the out-of-line overflow handler is called to try growing the
+// stack; if that reports failure (zero in temp0_), control transfers to
+// the exception exit.
+void SMRegExpMacroAssembler::CheckBacktrackStackLimit() {
+  AbsoluteAddress limit(isolate()->regexp_stack()->limit_address_address());
+
+  js::jit::Label within_limit;
+  masm_.branchPtr(Assembler::BelowOrEqual, limit, backtrack_stack_pointer_,
+                  &within_limit);
+
+  masm_.call(&stack_overflow_label_);
+
+  // The handler leaves its success flag in temp0_.
+  masm_.branchTest32(Assembler::Zero, temp0_, temp0_,
+                     &exit_with_exception_label_);
+
+  masm_.bind(&within_limit);
+}
+
+// Placeholder result used to sneak an OOM through the V8 layer: a handle
+// that refers to the shared undefined value instead of real code.
+static Handle<HeapObject> DummyCode() {
+  JS::HandleValue undefined = JS::UndefinedHandleValue;
+  return Handle<HeapObject>::fromHandleValue(undefined);
+}
+
+// Finish code generation. This runs last, once the number of registers
+// needed is known. Emits the entry sequence and the shared handlers, links
+// the code and patches the recorded label addresses.
+//
+// Returns the resulting JitCode wrapped in a handle, or DummyCode() when
+// the JIT realm cannot be created or code allocation fails. |source| is
+// not used.
+Handle<HeapObject> SMRegExpMacroAssembler::GetCode(Handle<String> source) {
+  const bool haveJitRealm = cx_->realm()->ensureJitRealmExists(cx_);
+  if (!haveJitRealm) {
+    return DummyCode();
+  }
+
+  // Entry point: set up the frame, then jump to start_label_.
+  masm_.bind(&entry_label_);
+  createStackFrame();
+  initFrameAndRegs();
+  masm_.jump(&start_label_);
+
+  // Shared handlers, emitted in this fixed order.
+  successHandler();
+  exitHandler();
+  backtrackHandler();
+  stackOverflowHandler();
+
+  Linker linker(masm_);
+  JitCode* jitCode = linker.newCode(cx_, js::jit::CodeKind::RegExp);
+  if (jitCode == nullptr) {
+    return DummyCode();
+  }
+
+  // The final code address is known now: replace each recorded nullptr
+  // placeholder with the absolute address of its label.
+  for (LabelPatch& patch : labelPatches_) {
+    CodeLocationLabel patchSite(jitCode, patch.patchOffset_);
+    Assembler::PatchDataWithValueCheck(
+        patchSite, ImmPtr(jitCode->raw() + patch.labelOffset_),
+        ImmPtr(nullptr));
+  }
+
+  return Handle<HeapObject>(JS::PrivateGCThingValue(jitCode), isolate());
+}
+
+/*
+ * The stack will have the following structure:
+ * sp-> - FrameData
+ * - inputStart
+ * - backtrack stack base
+ * - matches
+ * - numMatches
+ * - Registers
+ * - Capture positions
+ * - Scratch registers
+ * --- frame alignment ---
+ * - Saved register area
+ * - Return address
+ */
+// Emits the prologue: saves the registers in savedRegisters_, moves the
+// InputOutputData pointer into temp0_, reserves the frame described above
+// and checks the native stack limit. On stack exhaustion the generated code
+// leaves through exit_label_ with RegExpRunStatus_Error in temp0_.
+void SMRegExpMacroAssembler::createStackFrame() {
+#ifdef JS_CODEGEN_ARM64
+ // ARM64 communicates stack address via SP, but uses a pseudo-sp (PSP) for
+ // addressing. The register we use for PSP may however also be used by
+ // calling code, and it is nonvolatile, so save it. Do this as a special
+ // case first because the generic save/restore code needs the PSP to be
+ // initialized already.
+ MOZ_ASSERT(js::jit::PseudoStackPointer64.Is(masm_.GetStackPointer64()));
+ masm_.Str(js::jit::PseudoStackPointer64,
+ vixl::MemOperand(js::jit::sp, -16, vixl::PreIndex));
+
+ // Initialize the PSP from the SP.
+ masm_.initPseudoStackPtr();
+#endif
+
+ // Push non-volatile registers which might be modified by jitcode.
+ size_t pushedNonVolatileRegisters = 0;
+ for (GeneralRegisterForwardIterator iter(savedRegisters_); iter.more();
+ ++iter) {
+ masm_.Push(*iter);
+ pushedNonVolatileRegisters++;
+ }
+
+ // The pointer to InputOutputData is passed as the first argument.
+ // On x86 we have to load it off the stack into temp0_.
+ // On other platforms it is already in a register.
+#ifdef JS_CODEGEN_X86
+ // Skip the saved registers plus one more slot (the return address, per the
+ // layout above) to reach the argument.
+ Address ioDataAddr(masm_.getStackPointer(),
+ (pushedNonVolatileRegisters + 1) * sizeof(void*));
+ masm_.loadPtr(ioDataAddr, temp0_);
+#else
+ if (js::jit::IntArgReg0 != temp0_) {
+ masm_.movePtr(js::jit::IntArgReg0, temp0_);
+ }
+#endif
+
+ // Start a new stack frame.
+ // The frame holds the FrameData header plus one pointer-sized slot per
+ // regexp register; the total is rounded for ABI stack alignment.
+ size_t frameBytes = sizeof(FrameData) + num_registers_ * sizeof(void*);
+ frameSize_ = js::jit::StackDecrementForCall(js::jit::ABIStackAlignment,
+ masm_.framePushed(), frameBytes);
+ masm_.reserveStack(frameSize_);
+ masm_.checkStackAlignment();
+
+ // Check if we have space on the stack. Use the *NoInterrupt stack limit to
+ // avoid failing repeatedly when the regex code is called from Ion JIT code.
+ // (See bug 1208819)
+ js::jit::Label stack_ok;
+ AbsoluteAddress limit_addr(cx_->addressOfJitStackLimitNoInterrupt());
+ masm_.branchStackPtrRhs(Assembler::Below, limit_addr, &stack_ok);
+
+ // There is not enough space on the stack. Exit with an exception.
+ // temp0_ carries the status to the exit handler.
+ masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
+ masm_.jump(&exit_label_);
+
+ masm_.bind(&stack_ok);
+}
+
+// Fills in the freshly reserved frame from the InputOutputData whose address
+// is in temp0_: output pairs pointer and count, input start position, the
+// initial current position and current character, the capture registers and
+// the backtrack stack pointer/base. temp1_ and temp2_ are used as scratch.
+void SMRegExpMacroAssembler::initFrameAndRegs() {
+ // At this point, an uninitialized stack frame has been created,
+ // and the address of the InputOutputData is in temp0_.
+ Register ioDataReg = temp0_;
+
+ Register matchesReg = temp1_;
+ masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, matches)),
+ matchesReg);
+
+ // Initialize output registers
+ masm_.loadPtr(Address(matchesReg, MatchPairs::offsetOfPairs()), temp2_);
+ masm_.storePtr(temp2_, matches());
+ masm_.load32(Address(matchesReg, MatchPairs::offsetOfPairCount()), temp2_);
+ masm_.store32(temp2_, numMatches());
+
+#ifdef DEBUG
+ // Bounds-check numMatches.
+ // Each output pair covers two capture registers.
+ js::jit::Label enoughRegisters;
+ masm_.branchPtr(Assembler::GreaterThanOrEqual, temp2_,
+ ImmWord(num_capture_registers_ / 2), &enoughRegisters);
+ masm_.assumeUnreachable("Not enough output pairs for RegExp");
+ masm_.bind(&enoughRegisters);
+#endif
+
+ // Load input start pointer.
+ masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputStart)),
+ current_position_);
+
+ // Load input end pointer
+ masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, inputEnd)),
+ input_end_pointer_);
+
+ // Set up input position to be negative offset from string end.
+ masm_.subPtr(input_end_pointer_, current_position_);
+
+ // Store inputStart
+ // Note that the stored value is the negative offset computed above
+ // (inputStart - inputEnd), not the raw pointer.
+ masm_.storePtr(current_position_, inputStart());
+
+ // Load start index
+ // matchesReg is not used past this point, so temp1_ is free for reuse.
+ Register startIndexReg = temp1_;
+ masm_.loadPtr(Address(ioDataReg, offsetof(InputOutputData, startIndex)),
+ startIndexReg);
+ // Advance the position by startIndex characters, scaled by factor().
+ masm_.computeEffectiveAddress(
+ BaseIndex(current_position_, startIndexReg, factor()), current_position_);
+
+ // Initialize current_character_.
+ // Load newline if index is at start, or previous character otherwise.
+ js::jit::Label start_regexp;
+ js::jit::Label load_previous_character;
+ masm_.branchPtr(Assembler::NotEqual, startIndexReg, ImmWord(0),
+ &load_previous_character);
+ masm_.movePtr(ImmWord('\n'), current_character_);
+ masm_.jump(&start_regexp);
+
+ masm_.bind(&load_previous_character);
+ LoadCurrentCharacterUnchecked(-1, 1);
+ masm_.bind(&start_regexp);
+
+ // Initialize captured registers with inputStart - 1
+ MOZ_ASSERT(num_capture_registers_ > 0);
+ Register inputStartMinusOneReg = temp2_;
+ masm_.loadPtr(inputStart(), inputStartMinusOneReg);
+ masm_.subPtr(Imm32(char_size()), inputStartMinusOneReg);
+ if (num_capture_registers_ > 8) {
+ // Emit a run-time loop. temp1_ (startIndexReg above) is no longer needed
+ // and serves as the byte offset of the slot being written, relative to
+ // the stack pointer.
+ masm_.movePtr(ImmWord(register_offset(0)), temp1_);
+ js::jit::Label init_loop;
+ masm_.bind(&init_loop);
+ masm_.storePtr(inputStartMinusOneReg, BaseIndex(masm_.getStackPointer(),
+ temp1_, js::jit::TimesOne));
+ masm_.addPtr(ImmWord(sizeof(void*)), temp1_);
+ masm_.branchPtr(Assembler::LessThan, temp1_,
+ ImmWord(register_offset(num_capture_registers_)),
+ &init_loop);
+ } else {
+ // Unroll the loop
+ for (int i = 0; i < num_capture_registers_; i++) {
+ masm_.storePtr(inputStartMinusOneReg, register_location(i));
+ }
+ }
+
+ // Initialize backtrack stack pointer
+ // The same value is recorded as the backtrack stack base in the frame.
+ masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()),
+ backtrack_stack_pointer_);
+ masm_.storePtr(backtrack_stack_pointer_, backtrackStackBase());
+}
+
+// Emit the success path: copy every capture register to the output pairs
+// whose address was saved in the frame, then set the Success status.
+//
+// Capture registers hold positions, i.e. negative byte offsets from the end
+// of the string, while the output expects character indices:
+//
+// Index:        [ 0 ][ 1 ][ 2 ][ 3 ][ 4 ][ 5 ][END]
+// Pos (1-byte): [-6 ][-5 ][-4 ][-3 ][-2 ][-1 ][ 0 ]   // IS = -6
+// Pos (2-byte): [-12][-10][-8 ][-6 ][-4 ][-2 ][ 0 ]   // IS = -12
+//
+// index = (position - InputStart) / char_size
+void SMRegExpMacroAssembler::successHandler() {
+  MOZ_ASSERT(success_label_.used());
+  masm_.bind(&success_label_);
+
+  Register pairsReg = temp1_;
+  masm_.loadPtr(matches(), pairsReg);
+
+  Register startPosReg = temp2_;
+  masm_.loadPtr(inputStart(), startPosReg);
+
+  const bool isTwoByte = (mode_ == UC16);
+  for (int reg = 0; reg < num_capture_registers_; reg++) {
+    Address outSlot(pairsReg, reg * sizeof(int32_t));
+
+    masm_.loadPtr(register_location(reg), temp0_);
+    masm_.subPtr(startPosReg, temp0_);
+    if (isTwoByte) {
+      // Halve the byte distance to obtain a two-byte character index.
+      masm_.rshiftPtrArithmetic(Imm32(1), temp0_);
+    }
+    masm_.store32(temp0_, outSlot);
+  }
+
+  // No jump is emitted: this falls through to the exit handler, which
+  // picks the status up from temp0_.
+  masm_.movePtr(ImmWord(js::RegExpRunStatus_Success), temp0_);
+}
+
+// Emits the shared epilogue at exit_label_: moves the status from temp0_ to
+// the return register, releases the frame, restores the saved registers and
+// returns. If exit_with_exception_label_ is used, an extra stub is emitted
+// after the return that sets the Error status and jumps back to exit_label_.
+void SMRegExpMacroAssembler::exitHandler() {
+ masm_.bind(&exit_label_);
+
+ if (temp0_ != js::jit::ReturnReg) {
+ masm_.movePtr(temp0_, js::jit::ReturnReg);
+ }
+
+ masm_.freeStack(frameSize_);
+
+ // Restore registers which were saved on entry
+ // (iterating backward, the reverse of the forward push order on entry).
+ for (GeneralRegisterBackwardIterator iter(savedRegisters_); iter.more();
+ ++iter) {
+ masm_.Pop(*iter);
+ }
+
+#ifdef JS_CODEGEN_ARM64
+ // Now restore the value that was in the PSP register on entry, and return.
+
+ // Obtain the correct SP from the PSP.
+ masm_.Mov(js::jit::sp, js::jit::PseudoStackPointer64);
+
+ // Restore the saved value of the PSP register, this value is whatever the
+ // caller had saved in it, not any actual SP value, and it must not be
+ // overwritten subsequently.
+ masm_.Ldr(js::jit::PseudoStackPointer64,
+ vixl::MemOperand(js::jit::sp, 16, vixl::PostIndex));
+
+ // Perform a plain Ret(), as abiret() will move SP <- PSP and that is wrong.
+ masm_.Ret(vixl::lr);
+#else
+ masm_.abiret();
+#endif
+
+ // This stub sits after the return above, so it is reached only through
+ // its label.
+ if (exit_with_exception_label_.used()) {
+ masm_.bind(&exit_with_exception_label_);
+
+ // Exit with an error result to signal thrown exception
+ masm_.movePtr(ImmWord(js::RegExpRunStatus_Error), temp0_);
+ masm_.jump(&exit_label_);
+ }
+}
+
+// Emit the shared backtrack stub at backtrack_label_. Nothing is emitted
+// when no code refers to the label.
+void SMRegExpMacroAssembler::backtrackHandler() {
+  if (backtrack_label_.used()) {
+    masm_.bind(&backtrack_label_);
+    Backtrack();
+  }
+}
+
+// Emit the out-of-line handler that CheckBacktrackStackLimit() calls when
+// the backtrack stack has hit its limit. Nothing is emitted if the label is
+// unused.
+//
+// The handler calls GrowBacktrackStack() and leaves its boolean result in
+// temp0_ for the caller to test. On success it also rebases
+// backtrack_stack_pointer_ onto the new stack memory and updates the
+// backtrack stack base saved in the frame. temp1_ is clobbered.
+void SMRegExpMacroAssembler::stackOverflowHandler() {
+  if (!stack_overflow_label_.used()) {
+    return;
+  }
+
+  // Called if the backtrack-stack limit has been hit.
+  // NOTE: depending on architecture, the call may have
+  // changed the stack pointer. We adjust for that below.
+  masm_.bind(&stack_overflow_label_);
+
+  // Load argument
+  masm_.movePtr(ImmPtr(isolate()->regexp_stack()), temp1_);
+
+  // Save registers before calling C function
+  LiveGeneralRegisterSet volatileRegs(GeneralRegisterSet::Volatile());
+
+#ifdef JS_USE_LINK_REGISTER
+  // Push the return address explicitly on targets that define this macro,
+  // so the sizeof(void*) frame adjustment below applies uniformly.
+  // (The assembler member is masm_; the previous spelling "masm" did not
+  // compile on these targets.)
+  masm_.pushReturnAddress();
+#endif
+
+  // Adjust for the return address on the stack.
+  size_t frameOffset = sizeof(void*);
+
+  // temp0_ receives the call result and temp1_ is rewritten below, so
+  // neither needs to be preserved across the call.
+  volatileRegs.takeUnchecked(temp0_);
+  volatileRegs.takeUnchecked(temp1_);
+  masm_.PushRegsInMask(volatileRegs);
+
+  masm_.setupUnalignedABICall(temp0_);
+  masm_.passABIArg(temp1_);
+  masm_.callWithABI(JS_FUNC_TO_DATA_PTR(void*, GrowBacktrackStack));
+  masm_.storeCallBoolResult(temp0_);
+
+  masm_.PopRegsInMask(volatileRegs);
+
+  // If GrowBacktrackStack returned false, we have failed to grow the
+  // stack, and must exit with a stack-overflow exception. Do this in
+  // the caller so that the stack is adjusted by our return instruction.
+  js::jit::Label overflow_return;
+  masm_.branchTest32(Assembler::Zero, temp0_, temp0_, &overflow_return);
+
+  // Otherwise, store the new backtrack stack base and recompute the new
+  // top of the stack: turn the pointer into an offset from the old base,
+  // then add the new base.
+  Address bsbAddress(masm_.getStackPointer(),
+                     offsetof(FrameData, backtrackStackBase) + frameOffset);
+  masm_.subPtr(bsbAddress, backtrack_stack_pointer_);
+
+  masm_.loadPtr(AbsoluteAddress(isolate()->top_of_regexp_stack()), temp1_);
+  masm_.storePtr(temp1_, bsbAddress);
+  masm_.addPtr(temp1_, backtrack_stack_pointer_);
+
+  // Resume execution in calling code.
+  masm_.bind(&overflow_return);
+  masm_.ret();
+}
+
+// This is only used by tracing code.
+// The return value doesn't matter.
+// (Hence the bytecode tag is reported even though this assembler emits
+// native code.)
+RegExpMacroAssembler::IrregexpImplementation
+SMRegExpMacroAssembler::Implementation() {
+ return kBytecodeImplementation;
+}
+
+/* static */
+// Compare two two-byte character runs of |byteLength| bytes each, ignoring
+// case. Code units that differ are compared again after
+// js::unicode::ToUpperCase. Returns 1 if the runs match and 0 otherwise.
+uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareStrings(
+    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
+  js::AutoUnsafeCallWithABI unsafe;
+
+  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
+  const size_t numChars = byteLength / sizeof(char16_t);
+
+  for (size_t idx = 0; idx < numChars; ++idx) {
+    const char16_t lhs = substring1[idx];
+    const char16_t rhs = substring2[idx];
+    if (lhs == rhs) {
+      continue;
+    }
+
+    const char16_t upperLhs = js::unicode::ToUpperCase(lhs);
+    const char16_t upperRhs = js::unicode::ToUpperCase(rhs);
+    if (upperLhs != upperRhs) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+/* static */
+// Compare two two-byte character runs of |byteLength| bytes each, using
+// case folding. Code units that differ are compared again after
+// js::unicode::FoldCase. Returns 1 if the runs match and 0 otherwise.
+uint32_t SMRegExpMacroAssembler::CaseInsensitiveCompareUCStrings(
+    const char16_t* substring1, const char16_t* substring2, size_t byteLength) {
+  js::AutoUnsafeCallWithABI unsafe;
+
+  MOZ_ASSERT(byteLength % sizeof(char16_t) == 0);
+  const size_t numChars = byteLength / sizeof(char16_t);
+
+  for (size_t idx = 0; idx < numChars; ++idx) {
+    const char16_t lhs = substring1[idx];
+    const char16_t rhs = substring2[idx];
+    if (lhs == rhs) {
+      continue;
+    }
+
+    const char16_t foldedLhs = js::unicode::FoldCase(lhs);
+    const char16_t foldedRhs = js::unicode::FoldCase(rhs);
+    if (foldedLhs != foldedRhs) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
+/* static */
+// Called from generated code when the backtrack stack is full: asks the
+// stack for twice its current capacity. Returns true when
+// EnsureCapacity() yields a non-null/non-zero result.
+bool SMRegExpMacroAssembler::GrowBacktrackStack(RegExpStack* regexp_stack) {
+  js::AutoUnsafeCallWithABI unsafe;
+
+  const size_t current = regexp_stack->stack_capacity();
+  const size_t doubled = current * 2;
+  return static_cast<bool>(regexp_stack->EnsureCapacity(doubled));
+}
+
+} // namespace internal
+} // namespace v8
diff --git a/js/src/regexp/regexp-shim.cc b/js/src/regexp/regexp-shim.cc
index 773c17eadd..3f3fa40eb0 100644
--- a/js/src/regexp/regexp-shim.cc
+++ b/js/src/regexp/regexp-shim.cc
@@ -11,6 +11,7 @@
#include <iostream>
#include "regexp/regexp-shim.h"
+#include "regexp/regexp-stack.h"
namespace v8 {
namespace internal {
@@ -73,6 +74,8 @@ Handle<T>::Handle(T object, Isolate* isolate)
: location_(isolate->getHandleLocation(JS::Value(object))) {}
template Handle<ByteArray>::Handle(ByteArray b, Isolate* isolate);
+template Handle<HeapObject>::Handle(JS::Value v, Isolate* isolate);
+template Handle<JSRegExp>::Handle(JSRegExp re, Isolate* isolate);
template Handle<String>::Handle(String s, Isolate* isolate);
template <typename T>
@@ -150,6 +153,10 @@ std::unique_ptr<char[]> String::ToCString() {
return std::unique_ptr<char[]>();
}
+// Returns, as a byte*, the value of regexpStack_->memory_top_address_address().
+// NOTE(review): judging by the name and by callers that wrap the result in
+// AbsoluteAddress and load through it, this is the address of the field
+// holding the stack's memory top, not the top itself; confirm against
+// RegExpStack.
+byte* Isolate::top_of_regexp_stack() const {
+ return reinterpret_cast<byte*>(regexpStack_->memory_top_address_address());
+}
+
Handle<ByteArray> Isolate::NewByteArray(int length, AllocationType alloc) {
MOZ_RELEASE_ASSERT(length >= 0);
diff --git a/js/src/regexp/regexp-shim.h b/js/src/regexp/regexp-shim.h
index 462e396f40..942fcf733f 100644
--- a/js/src/regexp/regexp-shim.h
+++ b/js/src/regexp/regexp-shim.h
@@ -22,7 +22,9 @@
#include <cctype>
#include "jit/Label.h"
+#include "jit/shared/Assembler-shared.h"
#include "js/Value.h"
+#include "regexp/RegExpTypes.h"
#include "regexp/util/flags.h"
#include "regexp/util/vector.h"
#include "regexp/util/zone.h"
@@ -571,10 +573,8 @@ class ByteArray : public HeapObject {
ByteArrayData* inner() const {
return static_cast<ByteArrayData*>(value_.toPrivate());
}
- PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
-
- friend class SMRegExpMacroAssembler;
public:
+ PseudoHandle<ByteArrayData> takeOwnership(Isolate* isolate);
byte get(uint32_t index) {
MOZ_ASSERT(index < length());
return inner()->data()[index];
@@ -674,15 +674,19 @@ class MOZ_NONHEAP_CLASS Handle {
};
inline ObjectRef operator->() const { return ObjectRef{**this}; }
+ // Builds a Handle that points at the JS::Value referenced by |handle|,
+ // using the private pointer constructor. No new handle location is
+ // allocated here.
+ static Handle<T> fromHandleValue(JS::HandleValue handle) {
+ return Handle(handle.address());
+ }
+
private:
- Handle(JS::Value* location) : location_(location) {}
+ Handle(const JS::Value* location) : location_(location) {}
template <typename>
friend class Handle;
template <typename>
friend class MaybeHandle;
- JS::Value* location_;
+ const JS::Value* location_;
};
// A Handle can be converted into a MaybeHandle. Converting a MaybeHandle
@@ -985,9 +989,13 @@ using Factory = Isolate;
class Isolate {
public:
//********** Isolate code **********//
- RegExpStack* regexp_stack() const { return regexp_stack_; }
- bool has_pending_exception() { return cx()->isExceptionPending(); }
- void StackOverflow() { js::ReportOverRecursed(cx()); }
+ RegExpStack* regexp_stack() const { return regexpStack_; }
+ byte* top_of_regexp_stack() const;
+
+ // This is called from inside no-GC code. Instead of suppressing GC
+ // to allocate the error, we return false from Execute and call
+ // ReportOverRecursed in the caller.
+ void StackOverflow() {}
#ifndef V8_INTL_SUPPORT
unibrow::Mapping<unibrow::Ecma262UnCanonicalize>* jsregexp_uncanonicalize() {
@@ -1066,7 +1074,7 @@ private:
friend class HandleScope;
JSContext* cx_;
- RegExpStack* regexp_stack_;
+ RegExpStack* regexpStack_;
Counters counters_;
};
@@ -1101,7 +1109,6 @@ class Code : public HeapObject {
c.value_ = JS::PrivateGCThingValue(JS::Value(object).toGCThing());
return c;
}
-private:
js::jit::JitCode* inner() {
return value_.toGCThing()->as<js::jit::JitCode>();
}
@@ -1124,7 +1131,7 @@ class Label {
public:
Label() : inner_(js::jit::Label()) {}
- operator js::jit::Label*() { return &inner_; }
+ js::jit::Label* inner() { return &inner_; }
void Unuse() { inner_.reset(); }
@@ -1138,6 +1145,9 @@ class Label {
private:
js::jit::Label inner_;
+ js::jit::CodeOffset patchOffset_;
+
+ friend class SMRegExpMacroAssembler;
};
// TODO: Map flags to jitoptions