summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author wolfbeast <mcwerewolf@wolfbeast.com> 2020-01-19 14:38:36 +0100
committer wolfbeast <mcwerewolf@wolfbeast.com> 2020-01-19 14:44:48 +0100
commit ea51616e31da82241c707407edbb82c20f84d319 (patch)
tree ac4de72c0c983cfd712911f295837aed87487006
parent 35dc8e5abb3a9f8cbbab4da5e57754b37dd54d59 (diff)
download uxp-ea51616e31da82241c707407edbb82c20f84d319.tar.gz
Issue #1362 - Revert "Implement regular expression lookbehind"
This reverts commit fa473930f424bf17a9e545b601c84dd2e61364e3.
-rw-r--r-- js/src/irregexp/NativeRegExpMacroAssembler.cpp 8
-rw-r--r-- js/src/irregexp/NativeRegExpMacroAssembler.h 7
-rw-r--r-- js/src/irregexp/RegExpAST.cpp 8
-rw-r--r-- js/src/irregexp/RegExpAST.h 33
-rw-r--r-- js/src/irregexp/RegExpBytecode.h 23
-rw-r--r-- js/src/irregexp/RegExpEngine.cpp 151
-rw-r--r-- js/src/irregexp/RegExpEngine.h 31
-rw-r--r-- js/src/irregexp/RegExpInterpreter.cpp 74
-rw-r--r-- js/src/irregexp/RegExpMacroAssembler.cpp 17
-rw-r--r-- js/src/irregexp/RegExpMacroAssembler.h 15
-rw-r--r-- js/src/irregexp/RegExpParser.cpp 130
-rw-r--r-- js/src/irregexp/RegExpParser.h 19
12 files changed, 157 insertions, 359 deletions
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
index e17eecb9bb..0fb5072973 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
@@ -582,7 +582,7 @@ NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start)
}
void
-NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start)
+NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start)
{
JitSpew(SPEW_PREFIX "CheckNotAtStart");
@@ -673,7 +673,7 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
}
void
-NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
+NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match)
{
JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
@@ -744,8 +744,8 @@ NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backw
}
void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- Label* on_no_match, bool unicode)
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
+ bool unicode)
{
JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h
index fc582dccf2..7a72e252ff 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.h
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.h
@@ -105,10 +105,9 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
diff --git a/js/src/irregexp/RegExpAST.cpp b/js/src/irregexp/RegExpAST.cpp
index 43867c3123..8dfd99057b 100644
--- a/js/src/irregexp/RegExpAST.cpp
+++ b/js/src/irregexp/RegExpAST.cpp
@@ -250,16 +250,16 @@ RegExpCapture::CaptureRegisters()
}
// ----------------------------------------------------------------------------
-// RegExpLookaround
+// RegExpLookahead
Interval
-RegExpLookaround::CaptureRegisters()
+RegExpLookahead::CaptureRegisters()
{
return body()->CaptureRegisters();
}
bool
-RegExpLookaround::IsAnchoredAtStart()
+RegExpLookahead::IsAnchoredAtStart()
{
- return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
+ return is_positive() && body()->IsAnchoredAtStart();
}
diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h
index 6f59842bcb..7bda6fc7e6 100644
--- a/js/src/irregexp/RegExpAST.h
+++ b/js/src/irregexp/RegExpAST.h
@@ -360,7 +360,6 @@ class RegExpCapture : public RegExpTree
virtual int min_match() { return body_->min_match(); }
virtual int max_match() { return body_->max_match(); }
RegExpTree* body() { return body_; }
- void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; }
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
@@ -370,29 +369,25 @@ class RegExpCapture : public RegExpTree
int index_;
};
-class RegExpLookaround : public RegExpTree
+class RegExpLookahead : public RegExpTree
{
public:
- enum Type { LOOKAHEAD, LOOKBEHIND };
-
- RegExpLookaround(RegExpTree* body,
- bool is_positive,
- int capture_count,
- int capture_from,
- Type type)
+ RegExpLookahead(RegExpTree* body,
+ bool is_positive,
+ int capture_count,
+ int capture_from)
: body_(body),
is_positive_(is_positive),
capture_count_(capture_count),
- capture_from_(capture_from),
- type_(type)
+ capture_from_(capture_from)
{}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
- virtual RegExpLookaround* AsLookaround();
+ virtual RegExpLookahead* AsLookahead();
virtual Interval CaptureRegisters();
- virtual bool IsLookaround();
+ virtual bool IsLookahead();
virtual bool IsAnchoredAtStart();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
@@ -400,14 +395,12 @@ class RegExpLookaround : public RegExpTree
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
- Type type() { return type_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
- Type type_;
};
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
@@ -424,14 +417,8 @@ class RegExpBackReference : public RegExpTree
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
- virtual int min_match() override { return 0; }
- // The capture may not be completely parsed yet, if the reference occurs
- // before the capture. In the ordinary case, nothing has been captured yet,
- // so the back reference must have the length 0. If the back reference is
- // inside a lookbehind, effectively making it a forward reference, we return
- virtual int max_match() override {
- return capture_->body() ? capture_->max_match() : 0;
- }
+ virtual int min_match() { return 0; }
+ virtual int max_match() { return capture_->max_match(); }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h
index ea3f80b4f0..f31b78c593 100644
--- a/js/src/irregexp/RegExpBytecode.h
+++ b/js/src/irregexp/RegExpBytecode.h
@@ -82,19 +82,16 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
-V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
-V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */
+V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
+V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
+V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp
index 62f94c3e72..4d691a5dc9 100644
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -721,8 +721,6 @@ ActionNode::EmptyMatchCheck(int start_register,
int
TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
- if (read_backward())
- return 0;
int answer = Length();
if (answer >= still_to_find)
return answer;
@@ -738,7 +736,8 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
int
TextNode::GreedyLoopTextLength()
{
- return Length();
+ TextElement elm = elements()[elements().length() - 1];
+ return elm.cp_offset() + elm.length();
}
RegExpNode*
@@ -888,8 +887,6 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo
int
BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
- if (read_backward())
- return 0;
if (budget <= 0)
return 0;
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
@@ -1581,9 +1578,6 @@ class irregexp::RegExpCompiler
current_expansion_factor_ = value;
}
- bool read_backward() { return read_backward_; }
- void set_read_backward(bool value) { read_backward_ = value; }
-
JSContext* cx() const { return cx_; }
LifoAlloc* alloc() const { return alloc_; }
@@ -1601,7 +1595,6 @@ class irregexp::RegExpCompiler
bool unicode_;
bool reg_exp_too_big_;
int current_expansion_factor_;
- bool read_backward_;
FrequencyCollator frequency_collator_;
JSContext* cx_;
LifoAlloc* alloc_;
@@ -1631,7 +1624,6 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun
unicode_(unicode),
reg_exp_too_big_(false),
current_expansion_factor_(1),
- read_backward_(false),
frequency_collator_(),
cx_(cx),
alloc_(alloc)
@@ -1755,7 +1747,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
// at the start of input.
ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
RegExpNode* char_class =
- alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), false, loop_node);
+ alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(char_class));
node = first_step_node;
@@ -1858,19 +1850,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
TextElementVector* elms =
compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
elms->append(TextElement::Atom(this));
- return compiler->alloc()->newInfallible<TextNode>(elms, compiler->read_backward(), on_success);
+ return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
}
RegExpNode*
RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(&elements_, compiler->read_backward(), on_success);
+ return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
}
RegExpNode*
RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(this, compiler->read_backward(), on_success);
+ return compiler->alloc()->newInfallible<TextNode>(this, on_success);
}
RegExpNode*
@@ -2011,8 +2003,7 @@ RegExpQuantifier::ToNode(int min,
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start && !compiler->read_backward())
- alternation->set_not_at_start();
+ if (not_at_start) alternation->set_not_at_start();
}
return answer;
}
@@ -2024,9 +2015,8 @@ RegExpQuantifier::ToNode(int min,
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0,
- compiler->read_backward());
- if (not_at_start && !compiler->read_backward())
+ LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
+ if (not_at_start)
center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
@@ -2102,7 +2092,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
TextNode* newline_matcher =
- alloc->newInfallible<TextNode>(newline_atom, false,
+ alloc->newInfallible<TextNode>(newline_atom,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
0, // No captures inside.
@@ -2134,7 +2124,6 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
RegExpCapture::EndRegister(index()),
- compiler->read_backward(),
on_success);
}
@@ -2145,7 +2134,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
}
RegExpNode*
-RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
+RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
@@ -2156,10 +2145,6 @@ RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
- RegExpNode* result;
- bool was_reading_backward = compiler->read_backward();
- compiler->set_read_backward(type() == LOOKBEHIND);
-
if (is_positive()) {
RegExpNode* bodyNode =
body()->ToNode(compiler,
@@ -2168,39 +2153,37 @@ RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
register_count,
register_start,
on_success));
- result = ActionNode::BeginSubmatch(stack_pointer_register,
- position_register,
- bodyNode);
- } else {
- // We use a ChoiceNode for a negative lookahead because it has most of
- // the characteristics we need. It has the body of the lookahead as its
- // first alternative and the expression after the lookahead of the second
- // alternative. If the first alternative succeeds then the
- // NegativeSubmatchSuccess will unwind the stack including everything the
- // choice node set up and backtrack. If the first alternative fails then
- // the second alternative is tried, which is exactly the desired result
- // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
- // ChoiceNode that knows to ignore the first exit when calculating quick
- // checks.
- LifoAlloc* alloc = compiler->alloc();
-
- RegExpNode* success =
- alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
- stack_pointer_register,
- position_register,
- register_count,
- register_start);
- GuardedAlternative body_alt(body()->ToNode(compiler, success));
-
- ChoiceNode* choice_node =
- alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
-
- result = ActionNode::BeginSubmatch(stack_pointer_register,
+ return ActionNode::BeginSubmatch(stack_pointer_register,
position_register,
- choice_node);
- }
- compiler->set_read_backward(was_reading_backward);
- return result;
+ bodyNode);
+ }
+
+ // We use a ChoiceNode for a negative lookahead because it has most of
+ // the characteristics we need. It has the body of the lookahead as its
+ // first alternative and the expression after the lookahead of the second
+ // alternative. If the first alternative succeeds then the
+ // NegativeSubmatchSuccess will unwind the stack including everything the
+ // choice node set up and backtrack. If the first alternative fails then
+ // the second alternative is tried, which is exactly the desired result
+ // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
+ // ChoiceNode that knows to ignore the first exit when calculating quick
+ // checks.
+ LifoAlloc* alloc = compiler->alloc();
+
+ RegExpNode* success =
+ alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
+ stack_pointer_register,
+ position_register,
+ register_count,
+ register_start);
+ GuardedAlternative body_alt(body()->ToNode(compiler, success));
+
+ ChoiceNode* choice_node =
+ alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
+
+ return ActionNode::BeginSubmatch(stack_pointer_register,
+ position_register,
+ choice_node);
}
RegExpNode*
@@ -2215,14 +2198,8 @@ RegExpCapture::ToNode(RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success)
{
- MOZ_ASSERT(body);
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
- if (compiler->read_backward()) {
- // std::swap(start_reg, end_reg);
- start_reg = RegExpCapture::EndRegister(index);
- end_reg = RegExpCapture::StartRegister(index);
- }
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, true, body_node);
@@ -2233,15 +2210,8 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
const RegExpTreeVector& children = nodes();
RegExpNode* current = on_success;
- if (compiler->read_backward()) {
- for (int i = 0; i < children.length(); i++) {
- current = children[i]->ToNode(compiler, current);
- }
- } else {
- for (int i = children.length() - 1; i >= 0; i--) {
- current = children[i]->ToNode(compiler, current);
- }
- }
+ for (int i = children.length() - 1; i >= 0; i--)
+ current = children[i]->ToNode(compiler, current);
return current;
}
@@ -2794,6 +2764,7 @@ Trace::InvalidateCurrentCharacter()
void
Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
{
+ MOZ_ASSERT(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
// we preloaded any characters into it.
@@ -3138,9 +3109,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
return;
}
if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
+ assembler->CheckNotAtStart(trace->backtrack());
Trace at_start_trace = *trace;
- at_start_trace.set_at_start(Trace::TRUE_VALUE);
+ at_start_trace.set_at_start(true);
on_success()->Emit(compiler, &at_start_trace);
return;
}
@@ -3843,10 +3814,9 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
jit::Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
int element_count = elements().length();
- int backward_offset = read_backward() ? -Length() : 0;
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
TextElement elm = elements()[i];
- int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
+ int cp_offset = trace->cp_offset() + elm.cp_offset();
if (elm.text_type() == TextElement::ATOM) {
const CharacterVector& quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
@@ -3874,12 +3844,11 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != nullptr) {
- bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
bool bound_checked = emit_function(compiler,
quarks[j],
backtrack,
cp_offset + j,
- bounds_check,
+ *checked_up_to < cp_offset + j,
preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
@@ -3890,14 +3859,13 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
- bool bounds_check = *checked_up_to < cp_offset || read_backward();
EmitCharClass(alloc(),
assembler,
cc,
ascii,
backtrack,
cp_offset,
- bounds_check,
+ *checked_up_to < cp_offset,
preloaded);
UpdateBoundsCheck(cp_offset, checked_up_to);
}
@@ -3977,11 +3945,8 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
}
Trace successor_trace(*trace);
- // If we advance backward, we may end up at the start.
- successor_trace.AdvanceCurrentPositionInTrace(
- read_backward() ? -Length() : Length(), compiler);
- successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
- : Trace::FALSE_VALUE);
+ successor_trace.set_at_start(false);
+ successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
}
@@ -4153,8 +4118,6 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea
RegExpNode*
TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
{
- if (read_backward()) return NULL;
-
if (elements().length() != 1)
return nullptr;
@@ -4202,7 +4165,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
node = seq_node->on_success();
}
- return read_backward() ? -length : length;
+ return length;
}
// Creates a list of AlternativeGenerations. If the list has a reasonable
@@ -4277,7 +4240,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
jit::Label greedy_loop_label;
Trace counter_backtrack_trace;
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
- if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE);
+ if (not_at_start()) counter_backtrack_trace.set_at_start(false);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
@@ -4293,7 +4256,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
current_trace = &counter_backtrack_trace;
jit::Label greedy_match_failed;
Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
+ if (not_at_start()) greedy_match_trace.set_at_start(false);
greedy_match_trace.set_backtrack(&greedy_match_failed);
jit::Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -4642,14 +4605,11 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
MOZ_ASSERT(start_reg_ + 1 == end_reg_);
if (compiler->ignore_case()) {
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
- read_backward(),
trace->backtrack(),
compiler->unicode());
} else {
- assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack());
+ assembler->CheckNotBackReference(start_reg_, trace->backtrack());
}
- // We are going to advance backward, so we may end up at the start.
- if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
on_success()->Emit(compiler, trace);
}
@@ -5017,6 +4977,7 @@ QuickCheckDetails::Clear()
void
QuickCheckDetails::Advance(int by, bool ascii)
{
+ MOZ_ASSERT(by >= 0);
if (by >= characters_) {
Clear();
return;
diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h
index c4409dcca0..1a8fd4b220 100644
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -119,7 +119,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t
VISIT(Atom) \
VISIT(Quantifier) \
VISIT(Capture) \
- VISIT(Lookaround) \
+ VISIT(Lookahead) \
VISIT(BackReference) \
VISIT(Empty) \
VISIT(Text)
@@ -763,19 +763,15 @@ class TextNode : public SeqRegExpNode
{
public:
TextNode(TextElementVector* elements,
- bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(elements),
- read_backward_(read_backward)
+ elements_(elements)
{}
TextNode(RegExpCharacterClass* that,
- bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(alloc()->newInfallible<TextElementVector>(*alloc())),
- read_backward_(read_backward)
+ elements_(alloc()->newInfallible<TextElementVector>(*alloc()))
{
elements_->append(TextElement::CharClass(that));
}
@@ -788,7 +784,6 @@ class TextNode : public SeqRegExpNode
int characters_filled_in,
bool not_at_start);
TextElementVector& elements() { return *elements_; }
- bool read_backward() { return read_backward_; }
void MakeCaseIndependent(bool is_ascii, bool unicode);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
@@ -819,7 +814,6 @@ class TextNode : public SeqRegExpNode
int* checked_up_to);
int Length();
TextElementVector* elements_;
- bool read_backward_;
};
class AssertionNode : public SeqRegExpNode
@@ -888,18 +882,15 @@ class BackReferenceNode : public SeqRegExpNode
public:
BackReferenceNode(int start_reg,
int end_reg,
- bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
- end_reg_(end_reg),
- read_backward_(read_backward)
+ end_reg_(end_reg)
{}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
- bool read_backward() { return read_backward_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
@@ -918,7 +909,6 @@ class BackReferenceNode : public SeqRegExpNode
private:
int start_reg_;
int end_reg_;
- bool read_backward_;
};
class EndNode : public RegExpNode
@@ -1063,7 +1053,6 @@ class ChoiceNode : public RegExpNode
void set_being_calculated(bool b) { being_calculated_ = b; }
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
- virtual bool read_backward() { return false; }
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
@@ -1122,13 +1111,11 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
class LoopChoiceNode : public ChoiceNode
{
public:
- explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length,
- bool read_backward)
+ explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length)
: ChoiceNode(alloc, 2),
loop_node_(nullptr),
continue_node_(nullptr),
- body_can_be_zero_length_(body_can_be_zero_length),
- read_backward_(read_backward)
+ body_can_be_zero_length_(body_can_be_zero_length)
{}
void AddLoopAlternative(GuardedAlternative alt);
@@ -1146,7 +1133,6 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
- virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
@@ -1161,7 +1147,6 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node_;
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
- bool read_backward_;
};
// Improve the speed that we scan for an initial point where a non-anchored
@@ -1437,8 +1422,8 @@ class Trace
}
TriBool at_start() { return at_start_; }
- void set_at_start(TriBool at_start) {
- at_start_ = at_start;
+ void set_at_start(bool at_start) {
+ at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
}
jit::Label* backtrack() { return backtrack_; }
jit::Label* loop_label() { return loop_label_; }
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp
index d09b4671e4..7fd2d983a5 100644
--- a/js/src/irregexp/RegExpInterpreter.cpp
+++ b/js/src/irregexp/RegExpInterpreter.cpp
@@ -222,8 +222,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= (int)length || pos < 0) {
+ size_t pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos >= length) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
current_char = chars[pos];
@@ -238,8 +238,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
- int pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > (int)length || pos < 0) {
+ size_t pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 2 > length) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
CharT next = chars[pos + 1];
@@ -425,30 +425,6 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
- BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from < 0 || len <= 0) {
- pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
- break;
- }
- if (int(current) - len < 0) {
- pc = byteCode + Load32Aligned(pc + 4);
- break;
- } else {
- int i;
- for (i = 0; i < len; i++) {
- if (chars[from + i] != chars[int(current) - len + i]) {
- pc = byteCode + Load32Aligned(pc + 4);
- break;
- }
- }
- if (i < len) break;
- current -= len;
- }
- pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
- break;
- }
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
@@ -489,46 +465,6 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
}
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from < 0 || len <= 0) {
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
- break;
- }
- if (int(current) - len < 0) {
- pc = byteCode + Load32Aligned(pc + 4);
- break;
- }
- if (CaseInsensitiveCompareStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
- current -= len;
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
- } else {
- pc = byteCode + Load32Aligned(pc + 4);
- }
- break;
-
- }
- BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) {
- int from = registers[insn >> BYTECODE_SHIFT];
- int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
- if (from < 0 || len <= 0) {
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
- break;
- }
- if (int(current) - len < 0) {
- pc = byteCode + Load32Aligned(pc + 4);
- break;
- }
- if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
- current -= len;
- pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
- } else {
- pc = byteCode + Load32Aligned(pc + 4);
- }
- break;
-
- }
BYTECODE(CHECK_AT_START)
if (current == 0)
pc = byteCode + Load32Aligned(pc + 4);
@@ -536,7 +472,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
pc += BC_CHECK_AT_START_LENGTH;
break;
BYTECODE(CHECK_NOT_AT_START)
- if (current + (insn >> BYTECODE_SHIFT) == 0)
+ if (current == 0)
pc += BC_CHECK_NOT_AT_START_LENGTH;
else
pc = byteCode + Load32Aligned(pc + 4);
diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp
index 6b1ceba8ad..d66d0d204a 100644
--- a/js/src/irregexp/RegExpMacroAssembler.cpp
+++ b/js/src/irregexp/RegExpMacroAssembler.cpp
@@ -226,37 +226,32 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre
}
void
-InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start)
+InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
{
- Emit(BC_CHECK_NOT_AT_START, cp_offset);
+ Emit(BC_CHECK_NOT_AT_START, 0);
EmitOrLink(on_not_at_start);
}
void
-InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward,
- jit::Label* on_no_match)
+InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
- start_reg);
+ Emit(BC_CHECK_NOT_BACK_REF, start_reg);
EmitOrLink(on_no_match);
}
void
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
- bool read_backward,
jit::Label* on_no_match,
bool unicode)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
if (unicode)
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE,
- start_reg);
+ Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
else
- Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE,
- start_reg);
+ Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
EmitOrLink(on_no_match);
}
diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h
index c5def92f29..dca2edf905 100644
--- a/js/src/irregexp/RegExpMacroAssembler.h
+++ b/js/src/irregexp/RegExpMacroAssembler.h
@@ -110,10 +110,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0;
virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0;
virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
- virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0;
- virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0;
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- jit::Label* on_no_match, bool unicode) = 0;
+ virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
+ virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
+ bool unicode) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -245,10 +245,9 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
- jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index 1ad044e8e2..d4308d0d84 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -227,7 +227,6 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
alloc(alloc),
captures_(nullptr),
next_pos_(chars),
- captures_started_(0),
end_(end),
current_(kEndMarker),
capture_count_(0),
@@ -420,8 +419,7 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
static inline RegExpTree*
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
{
- return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false,
- 0, 0, RegExpLookaround::LOOKAHEAD);
+ return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
}
static bool
@@ -1216,38 +1214,6 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
return true;
}
-template <typename CharT>
-RegExpCapture*
-RegExpParser<CharT>::GetCapture(int index) {
- // The index for the capture groups are one-based. Its index in the list is
- // zero-based.
- int known_captures =
- is_scanned_for_captures_ ? capture_count_ : captures_started_;
- MOZ_ASSERT(index <= known_captures);
- if (captures_ == NULL) {
- captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
- }
- while ((int)captures_->length() < known_captures) {
- RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1);
- captures_->append(capture);
- }
- return (*captures_)[index - 1];
-}
-
-
-template <typename CharT>
-bool
-RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index) {
- for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
- if (s->group_type() != CAPTURE) continue;
- // Return true if we found the matching capture index.
- if (index == s->capture_index()) return true;
- // Abort if index is larger than what has been parsed up till this state.
- if (index > s->capture_index()) return false;
- }
- return false;
-}
-
// QuantifierPrefix ::
// { DecimalDigits }
// { DecimalDigits , }
@@ -1490,24 +1456,24 @@ RegExpTree*
RegExpParser<CharT>::ParseDisjunction()
{
// Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0);
- RegExpParserState* state = &initial_state;
+ RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
+ RegExpParserState* stored_state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
while (true) {
switch (current()) {
case kEndMarker:
- if (state->IsSubexpression()) {
+ if (stored_state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
return ReportError(JSMSG_MISSING_PAREN);
}
- MOZ_ASSERT(INITIAL == state->group_type());
+ MOZ_ASSERT(INITIAL == stored_state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
- if (!state->IsSubexpression())
+ if (!stored_state->IsSubexpression())
return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
- MOZ_ASSERT(INITIAL != state->group_type());
+ MOZ_ASSERT(INITIAL != stored_state->group_type());
Advance();
// End disjunction parsing and convert builder content to new single
@@ -1516,30 +1482,29 @@ RegExpParser<CharT>::ParseDisjunction()
int end_capture_index = captures_started();
- int capture_index = state->capture_index();
- SubexpressionType group_type = state->group_type();
+ int capture_index = stored_state->capture_index();
+ SubexpressionType group_type = stored_state->group_type();
+
+ // Restore previous state.
+ stored_state = stored_state->previous_state();
+ builder = stored_state->builder();
// Build result of subexpression.
if (group_type == CAPTURE) {
- RegExpCapture* capture = GetCapture(capture_index);
- capture->set_body(body);
+ RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
+ (*captures_)[capture_index - 1] = capture;
body = capture;
} else if (group_type != GROUPING) {
- MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND ||
- group_type == NEGATIVE_LOOKAROUND);
- bool is_positive = (group_type == POSITIVE_LOOKAROUND);
- body = alloc->newInfallible<RegExpLookaround>(body,
+ MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
+ group_type == NEGATIVE_LOOKAHEAD);
+ bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
+ body = alloc->newInfallible<RegExpLookahead>(body,
is_positive,
end_capture_index - capture_index,
- capture_index,
- state->lookaround_type());
+ capture_index);
}
-
- // Restore previous state.
- state = state->previous_state();
- builder = state->builder();
builder->AddAtom(body);
- if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND))
+ if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
continue;
// For compatability with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
@@ -1599,7 +1564,6 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '(': {
SubexpressionType subexpr_type = CAPTURE;
- RegExpLookaround::Type lookaround_type = state->lookaround_type();
Advance();
if (current() == '?') {
switch (Next()) {
@@ -1607,39 +1571,26 @@ RegExpParser<CharT>::ParseDisjunction()
subexpr_type = GROUPING;
break;
case '=':
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = POSITIVE_LOOKAROUND;
+ subexpr_type = POSITIVE_LOOKAHEAD;
break;
case '!':
- lookaround_type = RegExpLookaround::LOOKAHEAD;
- subexpr_type = NEGATIVE_LOOKAROUND;
+ subexpr_type = NEGATIVE_LOOKAHEAD;
break;
- case '<':
- Advance();
- lookaround_type = RegExpLookaround::LOOKBEHIND;
- if (Next() == '=') {
- subexpr_type = POSITIVE_LOOKAROUND;
- break;
- } else if (Next() == '!') {
- subexpr_type = NEGATIVE_LOOKAROUND;
- break;
- }
- // We didn't get a positive or negative after '<'.
- // That's an error.
- return ReportError(JSMSG_INVALID_GROUP);
default:
return ReportError(JSMSG_INVALID_GROUP);
}
Advance(2);
} else {
+ if (captures_ == nullptr)
+ captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
if (captures_started() >= kMaxCaptures)
return ReportError(JSMSG_TOO_MANY_PARENS);
- captures_started_++;
+ captures_->append((RegExpCapture*) nullptr);
}
// Store current state and begin new disjunction parsing.
- state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type,
- lookaround_type, captures_started_);
- builder = state->builder();
+ stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
+ captures_started());
+ builder = stored_state->builder();
continue;
}
case '[': {
@@ -1694,18 +1645,19 @@ RegExpParser<CharT>::ParseDisjunction()
case '7': case '8': case '9': {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
- if (state->IsInsideCaptureGroup(index)) {
- // The backreference is inside the capture group it refers to.
- // Nothing can possibly have been captured yet.
- builder->AddEmpty();
- } else {
- RegExpCapture* capture = GetCapture(index);
- RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
- if (unicode_)
- builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
- else
- builder->AddAtom(atom);
+ RegExpCapture* capture = nullptr;
+ if (captures_ != nullptr && index <= (int) captures_->length()) {
+ capture = (*captures_)[index - 1];
+ }
+ if (capture == nullptr) {
+ builder->AddEmpty();
+ break;
}
+ RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
+ if (unicode_)
+ builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
+ else
+ builder->AddAtom(atom);
break;
}
if (unicode_)
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index ee57f04365..a58800a910 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -229,7 +229,7 @@ class RegExpParser
bool simple() { return simple_; }
bool contains_anchor() { return contains_anchor_; }
void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_started_; }
+ int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); }
const CharT* position() { return next_pos_ - 1; }
static const int kMaxCaptures = 1 << 16;
@@ -239,8 +239,8 @@ class RegExpParser
enum SubexpressionType {
INITIAL,
CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAROUND,
- NEGATIVE_LOOKAROUND,
+ POSITIVE_LOOKAHEAD,
+ NEGATIVE_LOOKAHEAD,
GROUPING
};
@@ -249,12 +249,10 @@ class RegExpParser
RegExpParserState(LifoAlloc* alloc,
RegExpParserState* previous_state,
SubexpressionType group_type,
- RegExpLookaround::Type lookaround_type,
int disjunction_capture_index)
: previous_state_(previous_state),
builder_(alloc->newInfallible<RegExpBuilder>(alloc)),
group_type_(group_type),
- lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index)
{}
// Parser state of containing expression, if any.
@@ -264,16 +262,11 @@ class RegExpParser
RegExpBuilder* builder() { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() { return group_type_; }
- // Lookahead or Lookbehind.
- RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
- // Check whether the parser is inside a capture group with the given index.
- bool IsInsideCaptureGroup(int index);
-
private:
// Linked list implementation of stack of states.
RegExpParserState* previous_state_;
@@ -281,15 +274,10 @@ class RegExpParser
RegExpBuilder* builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
SubexpressionType group_type_;
- // Stored read direction.
- RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
};
- // Return the 1-indexed RegExpCapture object, allocate if necessary.
- RegExpCapture* GetCapture(int index);
-
widechar current() { return current_; }
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < end_; }
@@ -306,7 +294,6 @@ class RegExpParser
const CharT* next_pos_;
const CharT* end_;
widechar current_;
- int captures_started_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool has_more_;