summaryrefslogtreecommitdiff
path: root/js/src
diff options
context:
space:
mode:
authorMoonchild <moonchild@palemoon.org>2022-10-11 10:59:43 +0000
committerMoonchild <moonchild@palemoon.org>2022-10-11 10:59:43 +0000
commitd10e8a5b06b6e27b3ee3f8505e6d269ddc4bd1cb (patch)
tree129483c1d6f98a86df8645dd6624edf6aae419d5 /js/src
parentfb173db64bdeeca0b236a360ed79b36ad9dd2273 (diff)
downloaduxp-d10e8a5b06b6e27b3ee3f8505e6d269ddc4bd1cb.tar.gz
Issue #1279 - Implement regular expressions lookbehind (v2)
Because of some additional changes in the JS engine in the meantime, this implementation should work and not crash due to slot allocation.
Diffstat (limited to 'js/src')
-rw-r--r--js/src/builtin/TestingFunctions.cpp6
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.cpp8
-rw-r--r--js/src/irregexp/NativeRegExpMacroAssembler.h7
-rw-r--r--js/src/irregexp/RegExpAST.cpp8
-rw-r--r--js/src/irregexp/RegExpAST.h33
-rw-r--r--js/src/irregexp/RegExpBytecode.h23
-rw-r--r--js/src/irregexp/RegExpEngine.cpp151
-rw-r--r--js/src/irregexp/RegExpEngine.h31
-rw-r--r--js/src/irregexp/RegExpInterpreter.cpp74
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.cpp17
-rw-r--r--js/src/irregexp/RegExpMacroAssembler.h15
-rw-r--r--js/src/irregexp/RegExpParser.cpp130
-rw-r--r--js/src/irregexp/RegExpParser.h19
13 files changed, 362 insertions, 160 deletions
diff --git a/js/src/builtin/TestingFunctions.cpp b/js/src/builtin/TestingFunctions.cpp
index 8bcae4d826..cb691893f2 100644
--- a/js/src/builtin/TestingFunctions.cpp
+++ b/js/src/builtin/TestingFunctions.cpp
@@ -3827,10 +3827,10 @@ ConvertRegExpTreeToObject(JSContext* cx, irregexp::RegExpTree* tree)
return nullptr;
return obj;
}
- if (tree->IsLookahead()) {
- if (!StringProp(cx, obj, "type", "Lookahead"))
+ if (tree->IsLookaround()) {
+ if (!StringProp(cx, obj, "type", "Lookaround"))
return nullptr;
- irregexp::RegExpLookahead* t = tree->AsLookahead();
+ irregexp::RegExpLookaround* t = tree->AsLookaround();
if (!BooleanProp(cx, obj, "is_positive", t->is_positive()))
return nullptr;
if (!TreeProp(cx, obj, "body", t->body()))
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.cpp b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
index a3756f5fff..2efbf1bf76 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.cpp
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.cpp
@@ -568,7 +568,7 @@ NativeRegExpMacroAssembler::CheckAtStart(Label* on_at_start)
}
void
-NativeRegExpMacroAssembler::CheckNotAtStart(Label* on_not_at_start)
+NativeRegExpMacroAssembler::CheckNotAtStart(int cp_offset, Label* on_not_at_start)
{
JitSpew(SPEW_PREFIX "CheckNotAtStart");
@@ -659,7 +659,7 @@ NativeRegExpMacroAssembler::CheckGreedyLoop(Label* on_tos_equals_current_positio
}
void
-NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_match)
+NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)
{
JitSpew(SPEW_PREFIX "CheckNotBackReference(%d)", start_reg);
@@ -730,8 +730,8 @@ NativeRegExpMacroAssembler::CheckNotBackReference(int start_reg, Label* on_no_ma
}
void
-NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, Label* on_no_match,
- bool unicode)
+NativeRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ Label* on_no_match, bool unicode)
{
JitSpew(SPEW_PREFIX "CheckNotBackReferenceIgnoreCase(%d, %d)", start_reg, unicode);
diff --git a/js/src/irregexp/NativeRegExpMacroAssembler.h b/js/src/irregexp/NativeRegExpMacroAssembler.h
index 6bb14ab662..3a04a33cf2 100644
--- a/js/src/irregexp/NativeRegExpMacroAssembler.h
+++ b/js/src/irregexp/NativeRegExpMacroAssembler.h
@@ -104,9 +104,10 @@ class MOZ_STACK_CLASS NativeRegExpMacroAssembler final : public RegExpMacroAssem
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
diff --git a/js/src/irregexp/RegExpAST.cpp b/js/src/irregexp/RegExpAST.cpp
index 14dfe8cea5..dc8d3b4c2c 100644
--- a/js/src/irregexp/RegExpAST.cpp
+++ b/js/src/irregexp/RegExpAST.cpp
@@ -249,16 +249,16 @@ RegExpCapture::CaptureRegisters()
}
// ----------------------------------------------------------------------------
-// RegExpLookahead
+// RegExpLookaround
Interval
-RegExpLookahead::CaptureRegisters()
+RegExpLookaround::CaptureRegisters()
{
return body()->CaptureRegisters();
}
bool
-RegExpLookahead::IsAnchoredAtStart()
+RegExpLookaround::IsAnchoredAtStart()
{
- return is_positive() && body()->IsAnchoredAtStart();
+ return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
}
diff --git a/js/src/irregexp/RegExpAST.h b/js/src/irregexp/RegExpAST.h
index bff4ee81dd..2cdf71aad8 100644
--- a/js/src/irregexp/RegExpAST.h
+++ b/js/src/irregexp/RegExpAST.h
@@ -359,6 +359,7 @@ class RegExpCapture : public RegExpTree
virtual int min_match() { return body_->min_match(); }
virtual int max_match() { return body_->max_match(); }
RegExpTree* body() { return body_; }
+ void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; }
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
@@ -368,25 +369,29 @@ class RegExpCapture : public RegExpTree
int index_;
};
-class RegExpLookahead : public RegExpTree
+class RegExpLookaround : public RegExpTree
{
public:
- RegExpLookahead(RegExpTree* body,
- bool is_positive,
- int capture_count,
- int capture_from)
+ enum Type { LOOKAHEAD, LOOKBEHIND };
+
+ RegExpLookaround(RegExpTree* body,
+ bool is_positive,
+ int capture_count,
+ int capture_from,
+ Type type)
: body_(body),
is_positive_(is_positive),
capture_count_(capture_count),
- capture_from_(capture_from)
+ capture_from_(capture_from),
+ type_(type)
{}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
- virtual RegExpLookahead* AsLookahead();
+ virtual RegExpLookaround* AsLookaround();
virtual Interval CaptureRegisters();
- virtual bool IsLookahead();
+ virtual bool IsLookaround();
virtual bool IsAnchoredAtStart();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
@@ -394,12 +399,14 @@ class RegExpLookahead : public RegExpTree
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
+ Type type() { return type_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
+ Type type_;
};
typedef InfallibleVector<RegExpCapture*, 1> RegExpCaptureVector;
@@ -416,8 +423,14 @@ class RegExpBackReference : public RegExpTree
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
- virtual int min_match() { return 0; }
- virtual int max_match() { return capture_->max_match(); }
+ virtual int min_match() override { return 0; }
+ // The capture may not be completely parsed yet, if the reference occurs
+ // before the capture. In the ordinary case, nothing has been captured yet,
+ // so the back reference must have the length 0. If the back reference is
+ // inside a lookbehind, effectively making it a forward reference, we return
+ virtual int max_match() override {
+ return capture_->body() ? capture_->max_match() : 0;
+ }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
diff --git a/js/src/irregexp/RegExpBytecode.h b/js/src/irregexp/RegExpBytecode.h
index 7454f88f73..42326b3d47 100644
--- a/js/src/irregexp/RegExpBytecode.h
+++ b/js/src/irregexp/RegExpBytecode.h
@@ -81,16 +81,19 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
-V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
-V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
-V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
-V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
-V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
-V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */ \
-V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 48, 8) /* bc8 reg_idx24 addr32 */
+V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
+V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
+V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_NOT_AT_START, 46, 8) /* bc8 pad24 addr32 */ \
+V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
+V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
+V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 50, 8) /* bc8 reg_idx24 addr32 */ \
+V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE, 51, 8) /* bc8 reg_idx24 addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
diff --git a/js/src/irregexp/RegExpEngine.cpp b/js/src/irregexp/RegExpEngine.cpp
index 07679e21b8..4ed61fd364 100644
--- a/js/src/irregexp/RegExpEngine.cpp
+++ b/js/src/irregexp/RegExpEngine.cpp
@@ -720,6 +720,8 @@ ActionNode::EmptyMatchCheck(int start_register,
int
TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
+ if (read_backward())
+ return 0;
int answer = Length();
if (answer >= still_to_find)
return answer;
@@ -735,8 +737,7 @@ TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
int
TextNode::GreedyLoopTextLength()
{
- TextElement elm = elements()[elements().length() - 1];
- return elm.cp_offset() + elm.length();
+ return Length();
}
RegExpNode*
@@ -886,6 +887,8 @@ AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, boo
int
BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
{
+ if (read_backward())
+ return 0;
if (budget <= 0)
return 0;
return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
@@ -1577,6 +1580,9 @@ class irregexp::RegExpCompiler
current_expansion_factor_ = value;
}
+ bool read_backward() { return read_backward_; }
+ void set_read_backward(bool value) { read_backward_ = value; }
+
JSContext* cx() const { return cx_; }
LifoAlloc* alloc() const { return alloc_; }
@@ -1594,6 +1600,7 @@ class irregexp::RegExpCompiler
bool unicode_;
bool reg_exp_too_big_;
int current_expansion_factor_;
+ bool read_backward_;
FrequencyCollator frequency_collator_;
JSContext* cx_;
LifoAlloc* alloc_;
@@ -1623,6 +1630,7 @@ RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_coun
unicode_(unicode),
reg_exp_too_big_(false),
current_expansion_factor_(1),
+ read_backward_(false),
frequency_collator_(),
cx_(cx),
alloc_(alloc)
@@ -1746,7 +1754,7 @@ irregexp::CompilePattern(JSContext* cx, RegExpShared* shared, RegExpCompileData*
// at the start of input.
ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
RegExpNode* char_class =
- alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
+ alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), false, loop_node);
first_step_node->AddAlternative(GuardedAlternative(captured_body));
first_step_node->AddAlternative(GuardedAlternative(char_class));
node = first_step_node;
@@ -1849,19 +1857,19 @@ RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
TextElementVector* elms =
compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
elms->append(TextElement::Atom(this));
- return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(elms, compiler->read_backward(), on_success);
}
RegExpNode*
RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(&elements_, compiler->read_backward(), on_success);
}
RegExpNode*
RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
- return compiler->alloc()->newInfallible<TextNode>(this, on_success);
+ return compiler->alloc()->newInfallible<TextNode>(this, compiler->read_backward(), on_success);
}
RegExpNode*
@@ -2002,7 +2010,8 @@ RegExpQuantifier::ToNode(int min,
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
- if (not_at_start) alternation->set_not_at_start();
+ if (not_at_start && !compiler->read_backward())
+ alternation->set_not_at_start();
}
return answer;
}
@@ -2014,8 +2023,9 @@ RegExpQuantifier::ToNode(int min,
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
- LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
- if (not_at_start)
+ LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0,
+ compiler->read_backward());
+ if (not_at_start && !compiler->read_backward())
center->set_not_at_start();
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
@@ -2091,7 +2101,7 @@ RegExpAssertion::ToNode(RegExpCompiler* compiler,
CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
TextNode* newline_matcher =
- alloc->newInfallible<TextNode>(newline_atom,
+ alloc->newInfallible<TextNode>(newline_atom, false,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
0, // No captures inside.
@@ -2123,6 +2133,7 @@ RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
RegExpCapture::EndRegister(index()),
+ compiler->read_backward(),
on_success);
}
@@ -2133,7 +2144,7 @@ RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
}
RegExpNode*
-RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
+RegExpLookaround::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
@@ -2144,6 +2155,10 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
int register_start =
register_of_first_capture + capture_from_ * registers_per_capture;
+ RegExpNode* result;
+ bool was_reading_backward = compiler->read_backward();
+ compiler->set_read_backward(type() == LOOKBEHIND);
+
if (is_positive()) {
RegExpNode* bodyNode =
body()->ToNode(compiler,
@@ -2152,37 +2167,39 @@ RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
register_count,
register_start,
on_success));
- return ActionNode::BeginSubmatch(stack_pointer_register,
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
+ position_register,
+ bodyNode);
+ } else {
+ // We use a ChoiceNode for a negative lookahead because it has most of
+ // the characteristics we need. It has the body of the lookahead as its
+ // first alternative and the expression after the lookahead of the second
+ // alternative. If the first alternative succeeds then the
+ // NegativeSubmatchSuccess will unwind the stack including everything the
+ // choice node set up and backtrack. If the first alternative fails then
+ // the second alternative is tried, which is exactly the desired result
+ // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
+ // ChoiceNode that knows to ignore the first exit when calculating quick
+ // checks.
+ LifoAlloc* alloc = compiler->alloc();
+
+ RegExpNode* success =
+ alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
+ stack_pointer_register,
+ position_register,
+ register_count,
+ register_start);
+ GuardedAlternative body_alt(body()->ToNode(compiler, success));
+
+ ChoiceNode* choice_node =
+ alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
+
+ result = ActionNode::BeginSubmatch(stack_pointer_register,
position_register,
- bodyNode);
- }
-
- // We use a ChoiceNode for a negative lookahead because it has most of
- // the characteristics we need. It has the body of the lookahead as its
- // first alternative and the expression after the lookahead of the second
- // alternative. If the first alternative succeeds then the
- // NegativeSubmatchSuccess will unwind the stack including everything the
- // choice node set up and backtrack. If the first alternative fails then
- // the second alternative is tried, which is exactly the desired result
- // for a negative lookahead. The NegativeLookaheadChoiceNode is a special
- // ChoiceNode that knows to ignore the first exit when calculating quick
- // checks.
- LifoAlloc* alloc = compiler->alloc();
-
- RegExpNode* success =
- alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
- stack_pointer_register,
- position_register,
- register_count,
- register_start);
- GuardedAlternative body_alt(body()->ToNode(compiler, success));
-
- ChoiceNode* choice_node =
- alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
-
- return ActionNode::BeginSubmatch(stack_pointer_register,
- position_register,
- choice_node);
+ choice_node);
+ }
+ compiler->set_read_backward(was_reading_backward);
+ return result;
}
RegExpNode*
@@ -2197,8 +2214,14 @@ RegExpCapture::ToNode(RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success)
{
+ MOZ_ASSERT(body);
int start_reg = RegExpCapture::StartRegister(index);
int end_reg = RegExpCapture::EndRegister(index);
+ if (compiler->read_backward()) {
+ // std::swap(start_reg, end_reg);
+ start_reg = RegExpCapture::EndRegister(index);
+ end_reg = RegExpCapture::StartRegister(index);
+ }
RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
RegExpNode* body_node = body->ToNode(compiler, store_end);
return ActionNode::StorePosition(start_reg, true, body_node);
@@ -2209,8 +2232,15 @@ RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
{
const RegExpTreeVector& children = nodes();
RegExpNode* current = on_success;
- for (int i = children.length() - 1; i >= 0; i--)
- current = children[i]->ToNode(compiler, current);
+ if (compiler->read_backward()) {
+ for (int i = 0; i < children.length(); i++) {
+ current = children[i]->ToNode(compiler, current);
+ }
+ } else {
+ for (int i = children.length() - 1; i >= 0; i--) {
+ current = children[i]->ToNode(compiler, current);
+ }
+ }
return current;
}
@@ -2763,7 +2793,6 @@ Trace::InvalidateCurrentCharacter()
void
Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
{
- MOZ_ASSERT(by > 0);
// We don't have an instruction for shifting the current character register
// down or for using a shifted value for anything so lets just forget that
// we preloaded any characters into it.
@@ -3108,9 +3137,9 @@ AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
return;
}
if (trace->at_start() == Trace::UNKNOWN) {
- assembler->CheckNotAtStart(trace->backtrack());
+ assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
Trace at_start_trace = *trace;
- at_start_trace.set_at_start(true);
+ at_start_trace.set_at_start(Trace::TRUE_VALUE);
on_success()->Emit(compiler, &at_start_trace);
return;
}
@@ -3813,9 +3842,10 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
jit::Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
int element_count = elements().length();
+ int backward_offset = read_backward() ? -Length() : 0;
for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
TextElement elm = elements()[i];
- int cp_offset = trace->cp_offset() + elm.cp_offset();
+ int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) {
const CharacterVector& quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
@@ -3843,11 +3873,12 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != nullptr) {
+ bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
bool bound_checked = emit_function(compiler,
quarks[j],
backtrack,
cp_offset + j,
- *checked_up_to < cp_offset + j,
+ bounds_check,
preloaded);
if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
}
@@ -3858,13 +3889,14 @@ TextNode::TextEmitPass(RegExpCompiler* compiler,
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
+ bool bounds_check = *checked_up_to < cp_offset || read_backward();
EmitCharClass(alloc(),
assembler,
cc,
ascii,
backtrack,
cp_offset,
- *checked_up_to < cp_offset,
+ bounds_check,
preloaded);
UpdateBoundsCheck(cp_offset, checked_up_to);
}
@@ -3944,8 +3976,11 @@ TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
}
Trace successor_trace(*trace);
- successor_trace.set_at_start(false);
- successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
+ // If we advance backward, we may end up at the start.
+ successor_trace.AdvanceCurrentPositionInTrace(
+ read_backward() ? -Length() : Length(), compiler);
+ successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
+ : Trace::FALSE_VALUE);
RecursionCheck rc(compiler);
on_success()->Emit(compiler, &successor_trace);
}
@@ -4117,6 +4152,8 @@ ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_lea
RegExpNode*
TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
{
+ if (read_backward()) return NULL;
+
if (elements().length() != 1)
return nullptr;
@@ -4164,7 +4201,7 @@ ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
node = seq_node->on_success();
}
- return length;
+ return read_backward() ? -length : length;
}
// Creates a list of AlternativeGenerations. If the list has a reasonable
@@ -4239,7 +4276,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
jit::Label greedy_loop_label;
Trace counter_backtrack_trace;
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
- if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+ if (not_at_start()) counter_backtrack_trace.set_at_start(Trace::FALSE_VALUE);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
@@ -4255,7 +4292,7 @@ ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
current_trace = &counter_backtrack_trace;
jit::Label greedy_match_failed;
Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
+ if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
greedy_match_trace.set_backtrack(&greedy_match_failed);
jit::Label loop_label;
macro_assembler->Bind(&loop_label);
@@ -4604,11 +4641,14 @@ BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
MOZ_ASSERT(start_reg_ + 1 == end_reg_);
if (compiler->ignore_case()) {
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+ read_backward(),
trace->backtrack(),
compiler->unicode());
} else {
- assembler->CheckNotBackReference(start_reg_, trace->backtrack());
+ assembler->CheckNotBackReference(start_reg_, read_backward(), trace->backtrack());
}
+ // We are going to advance backward, so we may end up at the start.
+ if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
on_success()->Emit(compiler, trace);
}
@@ -4976,7 +5016,6 @@ QuickCheckDetails::Clear()
void
QuickCheckDetails::Advance(int by, bool ascii)
{
- MOZ_ASSERT(by >= 0);
if (by >= characters_) {
Clear();
return;
diff --git a/js/src/irregexp/RegExpEngine.h b/js/src/irregexp/RegExpEngine.h
index e2cabaa026..0d760145f9 100644
--- a/js/src/irregexp/RegExpEngine.h
+++ b/js/src/irregexp/RegExpEngine.h
@@ -118,7 +118,7 @@ InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t
VISIT(Atom) \
VISIT(Quantifier) \
VISIT(Capture) \
- VISIT(Lookahead) \
+ VISIT(Lookaround) \
VISIT(BackReference) \
VISIT(Empty) \
VISIT(Text)
@@ -762,15 +762,19 @@ class TextNode : public SeqRegExpNode
{
public:
TextNode(TextElementVector* elements,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(elements)
+ elements_(elements),
+ read_backward_(read_backward)
{}
TextNode(RegExpCharacterClass* that,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
- elements_(alloc()->newInfallible<TextElementVector>(*alloc()))
+ elements_(alloc()->newInfallible<TextElementVector>(*alloc())),
+ read_backward_(read_backward)
{
elements_->append(TextElement::CharClass(that));
}
@@ -783,6 +787,7 @@ class TextNode : public SeqRegExpNode
int characters_filled_in,
bool not_at_start);
TextElementVector& elements() { return *elements_; }
+ bool read_backward() { return read_backward_; }
void MakeCaseIndependent(bool is_ascii, bool unicode);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
@@ -813,6 +818,7 @@ class TextNode : public SeqRegExpNode
int* checked_up_to);
int Length();
TextElementVector* elements_;
+ bool read_backward_;
};
class AssertionNode : public SeqRegExpNode
@@ -881,15 +887,18 @@ class BackReferenceNode : public SeqRegExpNode
public:
BackReferenceNode(int start_reg,
int end_reg,
+ bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
- end_reg_(end_reg)
+ end_reg_(end_reg),
+ read_backward_(read_backward)
{}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
+ bool read_backward() { return read_backward_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
@@ -908,6 +917,7 @@ class BackReferenceNode : public SeqRegExpNode
private:
int start_reg_;
int end_reg_;
+ bool read_backward_;
};
class EndNode : public RegExpNode
@@ -1052,6 +1062,7 @@ class ChoiceNode : public RegExpNode
void set_being_calculated(bool b) { being_calculated_ = b; }
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
+ virtual bool read_backward() { return false; }
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
@@ -1110,11 +1121,13 @@ class NegativeLookaheadChoiceNode : public ChoiceNode
class LoopChoiceNode : public ChoiceNode
{
public:
- explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length)
+ explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length,
+ bool read_backward)
: ChoiceNode(alloc, 2),
loop_node_(nullptr),
continue_node_(nullptr),
- body_can_be_zero_length_(body_can_be_zero_length)
+ body_can_be_zero_length_(body_can_be_zero_length),
+ read_backward_(read_backward)
{}
void AddLoopAlternative(GuardedAlternative alt);
@@ -1132,6 +1145,7 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
+ virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterASCII(int depth, bool ignore_case, bool unicode);
@@ -1146,6 +1160,7 @@ class LoopChoiceNode : public ChoiceNode
RegExpNode* loop_node_;
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
+ bool read_backward_;
};
// Improve the speed that we scan for an initial point where a non-anchored
@@ -1421,8 +1436,8 @@ class Trace
}
TriBool at_start() { return at_start_; }
- void set_at_start(bool at_start) {
- at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
+ void set_at_start(TriBool at_start) {
+ at_start_ = at_start;
}
jit::Label* backtrack() { return backtrack_; }
jit::Label* loop_label() { return loop_label_; }
diff --git a/js/src/irregexp/RegExpInterpreter.cpp b/js/src/irregexp/RegExpInterpreter.cpp
index 5d1f0ea805..eab428faa9 100644
--- a/js/src/irregexp/RegExpInterpreter.cpp
+++ b/js/src/irregexp/RegExpInterpreter.cpp
@@ -221,8 +221,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
- size_t pos = current + (insn >> BYTECODE_SHIFT);
- if (pos >= length) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos >= (int)length || pos < 0) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
current_char = chars[pos];
@@ -237,8 +237,8 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
break;
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
- size_t pos = current + (insn >> BYTECODE_SHIFT);
- if (pos + 2 > length) {
+ int pos = current + (insn >> BYTECODE_SHIFT);
+ if (pos + 2 > (int)length || pos < 0) {
pc = byteCode + Load32Aligned(pc + 4);
} else {
CharT next = chars[pos + 1];
@@ -424,6 +424,30 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
+ break;
+ }
+ if (int(current) - len < 0) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ } else {
+ int i;
+ for (i = 0; i < len; i++) {
+ if (chars[from + i] != chars[int(current) - len + i]) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ }
+ if (i < len) break;
+ current -= len;
+ }
+ pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
+ break;
+ }
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
@@ -464,6 +488,46 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
}
break;
}
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
+ break;
+ }
+ if (int(current) - len < 0) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ if (CaseInsensitiveCompareStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
+ current -= len;
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
+ } else {
+ pc = byteCode + Load32Aligned(pc + 4);
+ }
+ break;
+
+ }
+ BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE) {
+ int from = registers[insn >> BYTECODE_SHIFT];
+ int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
+ if (from < 0 || len <= 0) {
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
+ break;
+ }
+ if (int(current) - len < 0) {
+ pc = byteCode + Load32Aligned(pc + 4);
+ break;
+ }
+ if (CaseInsensitiveCompareUCStrings(chars + from, chars + int(current) - len, len * sizeof(CharT))) {
+ current -= len;
+ pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
+ } else {
+ pc = byteCode + Load32Aligned(pc + 4);
+ }
+ break;
+
+ }
BYTECODE(CHECK_AT_START)
if (current == 0)
pc = byteCode + Load32Aligned(pc + 4);
@@ -471,7 +535,7 @@ irregexp::InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* cha
pc += BC_CHECK_AT_START_LENGTH;
break;
BYTECODE(CHECK_NOT_AT_START)
- if (current == 0)
+ if (current + (insn >> BYTECODE_SHIFT) == 0)
pc += BC_CHECK_NOT_AT_START_LENGTH;
else
pc = byteCode + Load32Aligned(pc + 4);
diff --git a/js/src/irregexp/RegExpMacroAssembler.cpp b/js/src/irregexp/RegExpMacroAssembler.cpp
index 94f6934d3f..acb78203bd 100644
--- a/js/src/irregexp/RegExpMacroAssembler.cpp
+++ b/js/src/irregexp/RegExpMacroAssembler.cpp
@@ -225,32 +225,37 @@ InterpretedRegExpMacroAssembler::CheckGreedyLoop(jit::Label* on_tos_equals_curre
}
void
-InterpretedRegExpMacroAssembler::CheckNotAtStart(jit::Label* on_not_at_start)
+InterpretedRegExpMacroAssembler::CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start)
{
- Emit(BC_CHECK_NOT_AT_START, 0);
+ Emit(BC_CHECK_NOT_AT_START, cp_offset);
EmitOrLink(on_not_at_start);
}
void
-InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, jit::Label* on_no_match)
+InterpretedRegExpMacroAssembler::CheckNotBackReference(int start_reg, bool read_backward,
+ jit::Label* on_no_match)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
- Emit(BC_CHECK_NOT_BACK_REF, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
+ start_reg);
EmitOrLink(on_no_match);
}
void
InterpretedRegExpMacroAssembler::CheckNotBackReferenceIgnoreCase(int start_reg,
+ bool read_backward,
jit::Label* on_no_match,
bool unicode)
{
MOZ_ASSERT(start_reg >= 0);
MOZ_ASSERT(start_reg <= kMaxRegister);
if (unicode)
- Emit(BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_UNICODE : BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE,
+ start_reg);
else
- Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
+ Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD : BC_CHECK_NOT_BACK_REF_NO_CASE,
+ start_reg);
EmitOrLink(on_no_match);
}
diff --git a/js/src/irregexp/RegExpMacroAssembler.h b/js/src/irregexp/RegExpMacroAssembler.h
index e8275faf40..06784ce853 100644
--- a/js/src/irregexp/RegExpMacroAssembler.h
+++ b/js/src/irregexp/RegExpMacroAssembler.h
@@ -109,10 +109,10 @@ class MOZ_STACK_CLASS RegExpMacroAssembler
virtual void CheckCharacterGT(char16_t limit, jit::Label* on_greater) = 0;
virtual void CheckCharacterLT(char16_t limit, jit::Label* on_less) = 0;
virtual void CheckGreedyLoop(jit::Label* on_tos_equals_current_position) = 0;
- virtual void CheckNotAtStart(jit::Label* on_not_at_start) = 0;
- virtual void CheckNotBackReference(int start_reg, jit::Label* on_no_match) = 0;
- virtual void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match,
- bool unicode) = 0;
+ virtual void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start) = 0;
+ virtual void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match) = 0;
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
@@ -244,9 +244,10 @@ class MOZ_STACK_CLASS InterpretedRegExpMacroAssembler final : public RegExpMacro
void CheckCharacterGT(char16_t limit, jit::Label* on_greater);
void CheckCharacterLT(char16_t limit, jit::Label* on_less);
void CheckGreedyLoop(jit::Label* on_tos_equals_current_position);
- void CheckNotAtStart(jit::Label* on_not_at_start);
- void CheckNotBackReference(int start_reg, jit::Label* on_no_match);
- void CheckNotBackReferenceIgnoreCase(int start_reg, jit::Label* on_no_match, bool unicode);
+ void CheckNotAtStart(int cp_offset, jit::Label* on_not_at_start);
+ void CheckNotBackReference(int start_reg, bool read_backward, jit::Label* on_no_match);
+ void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
+ jit::Label* on_no_match, bool unicode);
void CheckNotCharacter(unsigned c, jit::Label* on_not_equal);
void CheckNotCharacterAfterAnd(unsigned c, unsigned and_with, jit::Label* on_not_equal);
void CheckNotCharacterAfterMinusAnd(char16_t c, char16_t minus, char16_t and_with,
diff --git a/js/src/irregexp/RegExpParser.cpp b/js/src/irregexp/RegExpParser.cpp
index d0b19d471e..1233aa316c 100644
--- a/js/src/irregexp/RegExpParser.cpp
+++ b/js/src/irregexp/RegExpParser.cpp
@@ -226,6 +226,7 @@ RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
alloc(alloc),
captures_(nullptr),
next_pos_(chars),
+ captures_started_(0),
end_(end),
current_(kEndMarker),
capture_count_(0),
@@ -418,7 +419,8 @@ RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
static inline RegExpTree*
NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
{
- return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
+ return alloc->newInfallible<RegExpLookaround>(RangeAtom(alloc, from, to), false,
+ 0, 0, RegExpLookaround::LOOKAHEAD);
}
static bool
@@ -1213,6 +1215,38 @@ RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
return true;
}
+template <typename CharT>
+RegExpCapture*
+RegExpParser<CharT>::GetCapture(int index) {
+ // The index for the capture groups are one-based. Its index in the list is
+ // zero-based.
+ int known_captures =
+ is_scanned_for_captures_ ? capture_count_ : captures_started_;
+ MOZ_ASSERT(index <= known_captures);
+ if (captures_ == NULL) {
+ captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
+ }
+ while ((int)captures_->length() < known_captures) {
+ RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(nullptr, captures_->length() + 1);
+ captures_->append(capture);
+ }
+ return (*captures_)[index - 1];
+}
+
+
+template <typename CharT>
+bool
+RegExpParser<CharT>::RegExpParserState::IsInsideCaptureGroup(int index) {
+ for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
+ if (s->group_type() != CAPTURE) continue;
+ // Return true if we found the matching capture index.
+ if (index == s->capture_index()) return true;
+ // Abort if index is larger than what has been parsed up till this state.
+ if (index > s->capture_index()) return false;
+ }
+ return false;
+}
+
// QuantifierPrefix ::
// { DecimalDigits }
// { DecimalDigits , }
@@ -1455,24 +1489,24 @@ RegExpTree*
RegExpParser<CharT>::ParseDisjunction()
{
// Used to store current state while parsing subexpressions.
- RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
- RegExpParserState* stored_state = &initial_state;
+ RegExpParserState initial_state(alloc, nullptr, INITIAL, RegExpLookaround::LOOKAHEAD, 0);
+ RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
while (true) {
switch (current()) {
case kEndMarker:
- if (stored_state->IsSubexpression()) {
+ if (state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
return ReportError(JSMSG_MISSING_PAREN);
}
- MOZ_ASSERT(INITIAL == stored_state->group_type());
+ MOZ_ASSERT(INITIAL == state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
- if (!stored_state->IsSubexpression())
+ if (!state->IsSubexpression())
return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
- MOZ_ASSERT(INITIAL != stored_state->group_type());
+ MOZ_ASSERT(INITIAL != state->group_type());
Advance();
// End disjunction parsing and convert builder content to new single
@@ -1481,29 +1515,30 @@ RegExpParser<CharT>::ParseDisjunction()
int end_capture_index = captures_started();
- int capture_index = stored_state->capture_index();
- SubexpressionType group_type = stored_state->group_type();
-
- // Restore previous state.
- stored_state = stored_state->previous_state();
- builder = stored_state->builder();
+ int capture_index = state->capture_index();
+ SubexpressionType group_type = state->group_type();
// Build result of subexpression.
if (group_type == CAPTURE) {
- RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
- (*captures_)[capture_index - 1] = capture;
+ RegExpCapture* capture = GetCapture(capture_index);
+ capture->set_body(body);
body = capture;
} else if (group_type != GROUPING) {
- MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
- group_type == NEGATIVE_LOOKAHEAD);
- bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
- body = alloc->newInfallible<RegExpLookahead>(body,
+ MOZ_ASSERT(group_type == POSITIVE_LOOKAROUND ||
+ group_type == NEGATIVE_LOOKAROUND);
+ bool is_positive = (group_type == POSITIVE_LOOKAROUND);
+ body = alloc->newInfallible<RegExpLookaround>(body,
is_positive,
end_capture_index - capture_index,
- capture_index);
+ capture_index,
+ state->lookaround_type());
}
+
+ // Restore previous state.
+ state = state->previous_state();
+ builder = state->builder();
builder->AddAtom(body);
- if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
+ if (unicode_ && (group_type == POSITIVE_LOOKAROUND || group_type == NEGATIVE_LOOKAROUND))
continue;
// For compatability with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
@@ -1563,6 +1598,7 @@ RegExpParser<CharT>::ParseDisjunction()
}
case '(': {
SubexpressionType subexpr_type = CAPTURE;
+ RegExpLookaround::Type lookaround_type = state->lookaround_type();
Advance();
if (current() == '?') {
switch (Next()) {
@@ -1570,26 +1606,39 @@ RegExpParser<CharT>::ParseDisjunction()
subexpr_type = GROUPING;
break;
case '=':
- subexpr_type = POSITIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = POSITIVE_LOOKAROUND;
break;
case '!':
- subexpr_type = NEGATIVE_LOOKAHEAD;
+ lookaround_type = RegExpLookaround::LOOKAHEAD;
+ subexpr_type = NEGATIVE_LOOKAROUND;
break;
+ case '<':
+ Advance();
+ lookaround_type = RegExpLookaround::LOOKBEHIND;
+ if (Next() == '=') {
+ subexpr_type = POSITIVE_LOOKAROUND;
+ break;
+ } else if (Next() == '!') {
+ subexpr_type = NEGATIVE_LOOKAROUND;
+ break;
+ }
+ // We didn't get a positive or negative after '<'.
+ // That's an error.
+ return ReportError(JSMSG_INVALID_GROUP);
default:
return ReportError(JSMSG_INVALID_GROUP);
}
Advance(2);
} else {
- if (captures_ == nullptr)
- captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
if (captures_started() >= kMaxCaptures)
return ReportError(JSMSG_TOO_MANY_PARENS);
- captures_->append((RegExpCapture*) nullptr);
+ captures_started_++;
}
// Store current state and begin new disjunction parsing.
- stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
- captures_started());
- builder = stored_state->builder();
+ state = alloc->newInfallible<RegExpParserState>(alloc, state, subexpr_type,
+ lookaround_type, captures_started_);
+ builder = state->builder();
continue;
}
case '[': {
@@ -1644,19 +1693,18 @@ RegExpParser<CharT>::ParseDisjunction()
case '7': case '8': case '9': {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
- RegExpCapture* capture = nullptr;
- if (captures_ != nullptr && index <= (int) captures_->length()) {
- capture = (*captures_)[index - 1];
- }
- if (capture == nullptr) {
- builder->AddEmpty();
- break;
+ if (state->IsInsideCaptureGroup(index)) {
+ // The backreference is inside the capture group it refers to.
+ // Nothing can possibly have been captured yet.
+ builder->AddEmpty();
+ } else {
+ RegExpCapture* capture = GetCapture(index);
+ RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
+ if (unicode_)
+ builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
+ else
+ builder->AddAtom(atom);
}
- RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
- if (unicode_)
- builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
- else
- builder->AddAtom(atom);
break;
}
if (unicode_)
diff --git a/js/src/irregexp/RegExpParser.h b/js/src/irregexp/RegExpParser.h
index 7c6e87e20f..72b35718cc 100644
--- a/js/src/irregexp/RegExpParser.h
+++ b/js/src/irregexp/RegExpParser.h
@@ -228,7 +228,7 @@ class RegExpParser
bool simple() { return simple_; }
bool contains_anchor() { return contains_anchor_; }
void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); }
+ int captures_started() { return captures_started_; }
const CharT* position() { return next_pos_ - 1; }
static const int kMaxCaptures = 1 << 16;
@@ -238,8 +238,8 @@ class RegExpParser
enum SubexpressionType {
INITIAL,
CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAHEAD,
- NEGATIVE_LOOKAHEAD,
+ POSITIVE_LOOKAROUND,
+ NEGATIVE_LOOKAROUND,
GROUPING
};
@@ -248,10 +248,12 @@ class RegExpParser
RegExpParserState(LifoAlloc* alloc,
RegExpParserState* previous_state,
SubexpressionType group_type,
+ RegExpLookaround::Type lookaround_type,
int disjunction_capture_index)
: previous_state_(previous_state),
builder_(alloc->newInfallible<RegExpBuilder>(alloc)),
group_type_(group_type),
+ lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index)
{}
// Parser state of containing expression, if any.
@@ -261,11 +263,16 @@ class RegExpParser
RegExpBuilder* builder() { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() { return group_type_; }
+ // Lookahead or Lookbehind.
+ RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
+ // Check whether the parser is inside a capture group with the given index.
+ bool IsInsideCaptureGroup(int index);
+
private:
// Linked list implementation of stack of states.
RegExpParserState* previous_state_;
@@ -273,10 +280,15 @@ class RegExpParser
RegExpBuilder* builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
SubexpressionType group_type_;
+ // Stored read direction.
+ RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
};
+ // Return the 1-indexed RegExpCapture object, allocate if necessary.
+ RegExpCapture* GetCapture(int index);
+
widechar current() { return current_; }
bool has_more() { return has_more_; }
bool has_next() { return next_pos_ < end_; }
@@ -293,6 +305,7 @@ class RegExpParser
const CharT* next_pos_;
const CharT* end_;
widechar current_;
+ int captures_started_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool has_more_;