summaryrefslogtreecommitdiff
path: root/js/src/new-regexp/regexp-parser.h
diff options
context:
space:
mode:
Diffstat (limited to 'js/src/new-regexp/regexp-parser.h')
-rw-r--r--js/src/new-regexp/regexp-parser.h363
1 files changed, 0 insertions, 363 deletions
diff --git a/js/src/new-regexp/regexp-parser.h b/js/src/new-regexp/regexp-parser.h
deleted file mode 100644
index 1b2a9fe18..000000000
--- a/js/src/new-regexp/regexp-parser.h
+++ /dev/null
@@ -1,363 +0,0 @@
-// Copyright 2016 the V8 project authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef V8_REGEXP_REGEXP_PARSER_H_
-#define V8_REGEXP_REGEXP_PARSER_H_
-
-#include "new-regexp/regexp-ast.h"
-#include "new-regexp/regexp-error.h"
-
-namespace v8 {
-namespace internal {
-
-struct RegExpCompileData;
-
-// A BufferedZoneList is an automatically growing list, just like (and backed
-// by) a ZoneList, that is optimized for the case of adding and removing
-// a single element. The last element added is stored outside the backing list,
-// and if no more than one element is ever added, the ZoneList isn't even
-// allocated.
-// Elements must not be nullptr pointers.
-template <typename T, int initial_size>
-class BufferedZoneList {
- public:
- BufferedZoneList() : list_(nullptr), last_(nullptr) {}
-
- // Adds element at end of list. This element is buffered and can
- // be read using last() or removed using RemoveLast until a new Add or until
- // RemoveLast or GetList has been called.
- void Add(T* value, Zone* zone) {
- if (last_ != nullptr) {
- if (list_ == nullptr) {
- list_ = new (zone) ZoneList<T*>(initial_size, zone);
- }
- list_->Add(last_, zone);
- }
- last_ = value;
- }
-
- T* last() {
- DCHECK(last_ != nullptr);
- return last_;
- }
-
- T* RemoveLast() {
- DCHECK(last_ != nullptr);
- T* result = last_;
- if ((list_ != nullptr) && (list_->length() > 0))
- last_ = list_->RemoveLast();
- else
- last_ = nullptr;
- return result;
- }
-
- T* Get(int i) {
- DCHECK((0 <= i) && (i < length()));
- if (list_ == nullptr) {
- DCHECK_EQ(0, i);
- return last_;
- } else {
- if (i == list_->length()) {
- DCHECK(last_ != nullptr);
- return last_;
- } else {
- return list_->at(i);
- }
- }
- }
-
- void Clear() {
- list_ = nullptr;
- last_ = nullptr;
- }
-
- int length() {
- int length = (list_ == nullptr) ? 0 : list_->length();
- return length + ((last_ == nullptr) ? 0 : 1);
- }
-
- ZoneList<T*>* GetList(Zone* zone) {
- if (list_ == nullptr) {
- list_ = new (zone) ZoneList<T*>(initial_size, zone);
- }
- if (last_ != nullptr) {
- list_->Add(last_, zone);
- last_ = nullptr;
- }
- return list_;
- }
-
- private:
- ZoneList<T*>* list_;
- T* last_;
-};
-
-
-// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
-class RegExpBuilder : public ZoneObject {
- public:
- RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
- void AddCharacter(uc16 character);
- void AddUnicodeCharacter(uc32 character);
- void AddEscapedUnicodeCharacter(uc32 character);
- // "Adds" an empty expression. Does nothing except consume a
- // following quantifier
- void AddEmpty();
- void AddCharacterClass(RegExpCharacterClass* cc);
- void AddCharacterClassForDesugaring(uc32 c);
- void AddAtom(RegExpTree* tree);
- void AddTerm(RegExpTree* tree);
- void AddAssertion(RegExpTree* tree);
- void NewAlternative(); // '|'
- bool AddQuantifierToAtom(int min, int max,
- RegExpQuantifier::QuantifierType type);
- void FlushText();
- RegExpTree* ToRegExp();
- JSRegExp::Flags flags() const { return flags_; }
- void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
-
- bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
- bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
- bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
-
- private:
- static const uc16 kNoPendingSurrogate = 0;
- void AddLeadSurrogate(uc16 lead_surrogate);
- void AddTrailSurrogate(uc16 trail_surrogate);
- void FlushPendingSurrogate();
- void FlushCharacters();
- void FlushTerms();
- bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
- bool NeedsDesugaringForIgnoreCase(uc32 c);
- Zone* zone() const { return zone_; }
- bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
-
- Zone* zone_;
- bool pending_empty_;
- JSRegExp::Flags flags_;
- ZoneList<uc16>* characters_;
- uc16 pending_surrogate_;
- BufferedZoneList<RegExpTree, 2> terms_;
- BufferedZoneList<RegExpTree, 2> text_;
- BufferedZoneList<RegExpTree, 2> alternatives_;
-#ifdef DEBUG
- enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
-#define LAST(x) last_added_ = x;
-#else
-#define LAST(x)
-#endif
-};
-
-class V8_EXPORT_PRIVATE RegExpParser {
- public:
- RegExpParser(FlatStringReader* in, JSRegExp::Flags flags, Isolate* isolate,
- Zone* zone);
-
- static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
- JSRegExp::Flags flags, RegExpCompileData* result);
-
- RegExpTree* ParsePattern();
- RegExpTree* ParseDisjunction();
- RegExpTree* ParseGroup();
-
- // Parses a {...,...} quantifier and stores the range in the given
- // out parameters.
- bool ParseIntervalQuantifier(int* min_out, int* max_out);
-
- // Parses and returns a single escaped character. The character
- // must not be 'b' or 'B' since they are usually handle specially.
- uc32 ParseClassCharacterEscape();
-
- // Checks whether the following is a length-digit hexadecimal number,
- // and sets the value if it is.
- bool ParseHexEscape(int length, uc32* value);
- bool ParseUnicodeEscape(uc32* value);
- bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
-
- bool ParsePropertyClassName(ZoneVector<char>* name_1,
- ZoneVector<char>* name_2);
- bool AddPropertyClassRange(ZoneList<CharacterRange>* add_to, bool negate,
- const ZoneVector<char>& name_1,
- const ZoneVector<char>& name_2);
-
- RegExpTree* GetPropertySequence(const ZoneVector<char>& name_1);
- RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
-
- uc32 ParseOctalLiteral();
-
- // Tries to parse the input as a back reference. If successful it
- // stores the result in the output parameter and returns true. If
- // it fails it will push back the characters read so the same characters
- // can be reparsed.
- bool ParseBackReferenceIndex(int* index_out);
-
- // Parse inside a class. Either add escaped class to the range, or return
- // false and pass parsed single character through |char_out|.
- void ParseClassEscape(ZoneList<CharacterRange>* ranges, Zone* zone,
- bool add_unicode_case_equivalents, uc32* char_out,
- bool* is_class_escape);
-
- char ParseClassEscape();
-
- RegExpTree* ReportError(RegExpError error);
- void Advance();
- void Advance(int dist);
- void Reset(int pos);
-
- // Reports whether the pattern might be used as a literal search string.
- // Only use if the result of the parse is a single atom node.
- bool simple();
- bool contains_anchor() { return contains_anchor_; }
- void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_started_; }
- int position() { return next_pos_ - 1; }
- bool failed() { return failed_; }
- // The Unicode flag can't be changed using in-regexp syntax, so it's OK to
- // just read the initial flag value here.
- bool unicode() const { return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
-
- static bool IsSyntaxCharacterOrSlash(uc32 c);
-
- static const uc32 kEndMarker = (1 << 21);
-
- private:
- enum SubexpressionType {
- INITIAL,
- CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAROUND,
- NEGATIVE_LOOKAROUND,
- GROUPING
- };
-
- class RegExpParserState : public ZoneObject {
- public:
- // Push a state on the stack.
- RegExpParserState(RegExpParserState* previous_state,
- SubexpressionType group_type,
- RegExpLookaround::Type lookaround_type,
- int disjunction_capture_index,
- const ZoneVector<uc16>* capture_name,
- JSRegExp::Flags flags, Zone* zone)
- : previous_state_(previous_state),
- builder_(new (zone) RegExpBuilder(zone, flags)),
- group_type_(group_type),
- lookaround_type_(lookaround_type),
- disjunction_capture_index_(disjunction_capture_index),
- capture_name_(capture_name) {}
- // Parser state of containing expression, if any.
- RegExpParserState* previous_state() const { return previous_state_; }
- bool IsSubexpression() { return previous_state_ != nullptr; }
- // RegExpBuilder building this regexp's AST.
- RegExpBuilder* builder() const { return builder_; }
- // Type of regexp being parsed (parenthesized group or entire regexp).
- SubexpressionType group_type() const { return group_type_; }
- // Lookahead or Lookbehind.
- RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
- // Index in captures array of first capture in this sub-expression, if any.
- // Also the capture index of this sub-expression itself, if group_type
- // is CAPTURE.
- int capture_index() const { return disjunction_capture_index_; }
- // The name of the current sub-expression, if group_type is CAPTURE. Only
- // used for named captures.
- const ZoneVector<uc16>* capture_name() const { return capture_name_; }
-
- bool IsNamedCapture() const { return capture_name_ != nullptr; }
-
- // Check whether the parser is inside a capture group with the given index.
- bool IsInsideCaptureGroup(int index);
- // Check whether the parser is inside a capture group with the given name.
- bool IsInsideCaptureGroup(const ZoneVector<uc16>* name);
-
- private:
- // Linked list implementation of stack of states.
- RegExpParserState* const previous_state_;
- // Builder for the stored disjunction.
- RegExpBuilder* const builder_;
- // Stored disjunction type (capture, look-ahead or grouping), if any.
- const SubexpressionType group_type_;
- // Stored read direction.
- const RegExpLookaround::Type lookaround_type_;
- // Stored disjunction's capture index (if any).
- const int disjunction_capture_index_;
- // Stored capture name (if any).
- const ZoneVector<uc16>* const capture_name_;
- };
-
- // Return the 1-indexed RegExpCapture object, allocate if necessary.
- RegExpCapture* GetCapture(int index);
-
- // Creates a new named capture at the specified index. Must be called exactly
- // once for each named capture. Fails if a capture with the same name is
- // encountered.
- bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index);
-
- // Parses the name of a capture group (?<name>pattern). The name must adhere
- // to IdentifierName in the ECMAScript standard.
- const ZoneVector<uc16>* ParseCaptureGroupName();
-
- bool ParseNamedBackReference(RegExpBuilder* builder,
- RegExpParserState* state);
- RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
-
- // After the initial parsing pass, patch corresponding RegExpCapture objects
- // into all RegExpBackReferences. This is done after initial parsing in order
- // to avoid complicating cases in which references comes before the capture.
- void PatchNamedBackReferences();
-
- Handle<FixedArray> CreateCaptureNameMap();
-
- // Returns true iff the pattern contains named captures. May call
- // ScanForCaptures to look ahead at the remaining pattern.
- bool HasNamedCaptures();
-
- Isolate* isolate() { return isolate_; }
- Zone* zone() const { return zone_; }
-
- uc32 current() { return current_; }
- bool has_more() { return has_more_; }
- bool has_next() { return next_pos_ < in()->length(); }
- uc32 Next();
- template <bool update_position>
- uc32 ReadNext();
- FlatStringReader* in() { return in_; }
- void ScanForCaptures();
-
- struct RegExpCaptureNameLess {
- bool operator()(const RegExpCapture* lhs, const RegExpCapture* rhs) const {
- DCHECK_NOT_NULL(lhs);
- DCHECK_NOT_NULL(rhs);
- ZoneVector<uc16> lhname = *lhs->name();
- ZoneVector<uc16> rhname = *rhs->name();
- return lhname < rhname;
- }
- };
-
- Isolate* isolate_;
- Zone* zone_;
- RegExpError error_ = RegExpError::kNone;
- int error_pos_ = 0;
- ZoneList<RegExpCapture*>* captures_;
- ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
- ZoneList<RegExpBackReference*>* named_back_references_;
- FlatStringReader* in_;
- uc32 current_;
- // These are the flags specified outside the regexp syntax ie after the
- // terminating '/' or in the second argument to the constructor. The current
- // flags are stored on the RegExpBuilder.
- JSRegExp::Flags top_level_flags_;
- int next_pos_;
- int captures_started_;
- int capture_count_; // Only valid after we have scanned for captures.
- bool has_more_;
- bool simple_;
- bool contains_anchor_;
- bool is_scanned_for_captures_;
- bool has_named_captures_; // Only valid after we have scanned for captures.
- bool failed_;
-};
-
-} // namespace internal
-} // namespace v8
-
-#endif // V8_REGEXP_REGEXP_PARSER_H_