diff options
Diffstat (limited to 'js/src/new-regexp/regexp-ast.h')
-rw-r--r-- | js/src/new-regexp/regexp-ast.h | 615 |
1 files changed, 0 insertions, 615 deletions
diff --git a/js/src/new-regexp/regexp-ast.h b/js/src/new-regexp/regexp-ast.h deleted file mode 100644 index 32bbcf0bf..000000000 --- a/js/src/new-regexp/regexp-ast.h +++ /dev/null @@ -1,615 +0,0 @@ -// Copyright 2016 the V8 project authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef V8_REGEXP_REGEXP_AST_H_ -#define V8_REGEXP_REGEXP_AST_H_ - -#include "new-regexp/regexp-shim.h" - -namespace v8 { -namespace internal { - -#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \ - VISIT(Disjunction) \ - VISIT(Alternative) \ - VISIT(Assertion) \ - VISIT(CharacterClass) \ - VISIT(Atom) \ - VISIT(Quantifier) \ - VISIT(Capture) \ - VISIT(Group) \ - VISIT(Lookaround) \ - VISIT(BackReference) \ - VISIT(Empty) \ - VISIT(Text) - -#define FORWARD_DECLARE(Name) class RegExp##Name; -FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE) -#undef FORWARD_DECLARE - -class RegExpCompiler; -class RegExpNode; -class RegExpTree; - -class RegExpVisitor { - public: - virtual ~RegExpVisitor() = default; -#define MAKE_CASE(Name) \ - virtual void* Visit##Name(RegExp##Name*, void* data) = 0; - FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) -#undef MAKE_CASE -}; - - -// A simple closed interval. -class Interval { - public: - Interval() : from_(kNone), to_(kNone - 1) {} // '- 1' for branchless size(). - Interval(int from, int to) : from_(from), to_(to) {} - Interval Union(Interval that) { - if (that.from_ == kNone) - return *this; - else if (from_ == kNone) - return that; - else - return Interval(Min(from_, that.from_), Max(to_, that.to_)); - } - - bool Contains(int value) { return (from_ <= value) && (value <= to_); } - bool is_empty() { return from_ == kNone; } - int from() const { return from_; } - int to() const { return to_; } - int size() const { return to_ - from_ + 1; } - - static Interval Empty() { return Interval(); } - - static constexpr int kNone = -1; - - private: - int from_; - int to_; -}; - - -// Represents code units in the range from from_ to to_, both ends are -// inclusive. -class CharacterRange { - public: - CharacterRange() : from_(0), to_(0) {} - // For compatibility with the CHECK_OK macro - CharacterRange(void* null) { DCHECK_NULL(null); } // NOLINT - V8_EXPORT_PRIVATE static void AddClassEscape(char type, - ZoneList<CharacterRange>* ranges, - Zone* zone); - // Add class escapes. Add case equivalent closure for \w and \W if necessary. - V8_EXPORT_PRIVATE static void AddClassEscape( - char type, ZoneList<CharacterRange>* ranges, - bool add_unicode_case_equivalents, Zone* zone); - static Vector<const int> GetWordBounds(); - static inline CharacterRange Singleton(uc32 value) { - return CharacterRange(value, value); - } - static inline CharacterRange Range(uc32 from, uc32 to) { - DCHECK(0 <= from && to <= String::kMaxCodePoint); - DCHECK(static_cast<uint32_t>(from) <= static_cast<uint32_t>(to)); - return CharacterRange(from, to); - } - static inline CharacterRange Everything() { - return CharacterRange(0, String::kMaxCodePoint); - } - static inline ZoneList<CharacterRange>* List(Zone* zone, - CharacterRange range) { - ZoneList<CharacterRange>* list = - new (zone) ZoneList<CharacterRange>(1, zone); - list->Add(range, zone); - return list; - } - bool Contains(uc32 i) { return from_ <= i && i <= to_; } - uc32 from() const { return from_; } - void set_from(uc32 value) { from_ = value; } - uc32 to() const { return to_; } - void set_to(uc32 value) { to_ = value; } - bool is_valid() { return from_ <= to_; } - bool IsEverything(uc32 max) { return from_ == 0 && to_ >= max; } - bool IsSingleton() { return (from_ == to_); } - V8_EXPORT_PRIVATE static void AddCaseEquivalents( - Isolate* isolate, Zone* zone, ZoneList<CharacterRange>* ranges, - bool is_one_byte); - // Whether a range list is in canonical form: Ranges ordered by from value, - // and ranges non-overlapping and non-adjacent. - V8_EXPORT_PRIVATE static bool IsCanonical(ZoneList<CharacterRange>* ranges); - // Convert range list to canonical form. The characters covered by the ranges - // will still be the same, but no character is in more than one range, and - // adjacent ranges are merged. The resulting list may be shorter than the - // original, but cannot be longer. - static void Canonicalize(ZoneList<CharacterRange>* ranges); - // Negate the contents of a character range in canonical form. - static void Negate(ZoneList<CharacterRange>* src, - ZoneList<CharacterRange>* dst, Zone* zone); - static const int kStartMarker = (1 << 24); - static const int kPayloadMask = (1 << 24) - 1; - - private: - CharacterRange(uc32 from, uc32 to) : from_(from), to_(to) {} - - uc32 from_; - uc32 to_; -}; - -class CharacterSet final { - public: - explicit CharacterSet(uc16 standard_set_type) - : ranges_(nullptr), standard_set_type_(standard_set_type) {} - explicit CharacterSet(ZoneList<CharacterRange>* ranges) - : ranges_(ranges), standard_set_type_(0) {} - ZoneList<CharacterRange>* ranges(Zone* zone); - uc16 standard_set_type() const { return standard_set_type_; } - void set_standard_set_type(uc16 special_set_type) { - standard_set_type_ = special_set_type; - } - bool is_standard() { return standard_set_type_ != 0; } - V8_EXPORT_PRIVATE void Canonicalize(); - - private: - ZoneList<CharacterRange>* ranges_; - // If non-zero, the value represents a standard set (e.g., all whitespace - // characters) without having to expand the ranges. - uc16 standard_set_type_; -}; - -class TextElement final { - public: - enum TextType { ATOM, CHAR_CLASS }; - - static TextElement Atom(RegExpAtom* atom); - static TextElement CharClass(RegExpCharacterClass* char_class); - - int cp_offset() const { return cp_offset_; } - void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; } - int length() const; - - TextType text_type() const { return text_type_; } - - RegExpTree* tree() const { return tree_; } - - RegExpAtom* atom() const { - DCHECK(text_type() == ATOM); - return reinterpret_cast<RegExpAtom*>(tree()); - } - - RegExpCharacterClass* char_class() const { - DCHECK(text_type() == CHAR_CLASS); - return reinterpret_cast<RegExpCharacterClass*>(tree()); - } - - private: - TextElement(TextType text_type, RegExpTree* tree) - : cp_offset_(-1), text_type_(text_type), tree_(tree) {} - - int cp_offset_; - TextType text_type_; - RegExpTree* tree_; -}; - - -class RegExpTree : public ZoneObject { - public: - static const int kInfinity = kMaxInt; - virtual ~RegExpTree() = default; - virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; - virtual RegExpNode* ToNode(RegExpCompiler* compiler, - RegExpNode* on_success) = 0; - virtual bool IsTextElement() { return false; } - virtual bool IsAnchoredAtStart() { return false; } - virtual bool IsAnchoredAtEnd() { return false; } - virtual int min_match() = 0; - virtual int max_match() = 0; - // Returns the interval of registers used for captures within this - // expression. - virtual Interval CaptureRegisters() { return Interval::Empty(); } - virtual void AppendToText(RegExpText* text, Zone* zone); - V8_EXPORT_PRIVATE std::ostream& Print(std::ostream& os, - Zone* zone); // NOLINT -#define MAKE_ASTYPE(Name) \ - virtual RegExp##Name* As##Name(); \ - virtual bool Is##Name(); - FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) -#undef MAKE_ASTYPE -}; - - -class RegExpDisjunction final : public RegExpTree { - public: - explicit RegExpDisjunction(ZoneList<RegExpTree*>* alternatives); - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpDisjunction* AsDisjunction() override; - Interval CaptureRegisters() override; - bool IsDisjunction() override; - bool IsAnchoredAtStart() override; - bool IsAnchoredAtEnd() override; - int min_match() override { return min_match_; } - int max_match() override { return max_match_; } - ZoneList<RegExpTree*>* alternatives() { return alternatives_; } - - private: - bool SortConsecutiveAtoms(RegExpCompiler* compiler); - void RationalizeConsecutiveAtoms(RegExpCompiler* compiler); - void FixSingleCharacterDisjunctions(RegExpCompiler* compiler); - ZoneList<RegExpTree*>* alternatives_; - int min_match_; - int max_match_; -}; - - -class RegExpAlternative final : public RegExpTree { - public: - explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes); - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpAlternative* AsAlternative() override; - Interval CaptureRegisters() override; - bool IsAlternative() override; - bool IsAnchoredAtStart() override; - bool IsAnchoredAtEnd() override; - int min_match() override { return min_match_; } - int max_match() override { return max_match_; } - ZoneList<RegExpTree*>* nodes() { return nodes_; } - - private: - ZoneList<RegExpTree*>* nodes_; - int min_match_; - int max_match_; -}; - - -class RegExpAssertion final : public RegExpTree { - public: - enum AssertionType { - START_OF_LINE = 0, - START_OF_INPUT = 1, - END_OF_LINE = 2, - END_OF_INPUT = 3, - BOUNDARY = 4, - NON_BOUNDARY = 5, - LAST_TYPE = NON_BOUNDARY, - }; - RegExpAssertion(AssertionType type, JSRegExp::Flags flags) - : assertion_type_(type), flags_(flags) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpAssertion* AsAssertion() override; - bool IsAssertion() override; - bool IsAnchoredAtStart() override; - bool IsAnchoredAtEnd() override; - int min_match() override { return 0; } - int max_match() override { return 0; } - AssertionType assertion_type() const { return assertion_type_; } - JSRegExp::Flags flags() const { return flags_; } - - private: - const AssertionType assertion_type_; - const JSRegExp::Flags flags_; -}; - - -class RegExpCharacterClass final : public RegExpTree { - public: - // NEGATED: The character class is negated and should match everything but - // the specified ranges. - // CONTAINS_SPLIT_SURROGATE: The character class contains part of a split - // surrogate and should not be unicode-desugared (crbug.com/641091). - enum Flag { - NEGATED = 1 << 0, - CONTAINS_SPLIT_SURROGATE = 1 << 1, - }; - using CharacterClassFlags = base::Flags<Flag>; - - RegExpCharacterClass( - Zone* zone, ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags, - CharacterClassFlags character_class_flags = CharacterClassFlags()) - : set_(ranges), - flags_(flags), - character_class_flags_(character_class_flags) { - // Convert the empty set of ranges to the negated Everything() range. - if (ranges->is_empty()) { - ranges->Add(CharacterRange::Everything(), zone); - character_class_flags_ ^= NEGATED; - } - } - RegExpCharacterClass(uc16 type, JSRegExp::Flags flags) - : set_(type), - flags_(flags), - character_class_flags_(CharacterClassFlags()) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpCharacterClass* AsCharacterClass() override; - bool IsCharacterClass() override; - bool IsTextElement() override { return true; } - int min_match() override { return 1; } - // The character class may match two code units for unicode regexps. - // TODO(yangguo): we should split this class for usage in TextElement, and - // make max_match() dependent on the character class content. - int max_match() override { return 2; } - void AppendToText(RegExpText* text, Zone* zone) override; - CharacterSet character_set() { return set_; } - // TODO(lrn): Remove need for complex version if is_standard that - // recognizes a mangled standard set and just do { return set_.is_special(); } - bool is_standard(Zone* zone); - // Returns a value representing the standard character set if is_standard() - // returns true. - // Currently used values are: - // s : unicode whitespace - // S : unicode non-whitespace - // w : ASCII word character (digit, letter, underscore) - // W : non-ASCII word character - // d : ASCII digit - // D : non-ASCII digit - // . : non-newline - // * : All characters, for advancing unanchored regexp - uc16 standard_type() const { return set_.standard_set_type(); } - ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); } - bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; } - JSRegExp::Flags flags() const { return flags_; } - bool contains_split_surrogate() const { - return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0; - } - - private: - CharacterSet set_; - const JSRegExp::Flags flags_; - CharacterClassFlags character_class_flags_; -}; - - -class RegExpAtom final : public RegExpTree { - public: - explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags) - : data_(data), flags_(flags) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpAtom* AsAtom() override; - bool IsAtom() override; - bool IsTextElement() override { return true; } - int min_match() override { return data_.length(); } - int max_match() override { return data_.length(); } - void AppendToText(RegExpText* text, Zone* zone) override; - Vector<const uc16> data() { return data_; } - int length() { return data_.length(); } - JSRegExp::Flags flags() const { return flags_; } - bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } - - private: - Vector<const uc16> data_; - const JSRegExp::Flags flags_; -}; - - -class RegExpText final : public RegExpTree { - public: - explicit RegExpText(Zone* zone) : elements_(2, zone), length_(0) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpText* AsText() override; - bool IsText() override; - bool IsTextElement() override { return true; } - int min_match() override { return length_; } - int max_match() override { return length_; } - void AppendToText(RegExpText* text, Zone* zone) override; - void AddElement(TextElement elm, Zone* zone) { - elements_.Add(elm, zone); - length_ += elm.length(); - } - ZoneList<TextElement>* elements() { return &elements_; } - - private: - ZoneList<TextElement> elements_; - int length_; -}; - - -class RegExpQuantifier final : public RegExpTree { - public: - enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; - RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) - : body_(body), - min_(min), - max_(max), - quantifier_type_(type) { - if (min > 0 && body->min_match() > kInfinity / min) { - min_match_ = kInfinity; - } else { - min_match_ = min * body->min_match(); - } - if (max > 0 && body->max_match() > kInfinity / max) { - max_match_ = kInfinity; - } else { - max_match_ = max * body->max_match(); - } - } - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - static RegExpNode* ToNode(int min, int max, bool is_greedy, RegExpTree* body, - RegExpCompiler* compiler, RegExpNode* on_success, - bool not_at_start = false); - RegExpQuantifier* AsQuantifier() override; - Interval CaptureRegisters() override; - bool IsQuantifier() override; - int min_match() override { return min_match_; } - int max_match() override { return max_match_; } - int min() { return min_; } - int max() { return max_; } - bool is_possessive() { return quantifier_type_ == POSSESSIVE; } - bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } - bool is_greedy() { return quantifier_type_ == GREEDY; } - RegExpTree* body() { return body_; } - - private: - RegExpTree* body_; - int min_; - int max_; - int min_match_; - int max_match_; - QuantifierType quantifier_type_; -}; - - -class RegExpCapture final : public RegExpTree { - public: - explicit RegExpCapture(int index) - : body_(nullptr), - index_(index), - min_match_(0), - max_match_(0), - name_(nullptr) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - static RegExpNode* ToNode(RegExpTree* body, int index, - RegExpCompiler* compiler, RegExpNode* on_success); - RegExpCapture* AsCapture() override; - bool IsAnchoredAtStart() override; - bool IsAnchoredAtEnd() override; - Interval CaptureRegisters() override; - bool IsCapture() override; - int min_match() override { return min_match_; } - int max_match() override { return max_match_; } - RegExpTree* body() { return body_; } - void set_body(RegExpTree* body) { - body_ = body; - min_match_ = body->min_match(); - max_match_ = body->max_match(); - } - int index() const { return index_; } - const ZoneVector<uc16>* name() const { return name_; } - void set_name(const ZoneVector<uc16>* name) { name_ = name; } - static int StartRegister(int index) { return index * 2; } - static int EndRegister(int index) { return index * 2 + 1; } - - private: - RegExpTree* body_; - int index_; - int min_match_; - int max_match_; - const ZoneVector<uc16>* name_; -}; - -class RegExpGroup final : public RegExpTree { - public: - explicit RegExpGroup(RegExpTree* body) - : body_(body), - min_match_(body->min_match()), - max_match_(body->max_match()) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, - RegExpNode* on_success) override { - return body_->ToNode(compiler, on_success); - } - RegExpGroup* AsGroup() override; - bool IsAnchoredAtStart() override { return body_->IsAnchoredAtStart(); } - bool IsAnchoredAtEnd() override { return body_->IsAnchoredAtEnd(); } - bool IsGroup() override; - int min_match() override { return min_match_; } - int max_match() override { return max_match_; } - Interval CaptureRegisters() override { return body_->CaptureRegisters(); } - RegExpTree* body() { return body_; } - - private: - RegExpTree* body_; - int min_match_; - int max_match_; -}; - -class RegExpLookaround final : public RegExpTree { - public: - enum Type { LOOKAHEAD, LOOKBEHIND }; - - RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count, - int capture_from, Type type) - : body_(body), - is_positive_(is_positive), - capture_count_(capture_count), - capture_from_(capture_from), - type_(type) {} - - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpLookaround* AsLookaround() override; - Interval CaptureRegisters() override; - bool IsLookaround() override; - bool IsAnchoredAtStart() override; - int min_match() override { return 0; } - int max_match() override { return 0; } - RegExpTree* body() { return body_; } - bool is_positive() { return is_positive_; } - int capture_count() { return capture_count_; } - int capture_from() { return capture_from_; } - Type type() { return type_; } - - class Builder { - public: - Builder(bool is_positive, RegExpNode* on_success, - int stack_pointer_register, int position_register, - int capture_register_count = 0, int capture_register_start = 0); - RegExpNode* on_match_success() { return on_match_success_; } - RegExpNode* ForMatch(RegExpNode* match); - - private: - bool is_positive_; - RegExpNode* on_match_success_; - RegExpNode* on_success_; - int stack_pointer_register_; - int position_register_; - }; - - private: - RegExpTree* body_; - bool is_positive_; - int capture_count_; - int capture_from_; - Type type_; -}; - - -class RegExpBackReference final : public RegExpTree { - public: - explicit RegExpBackReference(JSRegExp::Flags flags) - : capture_(nullptr), name_(nullptr), flags_(flags) {} - RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags) - : capture_(capture), name_(nullptr), flags_(flags) {} - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpBackReference* AsBackReference() override; - bool IsBackReference() override; - int min_match() override { return 0; } - // The back reference may be recursive, e.g. /(\2)(\1)/. To avoid infinite - // recursion, we give up. Ignorance is bliss. - int max_match() override { return kInfinity; } - int index() { return capture_->index(); } - RegExpCapture* capture() { return capture_; } - void set_capture(RegExpCapture* capture) { capture_ = capture; } - const ZoneVector<uc16>* name() const { return name_; } - void set_name(const ZoneVector<uc16>* name) { name_ = name; } - - private: - RegExpCapture* capture_; - const ZoneVector<uc16>* name_; - const JSRegExp::Flags flags_; -}; - - -class RegExpEmpty final : public RegExpTree { - public: - RegExpEmpty() = default; - void* Accept(RegExpVisitor* visitor, void* data) override; - RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; - RegExpEmpty* AsEmpty() override; - bool IsEmpty() override; - int min_match() override { return 0; } - int max_match() override { return 0; } -}; - -} // namespace internal -} // namespace v8 - -#endif // V8_REGEXP_REGEXP_AST_H_ |