/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifndef vm_RegExpObject_h #define vm_RegExpObject_h #include "mozilla/Attributes.h" #include "mozilla/MemoryReporting.h" #include "jscntxt.h" #include "builtin/SelfHostingDefines.h" #include "gc/Marking.h" #include "gc/Zone.h" #include "proxy/Proxy.h" #include "vm/ArrayObject.h" #include "vm/Shape.h" #include "irregexp/InfallibleVector.h" /* * JavaScript Regular Expressions * * There are several engine concepts associated with a single logical regexp: * * RegExpObject - The JS-visible object whose .[[Class]] equals "RegExp" * * RegExpShared - The compiled representation of the regexp. * * RegExpCompartment - Owns all RegExpShared instances in a compartment. * * To save memory, a RegExpShared is not created for a RegExpObject until it is * needed for execution. When a RegExpShared needs to be created, it is looked * up in a per-compartment table to allow reuse between objects. Lastly, on GC, * every RegExpShared that is not in active use is discarded. */ namespace js { struct MatchPair; class MatchPairs; class RegExpShared; class RegExpStatics; using RootedRegExpShared = JS::Rooted; using HandleRegExpShared = JS::Handle; using MutableHandleRegExpShared = JS::MutableHandle; namespace frontend { class TokenStream; } enum RegExpFlag : uint8_t { IgnoreCaseFlag = 0x01, GlobalFlag = 0x02, MultilineFlag = 0x04, StickyFlag = 0x08, UnicodeFlag = 0x10, DotAllFlag = 0x20, NoFlags = 0x00, AllFlags = 0x3f }; static_assert(IgnoreCaseFlag == REGEXP_IGNORECASE_FLAG && GlobalFlag == REGEXP_GLOBAL_FLAG && MultilineFlag == REGEXP_MULTILINE_FLAG && StickyFlag == REGEXP_STICKY_FLAG && UnicodeFlag == REGEXP_UNICODE_FLAG && DotAllFlag == REGEXP_DOTALL_FLAG, "Flag values should be in sync with self-hosted JS"); enum RegExpRunStatus { RegExpRunStatus_Error, RegExpRunStatus_Success, RegExpRunStatus_Success_NotFound }; extern RegExpObject* RegExpAlloc(ExclusiveContext* cx, HandleObject proto = nullptr); // |regexp| is under-typed because this function's used in the JIT. extern JSObject* CloneRegExpObject(JSContext* cx, JSObject* regexp); /* * A RegExpShared is the compiled representation of a regexp. A RegExpShared is * potentially pointed to by multiple RegExpObjects. Additionally, C++ code may * have pointers to RegExpShareds on the stack. The RegExpShareds are kept in a * table so that they can be reused when compiling the same regex string. * * During a GC, RegExpShared instances are marked and swept like GC things. * Usually, RegExpObjects clear their pointers to their RegExpShareds rather * than explicitly tracing them, so that the RegExpShared and any jitcode can * be reclaimed quicker. However, the RegExpShareds are traced through by * objects when we are preserving jitcode in their zone, to avoid the same * recompilation inefficiencies as normal Ion and baseline compilation. */ class RegExpShared : public gc::TenuredCell { public: enum CompilationMode { Normal, MatchOnly }; enum ForceByteCodeEnum { DontForceByteCode, ForceByteCode }; using JitCodeTable = UniquePtr; using JitCodeTables = Vector; private: friend class RegExpCompartment; friend class RegExpStatics; typedef frontend::TokenStream TokenStream; struct RegExpCompilation { ReadBarriered jitCode; uint8_t* byteCode; RegExpCompilation() : byteCode(nullptr) {} bool compiled(ForceByteCodeEnum force = DontForceByteCode) const { return byteCode || (force == DontForceByteCode && jitCode); } }; /* Source to the RegExp, for lazy compilation. */ GCPtr source; RegExpFlag flags; bool canStringMatch; size_t parenCount; uint32_t numNamedCaptures_; GCPtr groupsTemplate_; RegExpCompilation compilationArray[4]; static int CompilationIndex(CompilationMode mode, bool latin1) { switch (mode) { case Normal: return latin1 ? 0 : 1; case MatchOnly: return latin1 ? 2 : 3; } MOZ_CRASH(); } // Tables referenced by JIT code. JitCodeTables tables; /* Internal functions. */ RegExpShared(JSAtom* source, RegExpFlag flags); static bool compile(JSContext* cx, MutableHandleRegExpShared res, HandleLinearString input, CompilationMode mode, ForceByteCodeEnum force); static bool compile(JSContext* cx, MutableHandleRegExpShared res, HandleAtom pattern, HandleLinearString input, CompilationMode mode, ForceByteCodeEnum force); static bool compileIfNecessary(JSContext* cx, MutableHandleRegExpShared res, HandleLinearString input, CompilationMode mode, ForceByteCodeEnum force); const RegExpCompilation& compilation(CompilationMode mode, bool latin1) const { return compilationArray[CompilationIndex(mode, latin1)]; } RegExpCompilation& compilation(CompilationMode mode, bool latin1) { return compilationArray[CompilationIndex(mode, latin1)]; } public: ~RegExpShared() = delete; // Execute this RegExp on input starting from searchIndex, filling in // matches if specified and otherwise only determining if there is a match. static RegExpRunStatus execute(JSContext* cx, MutableHandleRegExpShared res, HandleLinearString input, size_t searchIndex, MatchPairs* matches, size_t* endIndex); // Register a table with this RegExpShared, and take ownership. bool addTable(JitCodeTable table) { return tables.append(Move(table)); } /* Accessors */ size_t getParenCount() const { MOZ_ASSERT(isCompiled()); return parenCount; } /* Accounts for the "0" (whole match) pair. */ size_t pairCount() const { return getParenCount() + 1; } // not public due to circular inclusion problems static bool initializeNamedCaptures(JSContext* cx, HandleRegExpShared re, irregexp::CharacterVectorVector* names, irregexp::IntegerVector* indices); PlainObject* getGroupsTemplate() { return groupsTemplate_; } uint32_t numNamedCaptures() const { return numNamedCaptures_; } JSAtom* getSource() const { return source; } RegExpFlag getFlags() const { return flags; } bool ignoreCase() const { return flags & IgnoreCaseFlag; } bool global() const { return flags & GlobalFlag; } bool multiline() const { return flags & MultilineFlag; } bool sticky() const { return flags & StickyFlag; } bool unicode() const { return flags & UnicodeFlag; } bool dotAll() const { return flags & DotAllFlag; } bool isCompiled(CompilationMode mode, bool latin1, ForceByteCodeEnum force = DontForceByteCode) const { return compilation(mode, latin1).compiled(force); } bool isCompiled() const { return isCompiled(Normal, true) || isCompiled(Normal, false) || isCompiled(MatchOnly, true) || isCompiled(MatchOnly, false); } void traceChildren(JSTracer* trc); void discardJitCode(); void finalize(FreeOp* fop); static size_t offsetOfSource() { return offsetof(RegExpShared, source); } static size_t offsetOfFlags() { return offsetof(RegExpShared, flags); } static size_t offsetOfParenCount() { return offsetof(RegExpShared, parenCount); } static size_t offsetOfLatin1JitCode(CompilationMode mode) { return offsetof(RegExpShared, compilationArray) + (CompilationIndex(mode, true) * sizeof(RegExpCompilation)) + offsetof(RegExpCompilation, jitCode); } static size_t offsetOfTwoByteJitCode(CompilationMode mode) { return offsetof(RegExpShared, compilationArray) + (CompilationIndex(mode, false) * sizeof(RegExpCompilation)) + offsetof(RegExpCompilation, jitCode); } static size_t offsetOfGroupsTemplate() { return offsetof(RegExpShared, groupsTemplate_); } size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); #ifdef DEBUG static bool dumpBytecode(JSContext* cx, MutableHandleRegExpShared res, bool match_only, HandleLinearString input); #endif public: static const JS::TraceKind TraceKind = JS::TraceKind::RegExpShared; }; class RegExpCompartment { struct Key { JSAtom* atom; uint16_t flag; Key() {} Key(JSAtom* atom, RegExpFlag flag) : atom(atom), flag(flag) { } MOZ_IMPLICIT Key(const ReadBarriered& shared) : atom(shared.unbarrieredGet()->getSource()), flag(shared.unbarrieredGet()->getFlags()) { } typedef Key Lookup; static HashNumber hash(const Lookup& l) { return DefaultHasher::hash(l.atom) ^ (l.flag << 1); } static bool match(Key l, Key r) { return l.atom == r.atom && l.flag == r.flag; } }; /* * The set of all RegExpShareds in the compartment. On every GC, every * RegExpShared that was not marked is deleted and removed from the set. */ using Set = GCHashSet, Key, RuntimeAllocPolicy>; JS::WeakCache set_; /* * This is the template object where the result of re.exec() is based on, * if there is a result. This is used in CreateRegExpMatchResult to set * the input/index properties faster. */ ReadBarriered matchResultTemplateObject_; /* * The shape of RegExp.prototype object that satisfies following: * * RegExp.prototype.flags getter is not modified * * RegExp.prototype.global getter is not modified * * RegExp.prototype.ignoreCase getter is not modified * * RegExp.prototype.multiline getter is not modified * * RegExp.prototype.sticky getter is not modified * * RegExp.prototype.unicode getter is not modified * * RegExp.prototype.exec is an own data property * * RegExp.prototype[@@match] is an own data property * * RegExp.prototype[@@search] is an own data property */ ReadBarriered optimizableRegExpPrototypeShape_; /* * The shape of RegExp instance that satisfies following: * * lastProperty is lastIndex * * prototype is RegExp.prototype */ ReadBarriered optimizableRegExpInstanceShape_; ArrayObject* createMatchResultTemplateObject(JSContext* cx); public: explicit RegExpCompartment(Zone* zone); ~RegExpCompartment(); bool init(JSContext* cx); void sweep(JSRuntime* rt); bool empty() { return set_.empty(); } bool get(JSContext* cx, HandleAtom source, RegExpFlag flags, MutableHandleRegExpShared shared); /* Like 'get', but compile 'maybeOpt' (if non-null). */ bool get(JSContext* cx, HandleAtom source, JSString* maybeOpt, MutableHandleRegExpShared shared); /* Get or create template object used to base the result of .exec() on. */ ArrayObject* getOrCreateMatchResultTemplateObject(JSContext* cx) { if (matchResultTemplateObject_) return matchResultTemplateObject_; return createMatchResultTemplateObject(cx); } Shape* getOptimizableRegExpPrototypeShape() { return optimizableRegExpPrototypeShape_; } void setOptimizableRegExpPrototypeShape(Shape* shape) { optimizableRegExpPrototypeShape_ = shape; } Shape* getOptimizableRegExpInstanceShape() { return optimizableRegExpInstanceShape_; } void setOptimizableRegExpInstanceShape(Shape* shape) { optimizableRegExpInstanceShape_ = shape; } static size_t offsetOfOptimizableRegExpPrototypeShape() { return offsetof(RegExpCompartment, optimizableRegExpPrototypeShape_); } static size_t offsetOfOptimizableRegExpInstanceShape() { return offsetof(RegExpCompartment, optimizableRegExpInstanceShape_); } size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf); }; class RegExpObject : public NativeObject { static const unsigned LAST_INDEX_SLOT = 0; static const unsigned SOURCE_SLOT = 1; static const unsigned FLAGS_SLOT = 2; static_assert(RegExpObject::FLAGS_SLOT == REGEXP_FLAGS_SLOT, "FLAGS_SLOT values should be in sync with self-hosted JS"); public: static const unsigned RESERVED_SLOTS = 3; static const unsigned PRIVATE_SLOT = 3; static const Class class_; static const Class protoClass_; // The maximum number of pairs a MatchResult can have, without having to // allocate a bigger MatchResult. static const size_t MaxPairCount = 14; static RegExpObject* create(ExclusiveContext* cx, const char16_t* chars, size_t length, RegExpFlag flags, frontend::TokenStream* ts, LifoAlloc& alloc); static RegExpObject* create(ExclusiveContext* cx, HandleAtom atom, RegExpFlag flags, frontend::TokenStream* ts, LifoAlloc& alloc); /* * Compute the initial shape to associate with fresh RegExp objects, * encoding their initial properties. Return the shape after * changing |obj|'s last property to it. */ static Shape* assignInitialShape(ExclusiveContext* cx, Handle obj); /* Accessors. */ static unsigned lastIndexSlot() { return LAST_INDEX_SLOT; } static bool isInitialShape(RegExpObject* rx) { Shape* shape = rx->lastProperty(); if (!shape->hasSlot()) return false; if (shape->maybeSlot() != LAST_INDEX_SLOT) return false; return true; } const Value& getLastIndex() const { return getSlot(LAST_INDEX_SLOT); } void setLastIndex(double d) { setSlot(LAST_INDEX_SLOT, NumberValue(d)); } void zeroLastIndex(ExclusiveContext* cx) { MOZ_ASSERT(lookupPure(cx->names().lastIndex)->writable(), "can't infallibly zero a non-writable lastIndex on a " "RegExp that's been exposed to script"); setSlot(LAST_INDEX_SLOT, Int32Value(0)); } JSFlatString* toString(JSContext* cx) const; JSAtom* getSource() const { return &getSlot(SOURCE_SLOT).toString()->asAtom(); } void setSource(JSAtom* source) { setSlot(SOURCE_SLOT, StringValue(source)); } /* Flags. */ static unsigned flagsSlot() { return FLAGS_SLOT; } RegExpFlag getFlags() const { return RegExpFlag(getFixedSlot(FLAGS_SLOT).toInt32()); } void setFlags(RegExpFlag flags) { setSlot(FLAGS_SLOT, Int32Value(flags)); } bool ignoreCase() const { return getFlags() & IgnoreCaseFlag; } bool global() const { return getFlags() & GlobalFlag; } bool multiline() const { return getFlags() & MultilineFlag; } bool sticky() const { return getFlags() & StickyFlag; } bool unicode() const { return getFlags() & UnicodeFlag; } bool dotAll() const { return getFlags() & DotAllFlag; } static bool isOriginalFlagGetter(JSNative native, RegExpFlag* mask); [[nodiscard]] static bool getShared(JSContext* cx, Handle regexp, MutableHandleRegExpShared shared); bool hasShared() { return !!sharedRef(); } void setShared(RegExpShared& shared) { MOZ_ASSERT(!hasShared()); sharedRef().init(&shared); } static void trace(JSTracer* trc, JSObject* obj); void trace(JSTracer* trc); void initIgnoringLastIndex(HandleAtom source, RegExpFlag flags); // NOTE: This method is *only* safe to call on RegExps that haven't been // exposed to script, because it requires that the "lastIndex" // property be writable. void initAndZeroLastIndex(HandleAtom source, RegExpFlag flags, ExclusiveContext* cx); #ifdef DEBUG [[nodiscard]] static bool dumpBytecode(JSContext* cx, Handle regexp, bool match_only, HandleLinearString input); #endif private: /* * Precondition: the syntax for |source| has already been validated. * Side effect: sets the private field. */ [[nodiscard]] static bool createShared(JSContext* cx, Handle regexp, MutableHandleRegExpShared shared); PreBarriered& sharedRef() { auto& ref = NativeObject::privateRef(PRIVATE_SLOT); return reinterpret_cast&>(ref); } /* Call setShared in preference to setPrivate. */ void setPrivate(void* priv) = delete; }; /* * Parse regexp flags. Report an error and return false if an invalid * sequence of flags is encountered (repeat/invalid flag). * * N.B. flagStr must be rooted. */ bool ParseRegExpFlags(JSContext* cx, JSString* flagStr, RegExpFlag* flagsOut); /* Assuming GetBuiltinClass(obj) is ESClass::RegExp, return a RegExpShared for obj. */ inline bool RegExpToShared(JSContext* cx, HandleObject obj, MutableHandleRegExpShared shared) { if (obj->is()) return RegExpObject::getShared(cx, obj.as(), shared); return Proxy::regexp_toShared(cx, obj, shared); } template bool XDRScriptRegExpObject(XDRState* xdr, MutableHandle objp); extern JSObject* CloneScriptRegExpObject(JSContext* cx, RegExpObject& re); /* Escape all slashes and newlines in the given string. */ extern JSAtom* EscapeRegExpPattern(JSContext* cx, HandleAtom src); template extern bool HasRegExpMetaChars(const CharT* chars, size_t length); extern bool StringHasRegExpMetaChars(JSLinearString* str); } /* namespace js */ namespace JS { namespace ubi { template <> class Concrete : TracerConcrete { protected: explicit Concrete(js::RegExpShared* ptr) : TracerConcrete(ptr) { } public: static void construct(void* storage, js::RegExpShared* ptr) { new (storage) Concrete(ptr); } CoarseType coarseType() const final { return CoarseType::Other; } Size size(mozilla::MallocSizeOf mallocSizeOf) const override; const char16_t* typeName() const override { return concreteTypeName; } static const char16_t concreteTypeName[]; }; } // namespace ubi } // namespace JS #endif /* vm_RegExpObject_h */