diff options
Diffstat (limited to 'js/src/new-regexp/regexp-bytecode-peephole.cc')
-rw-r--r-- | js/src/new-regexp/regexp-bytecode-peephole.cc | 1028 |
1 files changed, 0 insertions, 1028 deletions
diff --git a/js/src/new-regexp/regexp-bytecode-peephole.cc b/js/src/new-regexp/regexp-bytecode-peephole.cc deleted file mode 100644 index f105a5094..000000000 --- a/js/src/new-regexp/regexp-bytecode-peephole.cc +++ /dev/null @@ -1,1028 +0,0 @@ -// Copyright 2019 the V8 project authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "new-regexp/regexp-bytecode-peephole.h" - -#include "new-regexp/regexp-bytecodes.h" - -namespace v8 { -namespace internal { - -namespace { - -struct BytecodeArgument { - int offset; - int length; - - BytecodeArgument(int offset, int length) : offset(offset), length(length) {} -}; - -struct BytecodeArgumentMapping : BytecodeArgument { - int new_length; - - BytecodeArgumentMapping(int offset, int length, int new_length) - : BytecodeArgument(offset, length), new_length(new_length) {} -}; - -struct BytecodeArgumentCheck : BytecodeArgument { - enum CheckType { kCheckAddress = 0, kCheckValue }; - CheckType type; - int check_offset; - int check_length; - - BytecodeArgumentCheck(int offset, int length, int check_offset) - : BytecodeArgument(offset, length), - type(kCheckAddress), - check_offset(check_offset) {} - BytecodeArgumentCheck(int offset, int length, int check_offset, - int check_length) - : BytecodeArgument(offset, length), - type(kCheckValue), - check_offset(check_offset), - check_length(check_length) {} -}; - -// Trie-Node for storing bytecode sequences we want to optimize. -class BytecodeSequenceNode { - public: - // Dummy bytecode used when we need to store/return a bytecode but it's not a - // valid bytecode in the current context. - static constexpr int kDummyBytecode = -1; - - BytecodeSequenceNode(int bytecode, Zone* zone); - // Adds a new node as child of the current node if it isn't a child already. - BytecodeSequenceNode& FollowedBy(int bytecode); - // Marks the end of a sequence and sets optimized bytecode to replace all - // bytecodes of the sequence with. - BytecodeSequenceNode& ReplaceWith(int bytecode); - // Maps arguments of bytecodes in the sequence to the optimized bytecode. - // Order of invocation determines order of arguments in the optimized - // bytecode. - // Invoking this method is only allowed on nodes that mark the end of a valid - // sequence (i.e. after ReplaceWith()). - // bytecode_index_in_sequence: Zero-based index of the referred bytecode - // within the sequence (e.g. the bytecode passed to CreateSequence() has - // index 0). - // argument_offset: Zero-based offset to the argument within the bytecode - // (e.g. the first argument that's not packed with the bytecode has offset 4). - // argument_byte_length: Length of the argument. - // new_argument_byte_length: Length of the argument in the new bytecode - // (= argument_byte_length if omitted). - BytecodeSequenceNode& MapArgument(int bytecode_index_in_sequence, - int argument_offset, - int argument_byte_length, - int new_argument_byte_length = 0); - // Adds a check to the sequence node making it only a valid sequence when the - // argument of the current bytecode at the specified offset matches the offset - // to check against. - // argument_offset: Zero-based offset to the argument within the bytecode - // (e.g. the first argument that's not packed with the bytecode has offset 4). - // argument_byte_length: Length of the argument. - // check_byte_offset: Zero-based offset relative to the beginning of the - // sequence that needs to match the value given by argument_offset. (e.g. - // check_byte_offset 0 matches the address of the first bytecode in the - // sequence). - BytecodeSequenceNode& IfArgumentEqualsOffset(int argument_offset, - int argument_byte_length, - int check_byte_offset); - // Adds a check to the sequence node making it only a valid sequence when the - // argument of the current bytecode at the specified offset matches the - // argument of another bytecode in the sequence. - // This is similar to IfArgumentEqualsOffset, except that this method matches - // the values of both arguments. - BytecodeSequenceNode& IfArgumentEqualsValueAtOffset( - int argument_offset, int argument_byte_length, - int other_bytecode_index_in_sequence, int other_argument_offset, - int other_argument_byte_length); - // Marks an argument as unused. - // All arguments that are not mapped explicitly have to be marked as unused. - // bytecode_index_in_sequence: Zero-based index of the referred bytecode - // within the sequence (e.g. the bytecode passed to CreateSequence() has - // index 0). - // argument_offset: Zero-based offset to the argument within the bytecode - // (e.g. the first argument that's not packed with the bytecode has offset 4). - // argument_byte_length: Length of the argument. - BytecodeSequenceNode& IgnoreArgument(int bytecode_index_in_sequence, - int argument_offset, - int argument_byte_length); - // Checks if the current node is valid for the sequence. I.e. all conditions - // set by IfArgumentEqualsOffset and IfArgumentEquals are fulfilled by this - // node for the actual bytecode sequence. - bool CheckArguments(const byte* bytecode, int pc); - // Returns whether this node marks the end of a valid sequence (i.e. can be - // replaced with an optimized bytecode). - bool IsSequence() const; - // Returns the length of the sequence in bytes. - int SequenceLength() const; - // Returns the optimized bytecode for the node or kDummyBytecode if it is not - // the end of a valid sequence. - int OptimizedBytecode() const; - // Returns the child of the current node matching the given bytecode or - // nullptr if no such child is found. - BytecodeSequenceNode* Find(int bytecode) const; - // Returns number of arguments mapped to the current node. - // Invoking this method is only allowed on nodes that mark the end of a valid - // sequence (i.e. if IsSequence()) - size_t ArgumentSize() const; - // Returns the argument-mapping of the argument at index. - // Invoking this method is only allowed on nodes that mark the end of a valid - // sequence (i.e. if IsSequence()) - BytecodeArgumentMapping ArgumentMapping(size_t index) const; - // Returns an iterator to begin of ignored arguments. - // Invoking this method is only allowed on nodes that mark the end of a valid - // sequence (i.e. if IsSequence()) - ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredBegin() const; - // Returns an iterator to end of ignored arguments. - // Invoking this method is only allowed on nodes that mark the end of a valid - // sequence (i.e. if IsSequence()) - ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredEnd() const; - // Returns whether the current node has ignored argument or not. - bool HasIgnoredArguments() const; - - private: - // Returns a node in the sequence specified by its index within the sequence. - BytecodeSequenceNode& GetNodeByIndexInSequence(int index_in_sequence); - Zone* zone() const; - - int bytecode_; - int bytecode_replacement_; - int index_in_sequence_; - int start_offset_; - BytecodeSequenceNode* parent_; - ZoneUnorderedMap<int, BytecodeSequenceNode*> children_; - ZoneVector<BytecodeArgumentMapping>* argument_mapping_; - ZoneLinkedList<BytecodeArgumentCheck>* argument_check_; - ZoneLinkedList<BytecodeArgument>* argument_ignored_; - - Zone* zone_; -}; - -class RegExpBytecodePeephole { - public: - RegExpBytecodePeephole(Zone* zone, size_t buffer_size, - const ZoneUnorderedMap<int, int>& jump_edges); - - // Parses bytecode and fills the internal buffer with the potentially - // optimized bytecode. Returns true when optimizations were performed, false - // otherwise. - bool OptimizeBytecode(const byte* bytecode, int length); - // Copies the internal bytecode buffer to another buffer. The caller is - // responsible for allocating/freeing the memory. - void CopyOptimizedBytecode(byte* to_address) const; - int Length() const; - - private: - // Sets up all sequences that are going to be used. - void DefineStandardSequences(); - // Starts a new bytecode sequence. - BytecodeSequenceNode& CreateSequence(int bytecode); - // Checks for optimization candidates at pc and emits optimized bytecode to - // the internal buffer. Returns the length of replaced bytecodes in bytes. - int TryOptimizeSequence(const byte* bytecode, int start_pc); - // Emits optimized bytecode to the internal buffer. start_pc points to the - // start of the sequence in bytecode and last_node is the last - // BytecodeSequenceNode of the matching sequence found. - void EmitOptimization(int start_pc, const byte* bytecode, - const BytecodeSequenceNode& last_node); - // Adds a relative jump source fixup at pos. - // Jump source fixups are used to find offsets in the new bytecode that - // contain jump sources. - void AddJumpSourceFixup(int fixup, int pos); - // Adds a relative jump destination fixup at pos. - // Jump destination fixups are used to find offsets in the new bytecode that - // can be jumped to. - void AddJumpDestinationFixup(int fixup, int pos); - // Sets an absolute jump destination fixup at pos. - void SetJumpDestinationFixup(int fixup, int pos); - // Prepare internal structures used to fixup jumps. - void PrepareJumpStructures(const ZoneUnorderedMap<int, int>& jump_edges); - // Updates all jump targets in the new bytecode. - void FixJumps(); - // Update a single jump. - void FixJump(int jump_source, int jump_destination); - void AddSentinelFixups(int pos); - template <typename T> - void EmitValue(T value); - template <typename T> - void OverwriteValue(int offset, T value); - void CopyRangeToOutput(const byte* orig_bytecode, int start, int length); - void SetRange(byte value, int count); - void EmitArgument(int start_pc, const byte* bytecode, - BytecodeArgumentMapping arg); - int pc() const; - Zone* zone() const; - - ZoneVector<byte> optimized_bytecode_buffer_; - BytecodeSequenceNode* sequences_; - // Jumps used in old bytecode. - // Key: Jump source (offset where destination is stored in old bytecode) - // Value: Destination - ZoneMap<int, int> jump_edges_; - // Jumps used in new bytecode. - // Key: Jump source (offset where destination is stored in new bytecode) - // Value: Destination - ZoneMap<int, int> jump_edges_mapped_; - // Number of times a jump destination is used within the bytecode. - // Key: Jump destination (offset in old bytecode). - // Value: Number of times jump destination is used. - ZoneMap<int, int> jump_usage_counts_; - // Maps offsets in old bytecode to fixups of sources (delta to new bytecode). - // Key: Offset in old bytecode from where the fixup is valid. - // Value: Delta to map jump source from old bytecode to new bytecode in bytes. - ZoneMap<int, int> jump_source_fixups_; - // Maps offsets in old bytecode to fixups of destinations (delta to new - // bytecode). - // Key: Offset in old bytecode from where the fixup is valid. - // Value: Delta to map jump destinations from old bytecode to new bytecode in - // bytes. - ZoneMap<int, int> jump_destination_fixups_; - - Zone* zone_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodePeephole); -}; - -template <typename T> -T GetValue(const byte* buffer, int pos) { - DCHECK(IsAligned(reinterpret_cast<Address>(buffer + pos), alignof(T))); - return *reinterpret_cast<const T*>(buffer + pos); -} - -int32_t GetArgumentValue(const byte* bytecode, int offset, int length) { - switch (length) { - case 1: - return GetValue<byte>(bytecode, offset); - break; - case 2: - return GetValue<int16_t>(bytecode, offset); - break; - case 4: - return GetValue<int32_t>(bytecode, offset); - break; - default: - UNREACHABLE(); - } -} - -BytecodeSequenceNode::BytecodeSequenceNode(int bytecode, Zone* zone) - : bytecode_(bytecode), - bytecode_replacement_(kDummyBytecode), - index_in_sequence_(0), - start_offset_(0), - parent_(nullptr), - children_(ZoneUnorderedMap<int, BytecodeSequenceNode*>(zone)), - argument_mapping_(new (zone->New(sizeof(*argument_mapping_))) - ZoneVector<BytecodeArgumentMapping>(zone)), - argument_check_(new (zone->New(sizeof(*argument_check_))) - ZoneLinkedList<BytecodeArgumentCheck>(zone)), - argument_ignored_(new (zone->New(sizeof(*argument_ignored_))) - ZoneLinkedList<BytecodeArgument>(zone)), - zone_(zone) {} - -BytecodeSequenceNode& BytecodeSequenceNode::FollowedBy(int bytecode) { - DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); - - if (children_.find(bytecode) == children_.end()) { - BytecodeSequenceNode* new_node = - new (zone()->New(sizeof(BytecodeSequenceNode))) - BytecodeSequenceNode(bytecode, zone()); - // If node is not the first in the sequence, set offsets and parent. - if (bytecode_ != kDummyBytecode) { - new_node->start_offset_ = start_offset_ + RegExpBytecodeLength(bytecode_); - new_node->index_in_sequence_ = index_in_sequence_ + 1; - new_node->parent_ = this; - } - children_[bytecode] = new_node; - } - - return *children_[bytecode]; -} - -BytecodeSequenceNode& BytecodeSequenceNode::ReplaceWith(int bytecode) { - DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); - - bytecode_replacement_ = bytecode; - - return *this; -} - -BytecodeSequenceNode& BytecodeSequenceNode::MapArgument( - int bytecode_index_in_sequence, int argument_offset, - int argument_byte_length, int new_argument_byte_length) { - DCHECK(IsSequence()); - DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_); - - BytecodeSequenceNode& ref_node = - GetNodeByIndexInSequence(bytecode_index_in_sequence); - DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); - - int absolute_offset = ref_node.start_offset_ + argument_offset; - if (new_argument_byte_length == 0) { - new_argument_byte_length = argument_byte_length; - } - - argument_mapping_->push_back(BytecodeArgumentMapping{ - absolute_offset, argument_byte_length, new_argument_byte_length}); - - return *this; -} - -BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsOffset( - int argument_offset, int argument_byte_length, int check_byte_offset) { - DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_)); - DCHECK(argument_byte_length == 1 || argument_byte_length == 2 || - argument_byte_length == 4); - - int absolute_offset = start_offset_ + argument_offset; - - argument_check_->push_back(BytecodeArgumentCheck{ - absolute_offset, argument_byte_length, check_byte_offset}); - - return *this; -} - -BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsValueAtOffset( - int argument_offset, int argument_byte_length, - int other_bytecode_index_in_sequence, int other_argument_offset, - int other_argument_byte_length) { - DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_)); - DCHECK_LE(other_bytecode_index_in_sequence, index_in_sequence_); - DCHECK_EQ(argument_byte_length, other_argument_byte_length); - - BytecodeSequenceNode& ref_node = - GetNodeByIndexInSequence(other_bytecode_index_in_sequence); - DCHECK_LT(other_argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); - - int absolute_offset = start_offset_ + argument_offset; - int other_absolute_offset = ref_node.start_offset_ + other_argument_offset; - - argument_check_->push_back( - BytecodeArgumentCheck{absolute_offset, argument_byte_length, - other_absolute_offset, other_argument_byte_length}); - - return *this; -} - -BytecodeSequenceNode& BytecodeSequenceNode::IgnoreArgument( - int bytecode_index_in_sequence, int argument_offset, - int argument_byte_length) { - DCHECK(IsSequence()); - DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_); - - BytecodeSequenceNode& ref_node = - GetNodeByIndexInSequence(bytecode_index_in_sequence); - DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); - - int absolute_offset = ref_node.start_offset_ + argument_offset; - - argument_ignored_->push_back( - BytecodeArgument{absolute_offset, argument_byte_length}); - - return *this; -} - -bool BytecodeSequenceNode::CheckArguments(const byte* bytecode, int pc) { - bool is_valid = true; - for (auto check_iter = argument_check_->begin(); - check_iter != argument_check_->end() && is_valid; check_iter++) { - auto value = - GetArgumentValue(bytecode, pc + check_iter->offset, check_iter->length); - if (check_iter->type == BytecodeArgumentCheck::kCheckAddress) { - is_valid &= value == pc + check_iter->check_offset; - } else if (check_iter->type == BytecodeArgumentCheck::kCheckValue) { - auto other_value = GetArgumentValue( - bytecode, pc + check_iter->check_offset, check_iter->check_length); - is_valid &= value == other_value; - } else { - UNREACHABLE(); - } - } - return is_valid; -} - -bool BytecodeSequenceNode::IsSequence() const { - return bytecode_replacement_ != kDummyBytecode; -} - -int BytecodeSequenceNode::SequenceLength() const { - return start_offset_ + RegExpBytecodeLength(bytecode_); -} - -int BytecodeSequenceNode::OptimizedBytecode() const { - return bytecode_replacement_; -} - -BytecodeSequenceNode* BytecodeSequenceNode::Find(int bytecode) const { - auto found = children_.find(bytecode); - if (found == children_.end()) return nullptr; - return found->second; -} - -size_t BytecodeSequenceNode::ArgumentSize() const { - DCHECK(IsSequence()); - return argument_mapping_->size(); -} - -BytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping( - size_t index) const { - DCHECK(IsSequence()); - DCHECK(argument_mapping_ != nullptr); - DCHECK_LT(index, argument_mapping_->size()); - - return argument_mapping_->at(index); -} - -ZoneLinkedList<BytecodeArgument>::iterator -BytecodeSequenceNode::ArgumentIgnoredBegin() const { - DCHECK(IsSequence()); - DCHECK(argument_ignored_ != nullptr); - return argument_ignored_->begin(); -} - -ZoneLinkedList<BytecodeArgument>::iterator -BytecodeSequenceNode::ArgumentIgnoredEnd() const { - DCHECK(IsSequence()); - DCHECK(argument_ignored_ != nullptr); - return argument_ignored_->end(); -} - -bool BytecodeSequenceNode::HasIgnoredArguments() const { - return argument_ignored_ != nullptr; -} - -BytecodeSequenceNode& BytecodeSequenceNode::GetNodeByIndexInSequence( - int index_in_sequence) { - DCHECK_LE(index_in_sequence, index_in_sequence_); - - if (index_in_sequence < index_in_sequence_) { - DCHECK(parent_ != nullptr); - return parent_->GetNodeByIndexInSequence(index_in_sequence); - } else { - return *this; - } -} - -Zone* BytecodeSequenceNode::zone() const { return zone_; } - -RegExpBytecodePeephole::RegExpBytecodePeephole( - Zone* zone, size_t buffer_size, - const ZoneUnorderedMap<int, int>& jump_edges) - : optimized_bytecode_buffer_(zone), - sequences_(new (zone->New(sizeof(*sequences_))) BytecodeSequenceNode( - BytecodeSequenceNode::kDummyBytecode, zone)), - jump_edges_(zone), - jump_edges_mapped_(zone), - jump_usage_counts_(zone), - jump_source_fixups_(zone), - jump_destination_fixups_(zone), - zone_(zone) { - optimized_bytecode_buffer_.reserve(buffer_size); - PrepareJumpStructures(jump_edges); - DefineStandardSequences(); - // Sentinel fixups at beginning of bytecode (position -1) so we don't have to - // check for end of iterator inside the fixup loop. - // In general fixups are deltas of original offsets of jump - // sources/destinations (in the old bytecode) to find them in the new - // bytecode. All jump targets are fixed after the new bytecode is fully - // emitted in the internal buffer. - AddSentinelFixups(-1); - // Sentinel fixups at end of (old) bytecode so we don't have to check for - // end of iterator inside the fixup loop. - DCHECK_LE(buffer_size, std::numeric_limits<int>::max()); - AddSentinelFixups(static_cast<int>(buffer_size)); -} - -void RegExpBytecodePeephole::DefineStandardSequences() { - // Commonly used sequences can be found by creating regexp bytecode traces - // (--trace-regexp-bytecodes) and using v8/tools/regexp-sequences.py. - CreateSequence(BC_LOAD_CURRENT_CHAR) - .FollowedBy(BC_CHECK_BIT_IN_TABLE) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_BIT_IN_TABLE) - .MapArgument(0, 1, 3) // load offset - .MapArgument(2, 1, 3, 4) // advance by - .MapArgument(1, 8, 16) // bit table - .MapArgument(1, 4, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(2, 4, 4); // loop jump - - CreateSequence(BC_CHECK_CURRENT_POSITION) - .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED) - .FollowedBy(BC_CHECK_CHAR) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_CHAR_POS_CHECKED) - .MapArgument(1, 1, 3) // load offset - .MapArgument(3, 1, 3, 2) // advance_by - .MapArgument(2, 1, 3, 2) // c - .MapArgument(0, 1, 3, 4) // eats at least - .MapArgument(2, 4, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(3, 4, 4); // loop jump - - CreateSequence(BC_CHECK_CURRENT_POSITION) - .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED) - .FollowedBy(BC_AND_CHECK_CHAR) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_CHAR_AND) - .MapArgument(1, 1, 3) // load offset - .MapArgument(3, 1, 3, 2) // advance_by - .MapArgument(2, 1, 3, 2) // c - .MapArgument(2, 4, 4) // mask - .MapArgument(0, 1, 3, 4) // eats at least - .MapArgument(2, 8, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(3, 4, 4); // loop jump - - // TODO(pthier): It might make sense for short sequences like this one to only - // optimize them if the resulting optimization is not longer than the current - // one. This could be the case if there are jumps inside the sequence and we - // have to replicate parts of the sequence. A method to mark such sequences - // might be useful. - CreateSequence(BC_LOAD_CURRENT_CHAR) - .FollowedBy(BC_CHECK_CHAR) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_CHAR) - .MapArgument(0, 1, 3) // load offset - .MapArgument(2, 1, 3, 2) // advance by - .MapArgument(1, 1, 3, 2) // character - .MapArgument(1, 4, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(2, 4, 4); // loop jump - - CreateSequence(BC_LOAD_CURRENT_CHAR) - .FollowedBy(BC_CHECK_CHAR) - .FollowedBy(BC_CHECK_CHAR) - // Sequence is only valid if the jump targets of both CHECK_CHAR bytecodes - // are equal. - .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_CHAR_OR_CHAR) - .MapArgument(0, 1, 3) // load offset - .MapArgument(3, 1, 3, 4) // advance by - .MapArgument(1, 1, 3, 2) // character 1 - .MapArgument(2, 1, 3, 2) // character 2 - .MapArgument(1, 4, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(2, 4, 4) // goto when match 2 - .IgnoreArgument(3, 4, 4); // loop jump - - CreateSequence(BC_LOAD_CURRENT_CHAR) - .FollowedBy(BC_CHECK_GT) - // Sequence is only valid if the jump target of CHECK_GT is the first - // bytecode AFTER the whole sequence. - .IfArgumentEqualsOffset(4, 4, 56) - .FollowedBy(BC_CHECK_BIT_IN_TABLE) - // Sequence is only valid if the jump target of CHECK_BIT_IN_TABLE is - // the ADVANCE_CP_AND_GOTO bytecode at the end of the sequence. - .IfArgumentEqualsOffset(4, 4, 48) - .FollowedBy(BC_GOTO) - // Sequence is only valid if the jump target of GOTO is the same as the - // jump target of CHECK_GT (i.e. both jump to the first bytecode AFTER the - // whole sequence. - .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4) - .FollowedBy(BC_ADVANCE_CP_AND_GOTO) - // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the - // first bytecode in this sequence. - .IfArgumentEqualsOffset(4, 4, 0) - .ReplaceWith(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) - .MapArgument(0, 1, 3) // load offset - .MapArgument(4, 1, 3, 2) // advance by - .MapArgument(1, 1, 3, 2) // character - .MapArgument(2, 8, 16) // bit table - .MapArgument(1, 4, 4) // goto when match - .MapArgument(0, 4, 4) // goto on failure - .IgnoreArgument(2, 4, 4) // indirect loop jump - .IgnoreArgument(3, 4, 4) // jump out of loop - .IgnoreArgument(4, 4, 4); // loop jump -} - -bool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode, - int length) { - int old_pc = 0; - bool did_optimize = false; - - while (old_pc < length) { - int replaced_len = TryOptimizeSequence(bytecode, old_pc); - if (replaced_len > 0) { - old_pc += replaced_len; - did_optimize = true; - } else { - int bc = bytecode[old_pc]; - int bc_len = RegExpBytecodeLength(bc); - CopyRangeToOutput(bytecode, old_pc, bc_len); - old_pc += bc_len; - } - } - - if (did_optimize) { - FixJumps(); - } - - return did_optimize; -} - -void RegExpBytecodePeephole::CopyOptimizedBytecode(byte* to_address) const { - MemCopy(to_address, &(*optimized_bytecode_buffer_.begin()), Length()); -} - -int RegExpBytecodePeephole::Length() const { return pc(); } - -BytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) { - DCHECK(sequences_ != nullptr); - DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); - - return sequences_->FollowedBy(bytecode); -} - -int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode, - int start_pc) { - BytecodeSequenceNode* seq_node = sequences_; - BytecodeSequenceNode* valid_seq_end = nullptr; - - int current_pc = start_pc; - - // Check for the longest valid sequence matching any of the pre-defined - // sequences in the Trie data structure. - while ((seq_node = seq_node->Find(bytecode[current_pc]))) { - if (!seq_node->CheckArguments(bytecode, start_pc)) { - break; - } - if (seq_node->IsSequence()) { - valid_seq_end = seq_node; - } - current_pc += RegExpBytecodeLength(bytecode[current_pc]); - } - - if (valid_seq_end) { - EmitOptimization(start_pc, bytecode, *valid_seq_end); - return valid_seq_end->SequenceLength(); - } - - return 0; -} - -void RegExpBytecodePeephole::EmitOptimization( - int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node) { -#ifdef DEBUG - int optimized_start_pc = pc(); -#endif - // Jump sources that are mapped or marked as unused will be deleted at the end - // of this method. We don't delete them immediately as we might need the - // information when we have to preserve bytecodes at the end. - // TODO(pthier): Replace with a stack-allocated data structure. - ZoneLinkedList<int> delete_jumps = ZoneLinkedList<int>(zone()); - - uint32_t bc = last_node.OptimizedBytecode(); - EmitValue(bc); - - for (size_t arg = 0; arg < last_node.ArgumentSize(); arg++) { - BytecodeArgumentMapping arg_map = last_node.ArgumentMapping(arg); - int arg_pos = start_pc + arg_map.offset; - // If we map any jump source we mark the old source for deletion and insert - // a new jump. - auto jump_edge_iter = jump_edges_.find(arg_pos); - if (jump_edge_iter != jump_edges_.end()) { - int jump_source = jump_edge_iter->first; - int jump_destination = jump_edge_iter->second; - // Add new jump edge add current position. - jump_edges_mapped_.emplace(Length(), jump_destination); - // Mark old jump edge for deletion. - delete_jumps.push_back(jump_source); - // Decrement usage count of jump destination. - auto jump_count_iter = jump_usage_counts_.find(jump_destination); - DCHECK(jump_count_iter != jump_usage_counts_.end()); - int& usage_count = jump_count_iter->second; - --usage_count; - } - // TODO(pthier): DCHECK that mapped arguments are never sources of jumps - // to destinations inside the sequence. - EmitArgument(start_pc, bytecode, arg_map); - } - DCHECK_EQ(pc(), optimized_start_pc + - RegExpBytecodeLength(last_node.OptimizedBytecode())); - - // Remove jumps from arguments we ignore. - if (last_node.HasIgnoredArguments()) { - for (auto ignored_arg = last_node.ArgumentIgnoredBegin(); - ignored_arg != last_node.ArgumentIgnoredEnd(); ignored_arg++) { - auto jump_edge_iter = jump_edges_.find(start_pc + ignored_arg->offset); - if (jump_edge_iter != jump_edges_.end()) { - int jump_source = jump_edge_iter->first; - int jump_destination = jump_edge_iter->second; - // Mark old jump edge for deletion. - delete_jumps.push_back(jump_source); - // Decrement usage count of jump destination. - auto jump_count_iter = jump_usage_counts_.find(jump_destination); - DCHECK(jump_count_iter != jump_usage_counts_.end()); - int& usage_count = jump_count_iter->second; - --usage_count; - } - } - } - - int fixup_length = RegExpBytecodeLength(bc) - last_node.SequenceLength(); - - // Check if there are any jumps inside the old sequence. - // If so we have to keep the bytecodes that are jumped to around. - auto jump_destination_candidate = jump_usage_counts_.upper_bound(start_pc); - int jump_candidate_destination = jump_destination_candidate->first; - int jump_candidate_count = jump_destination_candidate->second; - // Jump destinations only jumped to from inside the sequence will be ignored. - while (jump_destination_candidate != jump_usage_counts_.end() && - jump_candidate_count == 0) { - ++jump_destination_candidate; - jump_candidate_destination = jump_destination_candidate->first; - jump_candidate_count = jump_destination_candidate->second; - } - - int preserve_from = start_pc + last_node.SequenceLength(); - if (jump_destination_candidate != jump_usage_counts_.end() && - jump_candidate_destination < start_pc + last_node.SequenceLength()) { - preserve_from = jump_candidate_destination; - // Check if any jump in the sequence we are preserving has a jump - // destination inside the optimized sequence before the current position we - // want to preserve. If so we have to preserve all bytecodes starting at - // this jump destination. - for (auto jump_iter = jump_edges_.lower_bound(preserve_from); - jump_iter != jump_edges_.end() && - jump_iter->first /* jump source */ < - start_pc + last_node.SequenceLength(); - ++jump_iter) { - int jump_destination = jump_iter->second; - if (jump_destination > start_pc && jump_destination < preserve_from) { - preserve_from = jump_destination; - } - } - - // We preserve everything to the end of the sequence. This is conservative - // since it would be enough to preserve all bytecudes up to an unconditional - // jump. - int preserve_length = start_pc + last_node.SequenceLength() - preserve_from; - fixup_length += preserve_length; - // Jumps after the start of the preserved sequence need fixup. - AddJumpSourceFixup(fixup_length, - start_pc + last_node.SequenceLength() - preserve_length); - // All jump targets after the start of the optimized sequence need to be - // fixed relative to the length of the optimized sequence including - // bytecodes we preserved. - AddJumpDestinationFixup(fixup_length, start_pc + 1); - // Jumps to the sequence we preserved need absolute fixup as they could - // occur before or after the sequence. - SetJumpDestinationFixup(pc() - preserve_from, preserve_from); - CopyRangeToOutput(bytecode, preserve_from, preserve_length); - } else { - AddJumpDestinationFixup(fixup_length, start_pc + 1); - // Jumps after the end of the old sequence need fixup. - AddJumpSourceFixup(fixup_length, start_pc + last_node.SequenceLength()); - } - - // Delete jumps we definitely don't need anymore - for (int del : delete_jumps) { - if (del < preserve_from) { - jump_edges_.erase(del); - } - } -} - -void RegExpBytecodePeephole::AddJumpSourceFixup(int fixup, int pos) { - auto previous_fixup = jump_source_fixups_.lower_bound(pos); - DCHECK(previous_fixup != jump_source_fixups_.end()); - DCHECK(previous_fixup != jump_source_fixups_.begin()); - - int previous_fixup_value = (--previous_fixup)->second; - jump_source_fixups_[pos] = previous_fixup_value + fixup; -} - -void RegExpBytecodePeephole::AddJumpDestinationFixup(int fixup, int pos) { - auto previous_fixup = jump_destination_fixups_.lower_bound(pos); - DCHECK(previous_fixup != jump_destination_fixups_.end()); - DCHECK(previous_fixup != jump_destination_fixups_.begin()); - - int previous_fixup_value = (--previous_fixup)->second; - jump_destination_fixups_[pos] = previous_fixup_value + fixup; -} - -void RegExpBytecodePeephole::SetJumpDestinationFixup(int fixup, int pos) { - auto previous_fixup = jump_destination_fixups_.lower_bound(pos); - DCHECK(previous_fixup != jump_destination_fixups_.end()); - DCHECK(previous_fixup != jump_destination_fixups_.begin()); - - int previous_fixup_value = (--previous_fixup)->second; - jump_destination_fixups_.emplace(pos, fixup); - jump_destination_fixups_.emplace(pos + 1, previous_fixup_value); -} - -void RegExpBytecodePeephole::PrepareJumpStructures( - const ZoneUnorderedMap<int, int>& jump_edges) { - for (auto jump_edge : jump_edges) { - int jump_source = jump_edge.first; - int jump_destination = jump_edge.second; - - jump_edges_.emplace(jump_source, jump_destination); - jump_usage_counts_[jump_destination]++; - } -} - -void RegExpBytecodePeephole::FixJumps() { - int position_fixup = 0; - // Next position where fixup changes. - auto next_source_fixup = jump_source_fixups_.lower_bound(0); - int next_source_fixup_offset = next_source_fixup->first; - int next_source_fixup_value = next_source_fixup->second; - - for (auto jump_edge : jump_edges_) { - int jump_source = jump_edge.first; - int jump_destination = jump_edge.second; - while (jump_source >= next_source_fixup_offset) { - position_fixup = next_source_fixup_value; - ++next_source_fixup; - next_source_fixup_offset = next_source_fixup->first; - next_source_fixup_value = next_source_fixup->second; - } - jump_source += position_fixup; - - FixJump(jump_source, jump_destination); - } - - // Mapped jump edges don't need source fixups, as the position already is an - // offset in the new bytecode. - for (auto jump_edge : jump_edges_mapped_) { - int jump_source = jump_edge.first; - int jump_destination = jump_edge.second; - - FixJump(jump_source, jump_destination); - } -} - -void RegExpBytecodePeephole::FixJump(int jump_source, int jump_destination) { - int fixed_jump_destination = - jump_destination + - (--jump_destination_fixups_.upper_bound(jump_destination))->second; - DCHECK_LT(fixed_jump_destination, Length()); -#ifdef DEBUG - // TODO(pthier): This check could be better if we track the bytecodes - // actually used and check if we jump to one of them. - byte jump_bc = optimized_bytecode_buffer_[fixed_jump_destination]; - DCHECK_GT(jump_bc, 0); - DCHECK_LT(jump_bc, kRegExpBytecodeCount); -#endif - - if (jump_destination != fixed_jump_destination) { - OverwriteValue<uint32_t>(jump_source, fixed_jump_destination); - } -} - -void RegExpBytecodePeephole::AddSentinelFixups(int pos) { - jump_source_fixups_.emplace(pos, 0); - jump_destination_fixups_.emplace(pos, 0); -} - -template <typename T> -void RegExpBytecodePeephole::EmitValue(T value) { - DCHECK(optimized_bytecode_buffer_.begin() + pc() == - optimized_bytecode_buffer_.end()); - byte* value_byte_iter = reinterpret_cast<byte*>(&value); - optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), - value_byte_iter, - value_byte_iter + sizeof(T)); -} - -template <typename T> -void RegExpBytecodePeephole::OverwriteValue(int offset, T value) { - byte* value_byte_iter = reinterpret_cast<byte*>(&value); - byte* value_byte_iter_end = value_byte_iter + sizeof(T); - while (value_byte_iter < value_byte_iter_end) { - optimized_bytecode_buffer_[offset++] = *value_byte_iter++; - } -} - -void RegExpBytecodePeephole::CopyRangeToOutput(const byte* orig_bytecode, - int start, int length) { - DCHECK(optimized_bytecode_buffer_.begin() + pc() == - optimized_bytecode_buffer_.end()); - optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), - orig_bytecode + start, - orig_bytecode + start + length); -} - -void RegExpBytecodePeephole::SetRange(byte value, int count) { - DCHECK(optimized_bytecode_buffer_.begin() + pc() == - optimized_bytecode_buffer_.end()); - optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), count, - value); -} - -void RegExpBytecodePeephole::EmitArgument(int start_pc, const byte* bytecode, - BytecodeArgumentMapping arg) { - int arg_pos = start_pc + arg.offset; - switch (arg.length) { - case 1: - DCHECK_EQ(arg.new_length, arg.length); - EmitValue(GetValue<byte>(bytecode, arg_pos)); - break; - case 2: - DCHECK_EQ(arg.new_length, arg.length); - EmitValue(GetValue<uint16_t>(bytecode, arg_pos)); - break; - case 3: { - // Length 3 only occurs in 'packed' arguments where the lowermost byte is - // the current bytecode, and the remaining 3 bytes are the packed value. - // - // We load 4 bytes from position - 1 and shift out the bytecode. -#ifdef V8_TARGET_BIG_ENDIAN - UNIMPLEMENTED(); - int32_t val = 0; -#else - int32_t val = GetValue<int32_t>(bytecode, arg_pos - 1) >> kBitsPerByte; -#endif // V8_TARGET_BIG_ENDIAN - - switch (arg.new_length) { - case 2: - EmitValue<uint16_t>(val); - break; - case 3: { - // Pack with previously emitted value. - auto prev_val = - GetValue<int32_t>(&(*optimized_bytecode_buffer_.begin()), - Length() - sizeof(uint32_t)); -#ifdef V8_TARGET_BIG_ENDIAN - UNIMPLEMENTED(); - USE(prev_val); -#else - DCHECK_EQ(prev_val & 0xFFFFFF00, 0); - OverwriteValue<uint32_t>( - pc() - sizeof(uint32_t), - (static_cast<uint32_t>(val) << 8) | (prev_val & 0xFF)); -#endif // V8_TARGET_BIG_ENDIAN - break; - } - case 4: - EmitValue<uint32_t>(val); - break; - } - break; - } - case 4: - DCHECK_EQ(arg.new_length, arg.length); - EmitValue(GetValue<uint32_t>(bytecode, arg_pos)); - break; - case 8: - DCHECK_EQ(arg.new_length, arg.length); - EmitValue(GetValue<uint64_t>(bytecode, arg_pos)); - break; - default: - CopyRangeToOutput(bytecode, arg_pos, Min(arg.length, arg.new_length)); - if (arg.length < arg.new_length) { - SetRange(0x00, arg.new_length - arg.length); - } - break; - } -} - -int RegExpBytecodePeephole::pc() const { - DCHECK_LE(optimized_bytecode_buffer_.size(), std::numeric_limits<int>::max()); - return static_cast<int>(optimized_bytecode_buffer_.size()); -} - -Zone* RegExpBytecodePeephole::zone() const { return zone_; } - -} // namespace - -// static -Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode( - Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode, - int length, const ZoneUnorderedMap<int, int>& jump_edges) { - RegExpBytecodePeephole peephole(zone, length, jump_edges); - bool did_optimize = peephole.OptimizeBytecode(bytecode, length); - Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length()); - peephole.CopyOptimizedBytecode(array->GetDataStartAddress()); - - if (did_optimize && FLAG_trace_regexp_peephole_optimization) { - PrintF("Original Bytecode:\n"); - RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get()); - PrintF("Optimized Bytecode:\n"); - RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(), - source->ToCString().get()); - } - - return array; -} - -} // namespace internal -} // namespace v8 |