diff options
Diffstat (limited to 'modules/brotli/enc/hash.h')
-rw-r--r-- | modules/brotli/enc/hash.h | 498 |
1 files changed, 498 insertions, 0 deletions
diff --git a/modules/brotli/enc/hash.h b/modules/brotli/enc/hash.h new file mode 100644 index 0000000000..8c5a7bb5ad --- /dev/null +++ b/modules/brotli/enc/hash.h @@ -0,0 +1,498 @@ +/* Copyright 2010 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* A (forgetful) hash table to the data seen by the compressor, to + help create backward references to previous data. */ + +#ifndef BROTLI_ENC_HASH_H_ +#define BROTLI_ENC_HASH_H_ + +#include <string.h> /* memcmp, memset */ + +#include "../common/constants.h" +#include "../common/dictionary.h" +#include "../common/platform.h" +#include <brotli/types.h> +#include "./encoder_dict.h" +#include "./fast_log.h" +#include "./find_match_length.h" +#include "./memory.h" +#include "./quality.h" +#include "./static_dict.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/* Pointer to hasher data. + * + * Excluding initialization and destruction, hasher can be passed as + * HasherHandle by value. + * + * Typically hasher data consists of 3 sections: + * * HasherCommon structure + * * private structured hasher data, depending on hasher type + * * private dynamic hasher data, depending on hasher type and parameters + * + * Using "define" instead of "typedef", because on MSVC __restrict does not work + * on typedef pointer types. */ +#define HasherHandle uint8_t* + +typedef struct { + BrotliHasherParams params; + + /* False if hasher needs to be "prepared" before use. */ + BROTLI_BOOL is_prepared_; + + size_t dict_num_lookups; + size_t dict_num_matches; +} HasherCommon; + +static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) { + return (HasherCommon*)handle; +} + +#define score_t size_t + +static const uint32_t kCutoffTransformsCount = 10; +/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */ +/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */ +static const uint64_t kCutoffTransforms = + BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200); + +typedef struct HasherSearchResult { + size_t len; + size_t distance; + score_t score; + int len_code_delta; /* == len_code - len */ +} HasherSearchResult; + +/* kHashMul32 multiplier has these properties: + * The multiplier must be odd. Otherwise we may lose the highest bit. + * No long streaks of ones or zeros. + * There is no effort to ensure that it is a prime, the oddity is enough + for this use. + * The number has been tuned heuristically against compression benchmarks. */ +static const uint32_t kHashMul32 = 0x1E35A7BD; +static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD); +static const uint64_t kHashMul64Long = + BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u); + +static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) { + uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32; + /* The higher bits contain more mixture from the multiplication, + so we take our results from there. */ + return h >> (32 - 14); +} + +static BROTLI_INLINE void PrepareDistanceCache( + int* BROTLI_RESTRICT distance_cache, const int num_distances) { + if (num_distances > 4) { + int last_distance = distance_cache[0]; + distance_cache[4] = last_distance - 1; + distance_cache[5] = last_distance + 1; + distance_cache[6] = last_distance - 2; + distance_cache[7] = last_distance + 2; + distance_cache[8] = last_distance - 3; + distance_cache[9] = last_distance + 3; + if (num_distances > 10) { + int next_last_distance = distance_cache[1]; + distance_cache[10] = next_last_distance - 1; + distance_cache[11] = next_last_distance + 1; + distance_cache[12] = next_last_distance - 2; + distance_cache[13] = next_last_distance + 2; + distance_cache[14] = next_last_distance - 3; + distance_cache[15] = next_last_distance + 3; + } + } +} + +#define BROTLI_LITERAL_BYTE_SCORE 135 +#define BROTLI_DISTANCE_BIT_PENALTY 30 +/* Score must be positive after applying maximal penalty. */ +#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t)) + +/* Usually, we always choose the longest backward reference. This function + allows for the exception of that rule. + + If we choose a backward reference that is further away, it will + usually be coded with more bits. We approximate this by assuming + log2(distance). If the distance can be expressed in terms of the + last four distances, we use some heuristic constants to estimate + the bits cost. For the first up to four literals we use the bit + cost of the literals from the literal cost model, after that we + use the average bit cost of the cost model. + + This function is used to sometimes discard a longer backward reference + when it is not much longer and the bit cost for encoding it is more + than the saved literals. + + backward_reference_offset MUST be positive. */ +static BROTLI_INLINE score_t BackwardReferenceScore( + size_t copy_length, size_t backward_reference_offset) { + return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length - + BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset); +} + +static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance( + size_t copy_length) { + return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length + + BROTLI_SCORE_BASE + 15; +} + +static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance( + size_t distance_short_code) { + return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE); +} + +static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem( + const BrotliEncoderDictionary* dictionary, size_t item, + const uint8_t* data, size_t max_length, size_t max_backward, + size_t max_distance, HasherSearchResult* out) { + size_t len; + size_t word_idx; + size_t offset; + size_t matchlen; + size_t backward; + score_t score; + len = item & 0x1F; + word_idx = item >> 5; + offset = dictionary->words->offsets_by_length[len] + len * word_idx; + if (len > max_length) { + return BROTLI_FALSE; + } + + matchlen = + FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len); + if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) { + return BROTLI_FALSE; + } + { + size_t cut = len - matchlen; + size_t transform_id = (cut << 2) + + (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F); + backward = max_backward + 1 + word_idx + + (transform_id << dictionary->words->size_bits_by_length[len]); + } + if (backward > max_distance) { + return BROTLI_FALSE; + } + score = BackwardReferenceScore(matchlen, backward); + if (score < out->score) { + return BROTLI_FALSE; + } + out->len = matchlen; + out->len_code_delta = (int)len - (int)matchlen; + out->distance = backward; + out->score = score; + return BROTLI_TRUE; +} + +static BROTLI_INLINE void SearchInStaticDictionary( + const BrotliEncoderDictionary* dictionary, + HasherHandle handle, const uint8_t* data, size_t max_length, + size_t max_backward, size_t max_distance, + HasherSearchResult* out, BROTLI_BOOL shallow) { + size_t key; + size_t i; + HasherCommon* self = GetHasherCommon(handle); + if (self->dict_num_matches < (self->dict_num_lookups >> 7)) { + return; + } + key = Hash14(data) << 1; + for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) { + size_t item = dictionary->hash_table[key]; + self->dict_num_lookups++; + if (item != 0) { + BROTLI_BOOL item_matches = TestStaticDictionaryItem( + dictionary, item, data, + max_length, max_backward, max_distance, out); + if (item_matches) { + self->dict_num_matches++; + } + } + } +} + +typedef struct BackwardMatch { + uint32_t distance; + uint32_t length_and_code; +} BackwardMatch; + +static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self, + size_t dist, size_t len) { + self->distance = (uint32_t)dist; + self->length_and_code = (uint32_t)(len << 5); +} + +static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self, + size_t dist, size_t len, size_t len_code) { + self->distance = (uint32_t)dist; + self->length_and_code = + (uint32_t)((len << 5) | (len == len_code ? 0 : len_code)); +} + +static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) { + return self->length_and_code >> 5; +} + +static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) { + size_t code = self->length_and_code & 31; + return code ? code : BackwardMatchLength(self); +} + +#define EXPAND_CAT(a, b) CAT(a, b) +#define CAT(a, b) a ## b +#define FN(X) EXPAND_CAT(X, HASHER()) + +#define HASHER() H10 +#define BUCKET_BITS 17 +#define MAX_TREE_SEARCH_DEPTH 64 +#define MAX_TREE_COMP_LENGTH 128 +#include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */ +#undef MAX_TREE_SEARCH_DEPTH +#undef MAX_TREE_COMP_LENGTH +#undef BUCKET_BITS +#undef HASHER +/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */ +#define MAX_NUM_MATCHES_H10 128 + +/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression + a little faster (0.5% - 1%) and it compresses 0.15% better on small text + and HTML inputs. */ + +#define HASHER() H2 +#define BUCKET_BITS 16 +#define BUCKET_SWEEP 1 +#define HASH_LEN 5 +#define USE_DICTIONARY 1 +#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ +#undef BUCKET_SWEEP +#undef USE_DICTIONARY +#undef HASHER + +#define HASHER() H3 +#define BUCKET_SWEEP 2 +#define USE_DICTIONARY 0 +#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ +#undef USE_DICTIONARY +#undef BUCKET_SWEEP +#undef BUCKET_BITS +#undef HASHER + +#define HASHER() H4 +#define BUCKET_BITS 17 +#define BUCKET_SWEEP 4 +#define USE_DICTIONARY 1 +#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ +#undef USE_DICTIONARY +#undef HASH_LEN +#undef BUCKET_SWEEP +#undef BUCKET_BITS +#undef HASHER + +#define HASHER() H5 +#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */ +#undef HASHER + +#define HASHER() H6 +#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */ +#undef HASHER + +#define BUCKET_BITS 15 + +#define NUM_LAST_DISTANCES_TO_CHECK 4 +#define NUM_BANKS 1 +#define BANK_BITS 16 +#define HASHER() H40 +#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ +#undef HASHER +#undef NUM_LAST_DISTANCES_TO_CHECK + +#define NUM_LAST_DISTANCES_TO_CHECK 10 +#define HASHER() H41 +#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ +#undef HASHER +#undef NUM_LAST_DISTANCES_TO_CHECK +#undef NUM_BANKS +#undef BANK_BITS + +#define NUM_LAST_DISTANCES_TO_CHECK 16 +#define NUM_BANKS 512 +#define BANK_BITS 9 +#define HASHER() H42 +#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */ +#undef HASHER +#undef NUM_LAST_DISTANCES_TO_CHECK +#undef NUM_BANKS +#undef BANK_BITS + +#undef BUCKET_BITS + +#define HASHER() H54 +#define BUCKET_BITS 20 +#define BUCKET_SWEEP 4 +#define HASH_LEN 7 +#define USE_DICTIONARY 0 +#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */ +#undef USE_DICTIONARY +#undef HASH_LEN +#undef BUCKET_SWEEP +#undef BUCKET_BITS +#undef HASHER + +/* fast large window hashers */ + +#define HASHER() HROLLING_FAST +#define CHUNKLEN 32 +#define JUMP 4 +#define NUMBUCKETS 16777216 +#define MASK ((NUMBUCKETS * 64) - 1) +#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ +#undef JUMP +#undef HASHER + + +#define HASHER() HROLLING +#define JUMP 1 +#include "./hash_rolling_inc.h" /* NOLINT(build/include) */ +#undef MASK +#undef NUMBUCKETS +#undef JUMP +#undef CHUNKLEN +#undef HASHER + +#define HASHER() H35 +#define HASHER_A H3 +#define HASHER_B HROLLING_FAST +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + +#define HASHER() H55 +#define HASHER_A H54 +#define HASHER_B HROLLING_FAST +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + +#define HASHER() H65 +#define HASHER_A H6 +#define HASHER_B HROLLING +#include "./hash_composite_inc.h" /* NOLINT(build/include) */ +#undef HASHER_A +#undef HASHER_B +#undef HASHER + +#undef FN +#undef CAT +#undef EXPAND_CAT + +#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\ + H(35) H(55) H(65) +#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10) + +static BROTLI_INLINE void DestroyHasher( + MemoryManager* m, HasherHandle* handle) { + if (*handle == NULL) return; + BROTLI_FREE(m, *handle); +} + +static BROTLI_INLINE void HasherReset(HasherHandle handle) { + if (handle == NULL) return; + GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE; +} + +static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params, + BROTLI_BOOL one_shot, const size_t input_size) { + size_t result = sizeof(HasherCommon); + switch (params->hasher.type) { +#define SIZE_(N) \ + case N: \ + result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \ + break; + FOR_ALL_HASHERS(SIZE_) +#undef SIZE_ + default: + break; + } + return result; +} + +static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle, + BrotliEncoderParams* params, const uint8_t* data, size_t position, + size_t input_size, BROTLI_BOOL is_last) { + HasherHandle self = NULL; + HasherCommon* common = NULL; + BROTLI_BOOL one_shot = (position == 0 && is_last); + if (*handle == NULL) { + size_t alloc_size; + ChooseHasher(params, ¶ms->hasher); + alloc_size = HasherSize(params, one_shot, input_size); + self = BROTLI_ALLOC(m, uint8_t, alloc_size); + if (BROTLI_IS_OOM(m)) return; + *handle = self; + common = GetHasherCommon(self); + common->params = params->hasher; + switch (common->params.type) { +#define INITIALIZE_(N) \ + case N: \ + InitializeH ## N(*handle, params); \ + break; + FOR_ALL_HASHERS(INITIALIZE_); +#undef INITIALIZE_ + default: + break; + } + HasherReset(*handle); + } + + self = *handle; + common = GetHasherCommon(self); + if (!common->is_prepared_) { + switch (common->params.type) { +#define PREPARE_(N) \ + case N: \ + PrepareH ## N(self, one_shot, input_size, data); \ + break; + FOR_ALL_HASHERS(PREPARE_) +#undef PREPARE_ + default: break; + } + if (position == 0) { + common->dict_num_lookups = 0; + common->dict_num_matches = 0; + } + common->is_prepared_ = BROTLI_TRUE; + } +} + +static BROTLI_INLINE void InitOrStitchToPreviousBlock( + MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask, + BrotliEncoderParams* params, size_t position, size_t input_size, + BROTLI_BOOL is_last) { + HasherHandle self; + HasherSetup(m, handle, params, data, position, input_size, is_last); + if (BROTLI_IS_OOM(m)) return; + self = *handle; + switch (GetHasherCommon(self)->params.type) { +#define INIT_(N) \ + case N: \ + StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \ + break; + FOR_ALL_HASHERS(INIT_) +#undef INIT_ + default: break; + } +} + +#if defined(__cplusplus) || defined(c_plusplus) +} /* extern "C" */ +#endif + +#endif /* BROTLI_ENC_HASH_H_ */ |