summaryrefslogtreecommitdiff
path: root/modules/brotli/enc/hash.h
diff options
context:
space:
mode:
Diffstat (limited to 'modules/brotli/enc/hash.h')
-rw-r--r--modules/brotli/enc/hash.h498
1 files changed, 498 insertions, 0 deletions
diff --git a/modules/brotli/enc/hash.h b/modules/brotli/enc/hash.h
new file mode 100644
index 0000000000..8c5a7bb5ad
--- /dev/null
+++ b/modules/brotli/enc/hash.h
@@ -0,0 +1,498 @@
+/* Copyright 2010 Google Inc. All Rights Reserved.
+
+ Distributed under MIT license.
+ See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
+*/
+
+/* A (forgetful) hash table to the data seen by the compressor, to
+ help create backward references to previous data. */
+
+#ifndef BROTLI_ENC_HASH_H_
+#define BROTLI_ENC_HASH_H_
+
+#include <string.h> /* memcmp, memset */
+
+#include "../common/constants.h"
+#include "../common/dictionary.h"
+#include "../common/platform.h"
+#include <brotli/types.h>
+#include "./encoder_dict.h"
+#include "./fast_log.h"
+#include "./find_match_length.h"
+#include "./memory.h"
+#include "./quality.h"
+#include "./static_dict.h"
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+/* Pointer to hasher data.
+ *
+ * Excluding initialization and destruction, hasher can be passed as
+ * HasherHandle by value.
+ *
+ * Typically hasher data consists of 3 sections:
+ * * HasherCommon structure
+ * * private structured hasher data, depending on hasher type
+ * * private dynamic hasher data, depending on hasher type and parameters
+ *
+ * Using "define" instead of "typedef", because on MSVC __restrict does not work
+ * on typedef pointer types. */
+#define HasherHandle uint8_t*
+
+typedef struct {
+ BrotliHasherParams params;
+
+ /* False if hasher needs to be "prepared" before use. */
+ BROTLI_BOOL is_prepared_;
+
+ size_t dict_num_lookups;
+ size_t dict_num_matches;
+} HasherCommon;
+
+static BROTLI_INLINE HasherCommon* GetHasherCommon(HasherHandle handle) {
+ return (HasherCommon*)handle;
+}
+
+#define score_t size_t
+
+static const uint32_t kCutoffTransformsCount = 10;
+/* 0, 12, 27, 23, 42, 63, 56, 48, 59, 64 */
+/* 0+0, 4+8, 8+19, 12+11, 16+26, 20+43, 24+32, 28+20, 32+27, 36+28 */
+static const uint64_t kCutoffTransforms =
+ BROTLI_MAKE_UINT64_T(0x071B520A, 0xDA2D3200);
+
+typedef struct HasherSearchResult {
+ size_t len;
+ size_t distance;
+ score_t score;
+ int len_code_delta; /* == len_code - len */
+} HasherSearchResult;
+
+/* kHashMul32 multiplier has these properties:
+ * The multiplier must be odd. Otherwise we may lose the highest bit.
+ * No long streaks of ones or zeros.
+ * There is no effort to ensure that it is a prime, the oddity is enough
+ for this use.
+ * The number has been tuned heuristically against compression benchmarks. */
+static const uint32_t kHashMul32 = 0x1E35A7BD;
+static const uint64_t kHashMul64 = BROTLI_MAKE_UINT64_T(0x1E35A7BD, 0x1E35A7BD);
+static const uint64_t kHashMul64Long =
+ BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
+
+static BROTLI_INLINE uint32_t Hash14(const uint8_t* data) {
+ uint32_t h = BROTLI_UNALIGNED_LOAD32LE(data) * kHashMul32;
+ /* The higher bits contain more mixture from the multiplication,
+ so we take our results from there. */
+ return h >> (32 - 14);
+}
+
+static BROTLI_INLINE void PrepareDistanceCache(
+ int* BROTLI_RESTRICT distance_cache, const int num_distances) {
+ if (num_distances > 4) {
+ int last_distance = distance_cache[0];
+ distance_cache[4] = last_distance - 1;
+ distance_cache[5] = last_distance + 1;
+ distance_cache[6] = last_distance - 2;
+ distance_cache[7] = last_distance + 2;
+ distance_cache[8] = last_distance - 3;
+ distance_cache[9] = last_distance + 3;
+ if (num_distances > 10) {
+ int next_last_distance = distance_cache[1];
+ distance_cache[10] = next_last_distance - 1;
+ distance_cache[11] = next_last_distance + 1;
+ distance_cache[12] = next_last_distance - 2;
+ distance_cache[13] = next_last_distance + 2;
+ distance_cache[14] = next_last_distance - 3;
+ distance_cache[15] = next_last_distance + 3;
+ }
+ }
+}
+
+#define BROTLI_LITERAL_BYTE_SCORE 135
+#define BROTLI_DISTANCE_BIT_PENALTY 30
+/* Score must be positive after applying maximal penalty. */
+#define BROTLI_SCORE_BASE (BROTLI_DISTANCE_BIT_PENALTY * 8 * sizeof(size_t))
+
+/* Usually, we always choose the longest backward reference. This function
+ allows for the exception of that rule.
+
+ If we choose a backward reference that is further away, it will
+ usually be coded with more bits. We approximate this by assuming
+ log2(distance). If the distance can be expressed in terms of the
+ last four distances, we use some heuristic constants to estimate
+ the bits cost. For the first up to four literals we use the bit
+ cost of the literals from the literal cost model, after that we
+ use the average bit cost of the cost model.
+
+ This function is used to sometimes discard a longer backward reference
+ when it is not much longer and the bit cost for encoding it is more
+ than the saved literals.
+
+ backward_reference_offset MUST be positive. */
+static BROTLI_INLINE score_t BackwardReferenceScore(
+ size_t copy_length, size_t backward_reference_offset) {
+ return BROTLI_SCORE_BASE + BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length -
+ BROTLI_DISTANCE_BIT_PENALTY * Log2FloorNonZero(backward_reference_offset);
+}
+
+static BROTLI_INLINE score_t BackwardReferenceScoreUsingLastDistance(
+ size_t copy_length) {
+ return BROTLI_LITERAL_BYTE_SCORE * (score_t)copy_length +
+ BROTLI_SCORE_BASE + 15;
+}
+
+static BROTLI_INLINE score_t BackwardReferencePenaltyUsingLastDistance(
+ size_t distance_short_code) {
+ return (score_t)39 + ((0x1CA10 >> (distance_short_code & 0xE)) & 0xE);
+}
+
+static BROTLI_INLINE BROTLI_BOOL TestStaticDictionaryItem(
+ const BrotliEncoderDictionary* dictionary, size_t item,
+ const uint8_t* data, size_t max_length, size_t max_backward,
+ size_t max_distance, HasherSearchResult* out) {
+ size_t len;
+ size_t word_idx;
+ size_t offset;
+ size_t matchlen;
+ size_t backward;
+ score_t score;
+ len = item & 0x1F;
+ word_idx = item >> 5;
+ offset = dictionary->words->offsets_by_length[len] + len * word_idx;
+ if (len > max_length) {
+ return BROTLI_FALSE;
+ }
+
+ matchlen =
+ FindMatchLengthWithLimit(data, &dictionary->words->data[offset], len);
+ if (matchlen + dictionary->cutoffTransformsCount <= len || matchlen == 0) {
+ return BROTLI_FALSE;
+ }
+ {
+ size_t cut = len - matchlen;
+ size_t transform_id = (cut << 2) +
+ (size_t)((dictionary->cutoffTransforms >> (cut * 6)) & 0x3F);
+ backward = max_backward + 1 + word_idx +
+ (transform_id << dictionary->words->size_bits_by_length[len]);
+ }
+ if (backward > max_distance) {
+ return BROTLI_FALSE;
+ }
+ score = BackwardReferenceScore(matchlen, backward);
+ if (score < out->score) {
+ return BROTLI_FALSE;
+ }
+ out->len = matchlen;
+ out->len_code_delta = (int)len - (int)matchlen;
+ out->distance = backward;
+ out->score = score;
+ return BROTLI_TRUE;
+}
+
+static BROTLI_INLINE void SearchInStaticDictionary(
+ const BrotliEncoderDictionary* dictionary,
+ HasherHandle handle, const uint8_t* data, size_t max_length,
+ size_t max_backward, size_t max_distance,
+ HasherSearchResult* out, BROTLI_BOOL shallow) {
+ size_t key;
+ size_t i;
+ HasherCommon* self = GetHasherCommon(handle);
+ if (self->dict_num_matches < (self->dict_num_lookups >> 7)) {
+ return;
+ }
+ key = Hash14(data) << 1;
+ for (i = 0; i < (shallow ? 1u : 2u); ++i, ++key) {
+ size_t item = dictionary->hash_table[key];
+ self->dict_num_lookups++;
+ if (item != 0) {
+ BROTLI_BOOL item_matches = TestStaticDictionaryItem(
+ dictionary, item, data,
+ max_length, max_backward, max_distance, out);
+ if (item_matches) {
+ self->dict_num_matches++;
+ }
+ }
+ }
+}
+
+typedef struct BackwardMatch {
+ uint32_t distance;
+ uint32_t length_and_code;
+} BackwardMatch;
+
+static BROTLI_INLINE void InitBackwardMatch(BackwardMatch* self,
+ size_t dist, size_t len) {
+ self->distance = (uint32_t)dist;
+ self->length_and_code = (uint32_t)(len << 5);
+}
+
+static BROTLI_INLINE void InitDictionaryBackwardMatch(BackwardMatch* self,
+ size_t dist, size_t len, size_t len_code) {
+ self->distance = (uint32_t)dist;
+ self->length_and_code =
+ (uint32_t)((len << 5) | (len == len_code ? 0 : len_code));
+}
+
+static BROTLI_INLINE size_t BackwardMatchLength(const BackwardMatch* self) {
+ return self->length_and_code >> 5;
+}
+
+static BROTLI_INLINE size_t BackwardMatchLengthCode(const BackwardMatch* self) {
+ size_t code = self->length_and_code & 31;
+ return code ? code : BackwardMatchLength(self);
+}
+
+#define EXPAND_CAT(a, b) CAT(a, b)
+#define CAT(a, b) a ## b
+#define FN(X) EXPAND_CAT(X, HASHER())
+
+#define HASHER() H10
+#define BUCKET_BITS 17
+#define MAX_TREE_SEARCH_DEPTH 64
+#define MAX_TREE_COMP_LENGTH 128
+#include "./hash_to_binary_tree_inc.h" /* NOLINT(build/include) */
+#undef MAX_TREE_SEARCH_DEPTH
+#undef MAX_TREE_COMP_LENGTH
+#undef BUCKET_BITS
+#undef HASHER
+/* MAX_NUM_MATCHES == 64 + MAX_TREE_SEARCH_DEPTH */
+#define MAX_NUM_MATCHES_H10 128
+
+/* For BUCKET_SWEEP == 1, enabling the dictionary lookup makes compression
+ a little faster (0.5% - 1%) and it compresses 0.15% better on small text
+ and HTML inputs. */
+
+#define HASHER() H2
+#define BUCKET_BITS 16
+#define BUCKET_SWEEP 1
+#define HASH_LEN 5
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef BUCKET_SWEEP
+#undef USE_DICTIONARY
+#undef HASHER
+
+#define HASHER() H3
+#define BUCKET_SWEEP 2
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H4
+#define BUCKET_BITS 17
+#define BUCKET_SWEEP 4
+#define USE_DICTIONARY 1
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+#define HASHER() H5
+#include "./hash_longest_match_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+
+#define HASHER() H6
+#include "./hash_longest_match64_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+
+#define BUCKET_BITS 15
+
+#define NUM_LAST_DISTANCES_TO_CHECK 4
+#define NUM_BANKS 1
+#define BANK_BITS 16
+#define HASHER() H40
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+
+#define NUM_LAST_DISTANCES_TO_CHECK 10
+#define HASHER() H41
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#define NUM_LAST_DISTANCES_TO_CHECK 16
+#define NUM_BANKS 512
+#define BANK_BITS 9
+#define HASHER() H42
+#include "./hash_forgetful_chain_inc.h" /* NOLINT(build/include) */
+#undef HASHER
+#undef NUM_LAST_DISTANCES_TO_CHECK
+#undef NUM_BANKS
+#undef BANK_BITS
+
+#undef BUCKET_BITS
+
+#define HASHER() H54
+#define BUCKET_BITS 20
+#define BUCKET_SWEEP 4
+#define HASH_LEN 7
+#define USE_DICTIONARY 0
+#include "./hash_longest_match_quickly_inc.h" /* NOLINT(build/include) */
+#undef USE_DICTIONARY
+#undef HASH_LEN
+#undef BUCKET_SWEEP
+#undef BUCKET_BITS
+#undef HASHER
+
+/* fast large window hashers */
+
+#define HASHER() HROLLING_FAST
+#define CHUNKLEN 32
+#define JUMP 4
+#define NUMBUCKETS 16777216
+#define MASK ((NUMBUCKETS * 64) - 1)
+#include "./hash_rolling_inc.h" /* NOLINT(build/include) */
+#undef JUMP
+#undef HASHER
+
+
+#define HASHER() HROLLING
+#define JUMP 1
+#include "./hash_rolling_inc.h" /* NOLINT(build/include) */
+#undef MASK
+#undef NUMBUCKETS
+#undef JUMP
+#undef CHUNKLEN
+#undef HASHER
+
+#define HASHER() H35
+#define HASHER_A H3
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H55
+#define HASHER_A H54
+#define HASHER_B HROLLING_FAST
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#define HASHER() H65
+#define HASHER_A H6
+#define HASHER_B HROLLING
+#include "./hash_composite_inc.h" /* NOLINT(build/include) */
+#undef HASHER_A
+#undef HASHER_B
+#undef HASHER
+
+#undef FN
+#undef CAT
+#undef EXPAND_CAT
+
+#define FOR_GENERIC_HASHERS(H) H(2) H(3) H(4) H(5) H(6) H(40) H(41) H(42) H(54)\
+ H(35) H(55) H(65)
+#define FOR_ALL_HASHERS(H) FOR_GENERIC_HASHERS(H) H(10)
+
+static BROTLI_INLINE void DestroyHasher(
+ MemoryManager* m, HasherHandle* handle) {
+ if (*handle == NULL) return;
+ BROTLI_FREE(m, *handle);
+}
+
+static BROTLI_INLINE void HasherReset(HasherHandle handle) {
+ if (handle == NULL) return;
+ GetHasherCommon(handle)->is_prepared_ = BROTLI_FALSE;
+}
+
+static BROTLI_INLINE size_t HasherSize(const BrotliEncoderParams* params,
+ BROTLI_BOOL one_shot, const size_t input_size) {
+ size_t result = sizeof(HasherCommon);
+ switch (params->hasher.type) {
+#define SIZE_(N) \
+ case N: \
+ result += HashMemAllocInBytesH ## N(params, one_shot, input_size); \
+ break;
+ FOR_ALL_HASHERS(SIZE_)
+#undef SIZE_
+ default:
+ break;
+ }
+ return result;
+}
+
+static BROTLI_INLINE void HasherSetup(MemoryManager* m, HasherHandle* handle,
+ BrotliEncoderParams* params, const uint8_t* data, size_t position,
+ size_t input_size, BROTLI_BOOL is_last) {
+ HasherHandle self = NULL;
+ HasherCommon* common = NULL;
+ BROTLI_BOOL one_shot = (position == 0 && is_last);
+ if (*handle == NULL) {
+ size_t alloc_size;
+ ChooseHasher(params, &params->hasher);
+ alloc_size = HasherSize(params, one_shot, input_size);
+ self = BROTLI_ALLOC(m, uint8_t, alloc_size);
+ if (BROTLI_IS_OOM(m)) return;
+ *handle = self;
+ common = GetHasherCommon(self);
+ common->params = params->hasher;
+ switch (common->params.type) {
+#define INITIALIZE_(N) \
+ case N: \
+ InitializeH ## N(*handle, params); \
+ break;
+ FOR_ALL_HASHERS(INITIALIZE_);
+#undef INITIALIZE_
+ default:
+ break;
+ }
+ HasherReset(*handle);
+ }
+
+ self = *handle;
+ common = GetHasherCommon(self);
+ if (!common->is_prepared_) {
+ switch (common->params.type) {
+#define PREPARE_(N) \
+ case N: \
+ PrepareH ## N(self, one_shot, input_size, data); \
+ break;
+ FOR_ALL_HASHERS(PREPARE_)
+#undef PREPARE_
+ default: break;
+ }
+ if (position == 0) {
+ common->dict_num_lookups = 0;
+ common->dict_num_matches = 0;
+ }
+ common->is_prepared_ = BROTLI_TRUE;
+ }
+}
+
+static BROTLI_INLINE void InitOrStitchToPreviousBlock(
+ MemoryManager* m, HasherHandle* handle, const uint8_t* data, size_t mask,
+ BrotliEncoderParams* params, size_t position, size_t input_size,
+ BROTLI_BOOL is_last) {
+ HasherHandle self;
+ HasherSetup(m, handle, params, data, position, input_size, is_last);
+ if (BROTLI_IS_OOM(m)) return;
+ self = *handle;
+ switch (GetHasherCommon(self)->params.type) {
+#define INIT_(N) \
+ case N: \
+ StitchToPreviousBlockH ## N(self, input_size, position, data, mask); \
+ break;
+ FOR_ALL_HASHERS(INIT_)
+#undef INIT_
+ default: break;
+ }
+}
+
+#if defined(__cplusplus) || defined(c_plusplus)
+} /* extern "C" */
+#endif
+
+#endif /* BROTLI_ENC_HASH_H_ */