// Copyright 2005 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "snappy.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"

#include <stdio.h>

#include <algorithm>
#include <string>
#include <vector>


namespace snappy {
// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
static inline uint32 HashBytes(uint32 bytes, int shift) {
  uint32 kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}
static inline uint32 Hash(const char* p, int shift) {
  return HashBytes(UNALIGNED_LOAD32(p), shift);
}

size_t MaxCompressedLength(size_t source_len) {
  // Compressed data can be defined as:
  //    compressed := item* literal*
  //    item       := literal* copy
  //
  // The trailing literal sequence has a space blowup of at most 62/60
  // since a literal of length 60 needs one tag byte + one extra byte
  // for length information.
  //
  // Item blowup is trickier to measure.  Suppose the "copy" op copies
  // 4 bytes of data.  Because of a special check in the encoding code,
  // we produce a 4-byte copy only if the offset is < 65536.  Therefore
  // the copy op takes 3 bytes to encode, and this type of item leads
  // to at most the 62/60 blowup for representing literals.
  //
  // Suppose the "copy" op copies 5 bytes of data.  If the offset is big
  // enough, it will take 5 bytes to encode the copy op.  Therefore the
  // worst case here is a one-byte literal followed by a five-byte copy.
  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
  //
  // This last factor dominates the blowup, so the final estimate is:
  return 32 + source_len + source_len/6;
}
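
// A minimal usage sketch (editor's addition, not in the original source):
// callers size the output buffer with MaxCompressedLength() before calling
// RawCompress(), which is defined later in this file:
//
//   char* output = new char[snappy::MaxCompressedLength(input_len)];
//   size_t output_len;
//   snappy::RawCompress(input, input_len, output, &output_len);
//   // output_len <= MaxCompressedLength(input_len) always holds.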

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};

// Copy "len" bytes from "src" to "op", one byte at a time.  Used for
// handling COPY operations where the input and output regions may
// overlap.  For example, suppose:
//    src    == "ab"
//    op     == src + 2
//    len    == 20
// After IncrementalCopy(src, op, len), the result will have
// eleven copies of "ab"
//    ababababababababababab
// Note that this does not match the semantics of either memcpy()
// or memmove().
static inline void IncrementalCopy(const char* src, char* op, int len) {
  DCHECK_GT(len, 0);
  do {
    *op++ = *src++;
  } while (--len > 0);
}

// Equivalent to IncrementalCopy except that it can write up to ten extra
// bytes after the end of the copy, and that it is faster.
//
// The main part of this loop is a simple copy of eight bytes at a time until
// we've copied (at least) the requested amount of bytes.  However, if op and
// src are less than eight bytes apart (indicating a repeating pattern of
// length < 8), we first need to expand the pattern in order to get the correct
// results.  For instance, if the buffer looks like this, with the eight-byte
// <src> and <op> patterns marked as intervals:
//
//    abxxxxxxxxxxxx
//    [------]           src
//      [------]         op
//
// a single eight-byte copy from <src> to <op> will repeat the pattern once,
// after which we can move <op> two bytes without moving <src>:
//
//    ababxxxxxxxxxx
//    [------]           src
//        [------]       op
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// op - src == 1 and len == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1.  Thus, ten excess bytes.

namespace {

const int kMaxIncrementCopyOverflow = 10;

}  // namespace

static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
  while (op - src < 8) {
    UnalignedCopy64(src, op);
    len -= op - src;
    op += op - src;
  }
  while (len > 0) {
    UnalignedCopy64(src, op);
    src += 8;
    op += 8;
    len -= 8;
  }
}
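
// Worked example (editor's illustration, not in the original source): for a
// two-byte pattern "ab" (op - src == 2), the first eight-byte copy makes the
// four bytes before op a valid "abab" pattern and advances op by two, so
// op - src becomes 4; the next copy doubles it to 8, and the simple
// eight-byte loop takes over.  The distance doubles each round, so the
// expansion loop runs at most three times for any distance in [1..7].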

static inline char* EmitLiteral(char* op,
                                const char* literal,
                                int len,
                                bool allow_fast_path) {
  int n = len - 1;      // Zero-length literals are disallowed
  if (n < 60) {
    // Fits in tag byte
    *op++ = LITERAL | (n << 2);

    // The vast majority of copies are below 16 bytes, for which a
    // call to memcpy is overkill. This fast path can sometimes
    // copy up to 15 bytes too much, but that is okay in the
    // main loop, since we have a bit to go on for both sides:
    //
    //   - The input will always have kInputMarginBytes = 15 extra
    //     available bytes, as long as we're in the main loop, and
    //     if not, allow_fast_path = false.
    //   - The output will always have 32 spare bytes (see
    //     MaxCompressedLength).
    if (allow_fast_path && len <= 16) {
      UnalignedCopy64(literal, op);
      UnalignedCopy64(literal + 8, op + 8);
      return op + len;
    }
  } else {
    // Encode in upcoming bytes
    char* base = op;
    int count = 0;
    op++;
    while (n > 0) {
      *op++ = n & 0xff;
      n >>= 8;
      count++;
    }
    assert(count >= 1);
    assert(count <= 4);
    *base = LITERAL | ((59+count) << 2);
  }
  memcpy(op, literal, len);
  return op + len;
}
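
// Encoding examples (editor's illustration, not in the original source):
//   len == 5:   n == 4 < 60, so the single tag byte is
//               LITERAL | (4 << 2) == 0x10, followed by the 5 literal bytes.
//   len == 100: n == 99 needs one extra byte (count == 1), so the tag is
//               LITERAL | (60 << 2) == 0xf0, followed by 0x63 (== 99) and
//               then the 100 literal bytes.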

static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
  DCHECK_LE(len, 64);
  DCHECK_GE(len, 4);
  DCHECK_LT(offset, 65536);

  if ((len < 12) && (offset < 2048)) {
    size_t len_minus_4 = len - 4;
    assert(len_minus_4 < 8);        // Must fit in 3 bits
    *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
    *op++ = offset & 0xff;
  } else {
    *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
    LittleEndian::Store16(op, offset);
    op += 2;
  }
  return op;
}
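
// Encoding example (editor's illustration, not in the original source):
// len == 5, offset == 300 uses the two-byte form: the tag is
// COPY_1_BYTE_OFFSET | ((5-4) << 2) | ((300 >> 8) << 5) == 0x25, and the
// next byte is 300 & 0xff == 0x2c.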

static inline char* EmitCopy(char* op, size_t offset, int len) {
  // Emit 64-byte copies but make sure to keep at least four bytes reserved,
  // since EmitCopyLessThan64() requires len >= 4.
  while (len >= 68) {
    op = EmitCopyLessThan64(op, offset, 64);
    len -= 64;
  }

  // Emit an extra 60-byte copy if we have too much data to fit in one copy;
  // this leaves a remainder of 5..7 bytes, still at least four.
  if (len > 64) {
    op = EmitCopyLessThan64(op, offset, 60);
    len -= 60;
  }

  // Emit remainder
  op = EmitCopyLessThan64(op, offset, len);
  return op;
}
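
// Encoding example (editor's illustration, not in the original source):
// EmitCopy(op, 1000, 200) emits three 64-byte copies (len 200 -> 136 -> 72
// -> 8) and then one 8-byte copy; since 8 < 12 and 1000 < 2048, the final
// copy uses the two-byte COPY_1_BYTE_OFFSET form, for 3*3 + 2 = 11 bytes
// of output in total.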

bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
  const char* limit = start + n;
  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
    *result = v;
    return true;
  } else {
    return false;
  }
}

namespace internal {
uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  assert(kMaxHashTableSize >= 256);
  size_t htsize = 256;
  while (htsize < kMaxHashTableSize && htsize < input_size) {
    htsize <<= 1;
  }
  CHECK_EQ(0, htsize & (htsize - 1)) << ": must be power of two";
  CHECK_LE(htsize, kMaxHashTableSize) << ": hash table too large";

  uint16* table;
  if (htsize <= ARRAYSIZE(small_table_)) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new uint16[kMaxHashTableSize];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
}  // end namespace internal

// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
//
// We have different versions for 64- and 32-bit; ideally we would avoid the
// two functions and just inline the UNALIGNED_LOAD64 call into
// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
// enough to avoid loading the value multiple times then.  For 64-bit, the load
// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
// done at GetUint32AtOffset() time.

#ifdef ARCH_K8

typedef uint64 EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return UNALIGNED_LOAD64(ptr);
}

static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  DCHECK_GE(offset, 0);
  DCHECK_LE(offset, 4);
  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}

#else

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return ptr;
}

static inline uint32 GetUint32AtOffset(const char* v, int offset) {
  DCHECK_GE(offset, 0);
  DCHECK_LE(offset, 4);
  return UNALIGNED_LOAD32(v + offset);
}

#endif

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input" is at most "kBlockSize" bytes long.
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input.size())" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
char* CompressFragment(const char* input,
                       size_t input_size,
                       char* op,
                       uint16* table,
                       const int table_size) {
335 // "ip" is the input pointer, and "op" is the output pointer.
336 const char* ip = input;
337 CHECK_LE(input_size, kBlockSize);
338 CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
339 const int shift = 32 - Bits::Log2Floor(table_size);
340 DCHECK_EQ(static_cast<int>(kuint32max >> shift), table_size - 1);
341 const char* ip_end = input + input_size;
342 const char* base_ip = ip;
343 // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
344 // [next_emit, ip_end) after the main loop.
345 const char* next_emit = ip;
347 const size_t kInputMarginBytes = 15;
348 if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
349 const char* ip_limit = input + input_size - kInputMarginBytes;
351 for (uint32 next_hash = Hash(++ip, shift); ; ) {
      DCHECK_LT(next_emit, ip);
      // The body of this loop calls EmitLiteral once and then EmitCopy one or
      // more times.  (The exception is that when we're close to exhausting
      // the input we goto emit_remainder.)
      //
      // In the first iteration of this loop we're just starting, so
      // there's nothing to copy, so calling EmitLiteral once is
      // necessary.  And we only start a new iteration when the
      // current iteration has determined that a call to EmitLiteral will
      // precede the next call to EmitCopy (if any).
      //
      // Step 1: Scan forward in the input looking for a 4-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc.. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (ie. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
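      //
      // Editor's illustration (not in the original source): skip starts at
      // 32, so skip++ >> 5 yields 1 for each of the first 32 probes, then 2
      // for the next 32, then 3, and so on; after roughly a hundred
      // fruitless probes the scan is already advancing four bytes per step.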
      uint32 skip = 32;

      const char* next_ip = ip;
      const char* candidate;
      do {
        ip = next_ip;
        uint32 hash = next_hash;
        DCHECK_EQ(hash, Hash(ip, shift));
        uint32 bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = base_ip + table[hash];
        DCHECK_GE(candidate, base_ip);
        DCHECK_LT(candidate, ip);

        table[hash] = ip - base_ip;
      } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
                            UNALIGNED_LOAD32(candidate)));

      // Step 2: A 4-byte match has been found.  We'll later see if more
      // than 4 bytes match.  But, prior to the match, input
      // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
      DCHECK_LE(next_emit + 16, ip_end);
      op = EmitLiteral(op, next_emit, ip - next_emit, true);

      // Step 3: Call EmitCopy, and then see if another EmitCopy could
      // be our next move.  Repeat until we find no match for the
      // input immediately after what was consumed by the last EmitCopy call.
      //
      // If we exit this loop normally then we need to call EmitLiteral next,
      // though we don't yet know how big the literal will be.  We handle that
      // by proceeding to the next iteration of the main loop.  We also can
      // exit this loop via goto if we get close to exhausting the input.
      EightBytesReference input_bytes;
      uint32 candidate_bytes = 0;

      do {
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
        int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
        ip += matched;
        size_t offset = base - candidate;
        DCHECK_EQ(0, memcmp(base, candidate, matched));
        op = EmitCopy(op, offset, matched);
        // We could immediately start working at ip now, but to improve
        // compression we first update table[Hash(ip - 1, ...)].
        const char* insert_tail = ip - 1;
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        input_bytes = GetEightBytesAt(insert_tail);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
        candidate = base_ip + table[cur_hash];
        candidate_bytes = UNALIGNED_LOAD32(candidate);
        table[cur_hash] = ip - base_ip;
      } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

      next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
      ++ip;
    }
  }

 emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
  }

  return op;
}
}  // end namespace internal

// Signature of output types needed by decompression code.
// The decompression code is templatized on a type that obeys this
// signature so that we do not pay virtual function call overhead in
// the middle of a tight decompression loop.
//
// class DecompressionWriter {
//  public:
//   // Called before decompression
//   void SetExpectedLength(size_t length);
//
//   // Called after decompression
//   bool CheckLength() const;
//
//   // Called repeatedly during decompression
//   bool Append(const char* ip, size_t length);
//   bool AppendFromSelf(uint32 offset, size_t length);
//
//   // The difference between TryFastAppend and Append is that TryFastAppend
//   // is allowed to read up to <available> bytes from the input buffer,
//   // whereas Append is allowed to read <length>.
//   //
//   // Also, TryFastAppend is allowed to return false, declining the append,
//   // without it being a fatal error -- just "return false" would be
//   // a perfectly legal implementation of TryFastAppend.  The intention
//   // is for TryFastAppend to allow a fast path in the common case of
//   // a small append.
//   //
//   // NOTE(user): TryFastAppend must always decline (return false)
//   // if <length> is 61 or more, as in this case the literal length is not
//   // decoded fully.  In practice, this should not be a big problem,
//   // as it is unlikely that one would implement a fast path accepting
//   // this much data.
//   bool TryFastAppend(const char* ip, size_t available, size_t length);
// };

// -----------------------------------------------------------------------
// Lookup table for decompression code.  Generated by ComputeTable() below.
// -----------------------------------------------------------------------

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed,
// for efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};

// In debug mode, allow optional computation of the table at startup.
// Also, check that the decompression table is correct.
#ifndef NDEBUG
DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table at startup.");

static uint16 MakeEntry(unsigned int extra,
                        unsigned int len,
                        unsigned int copy_offset) {
  // Check that all of the fields fit within the allocated space
  DCHECK_EQ(extra, extra & 0x7);              // At most 3 bits
  DCHECK_EQ(copy_offset, copy_offset & 0x7);  // At most 3 bits
  DCHECK_EQ(len, len & 0x7f);                 // At most 7 bits
  return len | (copy_offset << 8) | (extra << 11);
}

static void ComputeTable() {
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  CHECK_EQ(assigned, 256);
  for (int i = 0; i < 256; i++) {
    CHECK_NE(dst[i], 0xffff);
  }

  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table
  for (int i = 0; i < 256; i++) {
    CHECK_EQ(dst[i], char_table[i]);
  }
}
#endif /* !NDEBUG */

// Helper class for decompression
class SnappyDecompressor {
 private:
  Source*       reader_;         // Underlying source of bytes to decompress
  const char*   ip_;             // Points to next buffered byte
  const char*   ip_limit_;       // Points just past buffered bytes
  uint32        peeked_;         // Bytes peeked from reader (need to skip)
  bool          eof_;            // Hit end of input without an error?
  char          scratch_[5];     // Temporary buffer for RefillTag() boundaries

  // Ensure that all of the tag metadata for the next tag is available
  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
  // if (ip_limit_ - ip_ < 5).
  //
  // Returns true on success, false on error or end of input.
  bool RefillTag();

 public:
  explicit SnappyDecompressor(Source* reader)
      : reader_(reader),
        ip_(NULL),
        ip_limit_(NULL),
        peeked_(0),
        eof_(false) {
  }

  ~SnappyDecompressor() {
    // Advance past any bytes we peeked at from the reader
    reader_->Skip(peeked_);
  }

  // Returns true iff we have hit the end of the input without an error.
  bool eof() const {
    return eof_;
  }

  // Read the uncompressed length stored at the start of the compressed data.
  // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
    DCHECK(ip_ == NULL);       // Must not have read anything yet
    // Length is encoded in 1..5 bytes
    *result = 0;
    uint32 shift = 0;
    while (true) {
      if (shift >= 32) return false;
      size_t n;
      const char* ip = reader_->Peek(&n);
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
      *result |= static_cast<uint32>(c & 0x7f) << shift;
      if (c < 128) {
        break;
      }
      shift += 7;
    }
    return true;
  }
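
  // Varint examples (editor's illustration, not in the original source):
  // a length of 64 is stored as the single byte 0x40, while 2097150
  // (0x1FFFFE) is stored as 0xFE 0xFF 0x7F -- each byte carries seven
  // payload bits, and the high bit marks continuation.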

  // Process the entire input, writing decompressed data to "writer".
  // Errors and end of input terminate the loop; callers distinguish the
  // two via eof() and the writer's CheckLength().
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;

    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
    // scope to optimize the <ip_limit_ - ip> expression based on the local
    // context, which overall increases speed.
#define MAYBE_REFILL() \
    if (ip_limit_ - ip < 5) { \
      ip_ = ip; \
      if (!RefillTag()) return; \
      ip = ip_; \
    }

    MAYBE_REFILL();
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

      if ((c & 0x3) == LITERAL) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          DCHECK_LT(literal_length, 61);
          ip += literal_length;
          MAYBE_REFILL();
          continue;
        }

        if (PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
          const size_t literal_length_length = literal_length - 60;
          literal_length =
              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
          if (!writer->Append(ip, avail)) return;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
          if (avail == 0) return;  // Premature end of input
          ip_limit_ = ip + avail;
        }
        if (!writer->Append(ip, literal_length)) {
          return;
        }
        ip += literal_length;
        MAYBE_REFILL();
      } else {
        const uint32 entry = char_table[c];
        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        const uint32 length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10.  By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
        const uint32 copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
        MAYBE_REFILL();
      }
    }

#undef MAYBE_REFILL
  }
};
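
// Decoding example (editor's illustration, not in the original source):
// for tag byte c == 0x25 (the copy emitted in the EmitCopyLessThan64
// example above), char_table[0x25] == 0x0905: length == 5, one trailer
// byte follows, and copy_offset == 0x100.  With trailer byte 0x2c the
// final offset is 0x100 + 0x2c == 300, matching what was encoded.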

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);   // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
    if (n == 0) {
      eof_ = true;
      return false;
    }
    ip_limit_ = ip + n;
  }

  // Read the tag character
  DCHECK_LT(ip, ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  const uint32 entry = char_table[c];
  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
  DCHECK_LE(needed, sizeof(scratch_));

  // Read more bytes from reader if needed
  uint32 nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents.  We store the needed bytes in "scratch_".  They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
      uint32 to_add = min<uint32>(needed - nbuf, length);
      memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
    DCHECK_EQ(nbuf, needed);
    ip_ = scratch_;
    ip_limit_ = scratch_ + needed;
  } else if (nbuf < 5) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
    ip_limit_ = scratch_ + nbuf;
  } else {
    // Pass pointer to buffer returned by reader_.
    ip_ = ip;
  }
  return true;
}

template <typename Writer>
static bool InternalUncompress(Source* r,
                               Writer* writer,
                               uint32 max_len) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
  return InternalUncompressAllTags(
      &decompressor, writer, uncompressed_len, max_len);
}

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                      Writer* writer,
                                      uint32 uncompressed_len,
                                      uint32 max_len) {
  // Protect against possible DoS attack
  if (static_cast<uint64>(uncompressed_len) > max_len) {
    return false;
  }

  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
  return (decompressor->eof() && writer->CheckLength());
}

bool GetUncompressedLength(Source* source, uint32* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

  internal::WorkingMemory wmem;
  char* scratch = NULL;
  char* scratch_output = NULL;

  while (N > 0) {
    // Get next block to compress (without copying if possible)
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    DCHECK_NE(fragment_size, 0) << ": premature end of input";
    const size_t num_to_read = min(N, kBlockSize);
    size_t bytes_read = fragment_size;

    size_t pending_advance = 0;
    if (bytes_read >= num_to_read) {
      // Buffer returned by reader is large enough
      pending_advance = num_to_read;
      fragment_size = num_to_read;
    } else {
      // Read into scratch buffer
      if (scratch == NULL) {
        // If this is the last iteration, we want to allocate N bytes
        // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
        scratch = new char[num_to_read];
      }
      memcpy(scratch, fragment, bytes_read);
      reader->Skip(bytes_read);

      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
      }
      DCHECK_EQ(bytes_read, num_to_read);
      fragment = scratch;
      fragment_size = num_to_read;
    }
    DCHECK_EQ(fragment_size, num_to_read);

    // Get encoding table for compression
    int table_size;
    uint16* table = wmem.GetHashTable(num_to_read, &table_size);

    // Compress input_fragment and append to dest
    const int max_output = MaxCompressedLength(num_to_read);

    // Need a scratch buffer for the output, in case the byte sink doesn't
    // have room for us directly.
    if (scratch_output == NULL) {
      scratch_output = new char[max_output];
    } else {
      // Since we encode kBlockSize regions followed by a region
      // which is <= kBlockSize in length, a previously allocated
      // scratch_output[] region is big enough for this iteration.
    }
    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
    char* end = internal::CompressFragment(fragment, fragment_size,
                                           dest, table, table_size);
    writer->Append(dest, end - dest);
    written += (end - dest);

    N -= num_to_read;
    reader->Skip(pending_advance);
  }

  delete[] scratch;
  delete[] scratch_output;

  return written;
}

// -----------------------------------------------------------------------
// Flat array interfaces
// -----------------------------------------------------------------------

// A type that writes to a flat array.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyArrayWriter {
 private:
  char* base_;
  char* op_;
  char* op_limit_;

 public:
  inline explicit SnappyArrayWriter(char* dst)
      : base_(dst),
        op_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
    op_limit_ = op_ + len;
  }

  inline bool CheckLength() const {
    return op_ == op_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (space_left < len) {
      return false;
    }
    memcpy(op, ip, len);
    op_ = op + len;
    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy64(ip, op);
      UnalignedCopy64(ip + 8, op + 8);
      op_ = op + len;
      return true;
    } else {
      return false;
    }
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;

    if (op - base_ <= offset - 1u) {  // -1u catches offset==0
      return false;
    }
    if (len <= 16 && offset >= 8 && space_left >= 16) {
      // Fast path, used for the majority (70-80%) of dynamic invocations.
      UnalignedCopy64(op - offset, op);
      UnalignedCopy64(op - offset + 8, op + 8);
    } else {
      if (space_left >= len + kMaxIncrementCopyOverflow) {
        IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left < len) {
          return false;
        }
        IncrementalCopy(op - offset, op, len);
      }
    }

    op_ = op + len;
    return true;
  }
};

bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  ByteArraySource reader(compressed, n);
  return RawUncompress(&reader, uncompressed);
}

bool RawUncompress(Source* compressed, char* uncompressed) {
  SnappyArrayWriter output(uncompressed);
  return InternalUncompress(compressed, &output, kuint32max);
}

bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
    return false;
  }
  // Protect against possible DoS attack
  if ((static_cast<uint64>(ulength) + uncompressed->size()) >
      uncompressed->max_size()) {
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
  return RawUncompress(compressed, n, string_as_array(uncompressed));
}

// A Writer that drops everything on the floor and just does validation
class SnappyDecompressionValidator {
 private:
  size_t expected_;
  size_t produced_;

 public:
  inline SnappyDecompressionValidator() : produced_(0) { }

  inline void SetExpectedLength(size_t len) {
    expected_ = len;
  }

  inline bool CheckLength() const {
    return expected_ == produced_;
  }

  inline bool Append(const char* ip, size_t len) {
    produced_ += len;
    return produced_ <= expected_;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
    return false;
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
    produced_ += len;
    return produced_ <= expected_;
  }
};

bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  ByteArraySource reader(compressed, n);
  SnappyDecompressionValidator writer;
  return InternalUncompress(&reader, &writer, kuint32max);
}

void RawCompress(const char* input,
                 size_t input_length,
                 char* compressed,
                 size_t* compressed_length) {
  ByteArraySource reader(input, input_length);
  UncheckedByteArraySink writer(compressed);
  Compress(&reader, &writer);

  // Compute how many bytes were added
  *compressed_length = (writer.CurrentDestination() - compressed);
}

size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  compressed->resize(MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
              &compressed_length);
  compressed->resize(compressed_length);
  return compressed_length;
}

}  // end namespace snappy
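
// A minimal round-trip sketch (editor's illustration, not part of the
// original file):
//
//   string compressed, restored;
//   snappy::Compress(data.data(), data.size(), &compressed);
//   CHECK(snappy::IsValidCompressedBuffer(compressed.data(),
//                                         compressed.size()));
//   CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
//                            &restored));
//   CHECK_EQ(data, restored);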