[packages] gzip: add rsyncable support

git-svn-id: svn://svn.openwrt.org/openwrt/packages@30620 3c298f89-4303-0410-b956-a3cf2f4a3e73
This commit is contained in:
swalker 2012-02-17 20:31:05 +00:00
parent 3fa7ba6f05
commit 56eb3b309a
2 changed files with 250 additions and 2 deletions

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2006-2010 OpenWrt.org
# Copyright (C) 2006-2012 OpenWrt.org
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
@ -9,7 +9,7 @@ include $(TOPDIR)/rules.mk
PKG_NAME:=gzip
PKG_VERSION:=1.4
PKG_RELEASE:=1
PKG_RELEASE:=2
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
PKG_SOURCE_URL:=@GNU/gzip

View File

@ -0,0 +1,248 @@
--- a/deflate.c
+++ b/deflate.c
@@ -131,6 +131,14 @@
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+#ifndef RSYNC_WIN
+# define RSYNC_WIN 8192
+#endif
+/* Size of rsync window, must be < MAX_DIST */
+
+#define RSYNC_SUM_MATCH(sum) (((sum) & (RSYNC_WIN - 1)) == 0)
+/* Whether window sum matches magic value */
+
/* ===========================================================================
* Local data used by the "longest match" routines.
*/
@@ -212,6 +220,8 @@ local int compr_level;
unsigned near good_match;
/* Use a faster search when the previous match is longer than this */
+local ulg rsync_sum; /* rolling sum of rsync window */
+local ulg rsync_chunk_end; /* next rsync sequence point */
/* Values for max_lazy_match, good_match and max_chain_length, depending on
* the desired pack level (0..9). The values given below have been tuned to
@@ -307,6 +317,10 @@ void lm_init (pack_level, flags)
#endif
/* prev will be initialized on the fly */
+ /* rsync params */
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ rsync_sum = 0;
+
/* Set the default configuration parameters:
*/
max_lazy_match = configuration_table[pack_level].max_lazy;
@@ -324,6 +338,7 @@ void lm_init (pack_level, flags)
strstart = 0;
block_start = 0L;
+ rsync_chunk_end = 0xFFFFFFFFUL;
#ifdef ASMV
match_init(); /* initialize the asm code */
#endif
@@ -543,6 +558,8 @@ local void fill_window()
memcpy((char*)window, (char*)window+WSIZE, (unsigned)WSIZE);
match_start -= WSIZE;
strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
+ if (rsync_chunk_end != 0xFFFFFFFFUL)
+ rsync_chunk_end -= WSIZE;
block_start -= (long) WSIZE;
@@ -570,6 +587,39 @@ local void fill_window()
}
}
+local void rsync_roll(start, num)
+ unsigned start;
+ unsigned num;
+{
+ unsigned i;
+
+ if (start < RSYNC_WIN) {
+ /* before window fills. */
+ for (i = start; i < RSYNC_WIN; i++) {
+ if (i == start + num) return;
+ rsync_sum += (ulg)window[i];
+ }
+ num -= (RSYNC_WIN - start);
+ start = RSYNC_WIN;
+ }
+
+ /* buffer after window full */
+ for (i = start; i < start+num; i++) {
+ /* New character in */
+ rsync_sum += (ulg)window[i];
+ /* Old character out */
+ rsync_sum -= (ulg)window[i - RSYNC_WIN];
+ if (rsync_chunk_end == 0xFFFFFFFFUL && RSYNC_SUM_MATCH(rsync_sum))
+ rsync_chunk_end = i;
+ }
+}
+
+/* ===========================================================================
+ * Set rsync_chunk_end if window sum matches magic value.
+ */
+#define RSYNC_ROLL(s, n) \
+ do { if (rsync) rsync_roll((s), (n)); } while(0)
+
/* ===========================================================================
* Flush the current block, with given end-of-file flag.
* IN assertion: strstart is set to the end of the current match.
@@ -617,6 +667,7 @@ local off_t deflate_fast()
lookahead -= match_length;
+ RSYNC_ROLL(strstart, match_length);
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
@@ -645,9 +696,18 @@ local off_t deflate_fast()
/* No match, output a literal byte */
Tracevv((stderr,"%c",window[strstart]));
flush = ct_tally (0, window[strstart]);
+ RSYNC_ROLL(strstart, 1);
lookahead--;
strstart++;
}
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ flush = 1;
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ }
if (flush) FLUSH_BLOCK(0), block_start = strstart;
/* Make sure that we always have enough lookahead, except
@@ -721,6 +781,7 @@ off_t deflate()
*/
lookahead -= prev_length-1;
prev_length -= 2;
+ RSYNC_ROLL(strstart, prev_length+1);
do {
strstart++;
INSERT_STRING(strstart, hash_head);
@@ -733,24 +794,51 @@ off_t deflate()
match_available = 0;
match_length = MIN_MATCH-1;
strstart++;
- if (flush) FLUSH_BLOCK(0), block_start = strstart;
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+ flush = 1;
+ }
+ if (flush) FLUSH_BLOCK(0), block_start = strstart;
} else if (match_available) {
/* If there was no match at the previous position, output a
* single literal. If there was a match but the current match
* is longer, truncate the previous match to a single literal.
*/
Tracevv((stderr,"%c",window[strstart-1]));
- if (ct_tally (0, window[strstart-1])) {
- FLUSH_BLOCK(0), block_start = strstart;
- }
+ flush = ct_tally (0, window[strstart-1]);
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+
+ flush = 1;
+ }
+ if (flush) FLUSH_BLOCK(0), block_start = strstart;
+ RSYNC_ROLL(strstart, 1);
strstart++;
lookahead--;
} else {
/* There is no previous match to compare with, wait for
* the next step to decide.
*/
+ if (rsync && strstart > rsync_chunk_end) {
+ ush attr = 0; /* ascii/binary flag */
+
+ /* Reset huffman tree */
+ ct_init(&attr, &method);
+ rsync_chunk_end = 0xFFFFFFFFUL;
+
+ FLUSH_BLOCK(0), block_start = strstart;
+ }
match_available = 1;
+ RSYNC_ROLL(strstart, 1);
strstart++;
lookahead--;
}
--- a/doc/gzip.texi
+++ b/doc/gzip.texi
@@ -353,6 +353,14 @@ specified on the command line are direct
into the directory and compress all the files it finds there (or
decompress them in the case of @command{gunzip}).
+@item --rsyncable
+While compressing, synchronize the output occasionally based on the
+input. This can reduce the compression slightly in some cases, but
+means that the @code{rsync} program can take advantage of similarities
+in the uncompressed input when syncronizing two files compressed with
+this flag. @code{gunzip} cannot tell the difference between a
+compressed file created with this option, and one created without it.
+
@item --suffix @var{suf}
@itemx -S @var{suf}
Use suffix @var{suf} instead of @samp{.gz}. Any suffix can be
--- a/gzip.c
+++ b/gzip.c
@@ -218,6 +218,7 @@ int ofd; /* output fil
unsigned insize; /* valid bytes in inbuf */
unsigned inptr; /* index of next byte to be processed in inbuf */
unsigned outcnt; /* bytes in output buffer */
+int rsync = 0; /* make ryncable chunks */
static int handled_sig[] =
{
@@ -271,6 +272,7 @@ struct option longopts[] =
{"best", 0, 0, '9'}, /* compress better */
{"lzw", 0, 0, 'Z'}, /* make output compatible with old compress */
{"bits", 1, 0, 'b'}, /* max number of bits per code (implies -Z) */
+ {"rsyncable", 0, 0, 'R'}, /* make rsync-friendly archive */
{ 0, 0, 0, 0 }
};
@@ -352,6 +354,7 @@ local void help()
" -Z, --lzw produce output compatible with old compress",
" -b, --bits=BITS max number of bits per code (implies -Z)",
#endif
+ " --rsyncable Make rsync-friendly archive",
"",
"With no FILE, or when FILE is -, read standard input.",
"",
@@ -479,6 +482,9 @@ int main (int argc, char **argv)
recursive = 1;
#endif
break;
+ case 'R':
+ rsync = 1; break;
+
case 'S':
#ifdef NO_MULTIPLE_DOTS
if (*optarg == '.') optarg++;
--- a/gzip.h
+++ b/gzip.h
@@ -146,6 +146,7 @@ EXTERN(uch, window); /* Sliding
extern unsigned insize; /* valid bytes in inbuf */
extern unsigned inptr; /* index of next byte to be processed in inbuf */
extern unsigned outcnt; /* bytes in output buffer */
+extern int rsync; /* deflate into rsyncable chunks */
extern off_t bytes_in; /* number of input bytes */
extern off_t bytes_out; /* number of output bytes */