[BusyBox] Patch: libbb/unzip.c size reduction

Aaron Lehmann aaronl at vitelus.com
Sat Dec 1 02:36:32 UTC 2001


On Sat, Dec 01, 2001 at 07:05:40PM +1100, rbrown64 at csc.com.au wrote:
> 2001-12-01  Rodney Brown  <rbrown64 at csc.com.au>
>      * libbb/unzip.c: Space reductions.
>      * archival/bunzip2.c: Declare BZ2_crc32Table const.

I couldn't get this to apply to CVS, so I hand-patched it.

Then I did a few space optimizations of my own (removed unnecessary
function, changed a commonly-used macro to a function...). This
resulted in a 288 byte reduction in binary size.

                           text    data     bss     dec     hex
After Rodney's diff:       243102    2812  203172  449086   6da3e
Above + my improvements:   242830    2812  203172  448814   6d92e

I think a good improvement for the future would be sharing many global
variables between unzip and gzip. Since these can't both be running at
the same time in the same process, there would be no problem with
this. *Theoretically*, the number of bytes of global variables that
the entire binary needs equals the sum of the sizes of the global
variables in the largest applet, but can you imagine how awful the
code would look after doing that kind of compression?

Attached is my diff, which is Rodney's patch brought up to CVS, plus
my "improvements". I verified the md5sums of binaries that were
compressed with both /bin/gzip and the patched busybox gzip and then
decompressed by the patched unzip code.

P.S. busybox gzip seems to be 2/3 as fast as /bin/gzip. I think this
is probably acceptable, and doubt it it due to any of my changes.
Still, it may be worth looking into at some point.

P.P.S. Erik, please look at my ash ansification diff sometime so that
I can do further work against that and submit it.
-------------- next part --------------
Index: Changelog
===================================================================
RCS file: /var/cvs/busybox/Changelog,v
retrieving revision 1.274
diff -u -r1.274 Changelog
--- Changelog	2001/11/19 21:07:14	1.274
+++ Changelog	2001/12/01 09:22:31
@@ -15,7 +15,7 @@
 	    -- a whole bunch of ash size optimizations
 	    -- Fix for ash leading redirections (i.e. '2>/dev/null ls rubbish')
 	* Rodney Brown  <RDBrown at mira.net> 
-	    -- Optimized gzip.c, shrinking it be ~1.5k
+	    -- Optimized gzip.c, shrinking it by ~1.5k
 	* Matt Kraai
 	    -- Fix sed s/[/]// handling (closes: #1208).
 	    -- Fix `-/bin/sh' invocation (closes: #1209).
Index: TODO
===================================================================
RCS file: /var/cvs/busybox/TODO,v
retrieving revision 1.81
diff -u -r1.81 TODO
--- TODO	2001/11/19 22:51:41	1.81
+++ TODO	2001/12/01 09:22:32
@@ -56,3 +56,8 @@
 xargs could use a -l option
 
 ------------------------------------------------------------------
+
+libbb/unzip.c and archival/gzip.c have common constant static arrays and
+code for initializing the CRC array. Both use CRC-32 and could use
+common code for CRC calculation. Within archival/gzip.c, the CRC
+array should be malloc-ed as it is in libbb/unzip.c .
Index: archival/bunzip2.c
===================================================================
RCS file: /var/cvs/busybox/archival/bunzip2.c,v
retrieving revision 1.3
diff -u -r1.3 bunzip2.c
--- archival/bunzip2.c	2001/11/18 14:20:25	1.3
+++ archival/bunzip2.c	2001/12/01 09:22:33
@@ -276,7 +276,7 @@
 int numFilesProcessed;
 int exitValue;
 
-unsigned int BZ2_crc32Table[256] = {
+const unsigned int BZ2_crc32Table[256] = {
 
    /*-- Ugly, innit? --*/
 
Index: archival/gzip.c
===================================================================
RCS file: /var/cvs/busybox/archival/gzip.c,v
retrieving revision 1.50
diff -u -r1.50 gzip.c
--- archival/gzip.c	2001/10/24 04:59:09	1.50
+++ archival/gzip.c	2001/12/01 09:22:35
@@ -174,15 +174,6 @@
 #define put_byte(c) {outbuf[outcnt++]=(uch)(c); if (outcnt==OUTBUFSIZ)\
    flush_outbuf();}
 
-/* Output a 16 bit value, lsb first */
-#define put_short(w) \
-{ if (outcnt < OUTBUFSIZ-2) { \
-    outbuf[outcnt++] = (uch) ((w) & 0xff); \
-    outbuf[outcnt++] = (uch) ((ush)(w) >> 8); \
-  } else { \
-	put_short_when_full(w); \
-  } \
-}
 
 /* Output a 32 bit value to the bit stream, lsb first */
 #if 0
@@ -247,9 +238,6 @@
 	/* from util.c: */
 static void flush_outbuf (void);
 
-static void put_short_when_full (ush);
-
-
 /* lzw.h -- define the lzw functions.
  * Copyright (C) 1992-1993 Jean-loup Gailly.
  * This is free software; you can redistribute it and/or modify it under the
@@ -336,6 +324,19 @@
 static unsigned insize;				/* valid bytes in inbuf */
 static unsigned outcnt;				/* bytes in output buffer */
 
+
+/* Output a 16 bit value, lsb first */
+static void put_short(ush w)
+{
+  if (outcnt < OUTBUFSIZ-2) {
+    outbuf[outcnt++] = (uch) ((w) & 0xff);
+    outbuf[outcnt++] = (uch) ((ush)(w) >> 8);
+  } else {
+    put_byte((uch)((w) & 0xff));
+    put_byte((uch)((ush)(w) >> 8));
+  }
+}
+
 /* ========================================================================
  * Signal and error handler.
  */
@@ -1481,7 +1482,7 @@
  * if we rely on DIST_BUFSIZE == LIT_BUFSIZE.
  */
 #if LIT_BUFSIZE > INBUFSIZ
-error cannot overlay l_buf and inbuf
+#error cannot overlay l_buf and inbuf
 #endif
 #define REP_3_6      16
 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
@@ -2462,21 +2463,10 @@
 static ulg crc;					/* crc on uncompressed file data */
 static long header_bytes;				/* number of bytes in gzip header */
 
-static void put_short_when_full(ush w)
-{
-	put_byte((uch)((w) & 0xff));
-	put_byte((uch)((ush)(w) >> 8));
-}
-
-static void put_short_function(ush n)
-{
-	put_short(n);
-}
-
 static void put_long(ulg n)
 {
-	put_short_function((n) & 0xffff);
-	put_short_function(((ulg)(n)) >> 16);
+	put_short((n) & 0xffff);
+	put_short(((ulg)(n)) >> 16);
 }
 
 /* put_header_byte is used for the compressed output
Index: libbb/unzip.c
===================================================================
RCS file: /var/cvs/busybox/libbb/unzip.c,v
retrieving revision 1.11
diff -u -r1.11 unzip.c
--- libbb/unzip.c	2001/11/29 06:36:56	1.11
+++ libbb/unzip.c	2001/12/01 09:22:36
@@ -124,19 +124,12 @@
 static void make_crc_table(void)
 {
 	unsigned long table_entry;      /* crc shift register */
-	unsigned long poly = 0;      /* polynomial exclusive-or pattern */
+	const unsigned long poly = 0xedb88320L;      /* polynomial exclusive-or pattern */
 	int i;                /* counter for all possible eight bit values */
 	int k;                /* byte being shifted into crc apparatus */
 
-	/* terms of polynomial defining this crc (except x^32): */
-	static int p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
-
 	crc_table = (unsigned long *) malloc(256 * sizeof(unsigned long));
 
-	/* Make exclusive-or pattern from polynomial (0xedb88320) */
-	for (i = 0; i < sizeof(p)/sizeof(int); i++)
-		poly |= 1L << (31 - p[i]);
-
 	/* Compute and print table of CRC's, five per line */
 	for (i = 0; i < 256; i++) {
 		table_entry = i;
@@ -192,6 +185,8 @@
 	return 0;
 }
 
+typedef unsigned char extra_bits_t;
+
 /* Given a list of code lengths and a maximum table size, make a set of
  * tables to decode that set of codes.  Return zero on success, one if
  * the given code set is incomplete (the tables are still built in this
@@ -207,7 +202,7 @@
  * m:	maximum lookup bits, returns actual
  */
 static int huft_build(unsigned int *b, const unsigned int n, const unsigned int s, 
-	const unsigned short *d, const unsigned short *e, huft_t **t, int *m)
+	const unsigned short *d, const extra_bits_t *e, huft_t **t, int *m)
 {
 	unsigned a;		/* counter for codes of length k */
 	unsigned c[BMAX + 1];	/* bit length count table */
@@ -499,6 +494,30 @@
 	return 0;
 }
 
+static const unsigned short cplens[] = {     /* Copy lengths for literal codes 257..285 */
+    3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+    35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
+};
+/* note: see note #13 above about the 258 in this list. */
+static const extra_bits_t cplext[] = {  /* Extra bits for literal codes 257..285 */
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+    3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99
+};                 /* 99==invalid */
+static const unsigned short cpdist[] = {     /* Copy offsets for distance codes 0..29 */
+    1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+    257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+    8193, 12289, 16385, 24577
+};
+static const extra_bits_t cpdext[] = {  /* Extra bits for distance codes */
+    0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+    7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+    12, 12, 13, 13
+};
+/* Tables for deflate from PKZIP's appnote.txt. */
+static const extra_bits_t border[] = {  /* Order of the bit length code lengths */
+    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
+};
+
 /*
  * decompress an inflated block
  * e: last block flag
@@ -510,25 +529,6 @@
 	unsigned t;			/* block type */
 	register unsigned long b;			/* bit buffer */
 	register unsigned k;		/* number of bits in bit buffer */
-	static unsigned short cplens[] = {		/* Copy lengths for literal codes 257..285 */
-		3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
-		35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0
-	};
-	/* note: see note #13 above about the 258 in this list. */
-	static unsigned short cplext[] = {		/* Extra bits for literal codes 257..285 */
-		0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
-		3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99
-	};				/* 99==invalid */
-	static unsigned short cpdist[] = {		/* Copy offsets for distance codes 0..29 */
-		1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
-		257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
-		8193, 12289, 16385, 24577
-	};
-	static unsigned short cpdext[] = {		/* Extra bits for distance codes */
-		0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
-		7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
-		12, 12, 13, 13
-	};
 
 	/* make local bit buffer */
 	b = bb;
@@ -667,12 +667,8 @@
 		}
 	case 2:	/* Inflate dynamic */
 		{
-			/* Tables for deflate from PKZIP's appnote.txt. */
-			static unsigned border[] = {	/* Order of the bit length code lengths */
-				16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
-			};
-			int dbits = 6;					/* bits in base distance lookup table */
-			int lbits = 9;					/* bits in base literal/length lookup table */
+			const int dbits = 6;					/* bits in base distance lookup table */
+			const int lbits = 9;					/* bits in base literal/length lookup table */
 
 			int i;						/* temporary variables */
 			unsigned j;


More information about the busybox mailing list