[BusyBox] [patch] Next bzunzip2 cleanup.

Rob Landley rob at landley.net
Mon Oct 6 04:31:42 UTC 2003


This patch goes on top of the previous one.  This zaps the entire wrapper 
around BZ2_bzDecompress(), replacing it with one 73-line function.  This 
trims about 350 bytes off the executable size, and makes the code MUCH easier 
to read.

Altogether, with the two patches, a busybox executable that just implements 
bunzip2 (and bzcat) went from an even 15000 bytes to 14569 bytes.  (With the 
first patch by itself, it was 14920 bytes).

Strangely, decompress_bunzip2.o went from 11056 to 10192 bytes.  I don't know 
why an 864-byte drop in the object file only resulted in a 431-byte drop in 
the executable; I didn't touch the other files.  (Anybody know this one?)

Once again, ./busybox bzcat of the kernel tarball (33 megabytes as a bzip, 
over a hundred megabytes uncompressed), piped to GNU tar tv, produced no 
errors.  I didn't actually mess with the decompression state machine at all, 
so there's no reason to think it would hurt data in a non-obvious way.

Here's the patch...

Rob
--- busybox2/archival/libunarchive/decompress_bunzip2.c	2003-10-05 
23:20:49.977010392 -0500
+++ busybox-new/archival/libunarchive/decompress_bunzip2.c	2003-10-05 
23:25:35.266639776 -0500
@@ -78,7 +78,6 @@
 #define BZ_RUNA 0
 #define BZ_RUNB 1
 
-#define BZ_MAX_UNUSED 5000
 #define FILE_NAME_LEN 1034
 /*-- states for decompression. --*/
 
@@ -224,18 +223,7 @@
 	int	*save_gPerm;
 } DState;
 
-typedef struct {
-	DState	*dstate;
-	int			fd;
-	unsigned char	initialisedOk;
-	char	buf[BZ_MAX_UNUSED];
-	int		lastErr;
-	int		bufN;
-} bzFile;
-
 static int BZ2_rNums[512];
-static bzFile *bzf;
-static int bzerr = BZ_OK;
 
 static const unsigned int BZ2_crc32Table[256] = {
 
@@ -1271,20 +1259,6 @@
 	return retVal;   
 }
 
-extern void BZ2_bzReadClose(void)
-{
-	if (bzf->initialisedOk) {
-		DState *s=bzf->dstate;
-		if (s == NULL) {
-			return;
-		}
-		free(s->tt);
-		free(s);
-		return;
-	}
-	free(bzf);
-}
-
 static void unRLE_obuf_to_output_FAST(DState *s)
 {
 	unsigned char k1;
@@ -1523,124 +1497,79 @@
 	return(0);  /*NOTREACHED*/
 }
 
-extern ssize_t read_bz2(int fd, void *buf, size_t count)
-{
-	int n, ret;
-	bz_stream *strm=&(bzf->dstate->strm);
-
-	bzerr = BZ_OK;
-	if (count == 0) {
-		return(0);
-	}
-	strm->avail_out = count;
-	strm->next_out = buf;
-
-	while (1) {
-		if (strm->avail_in == 0) {
-			n = bb_xread(bzf->fd, bzf->buf, BZ_MAX_UNUSED);
-			if (n == 0) {
-				break;
-			}
-			bzf->bufN = n;
-			strm->avail_in = bzf->bufN;
-			strm->next_in = bzf->buf;
-		}
-
-		ret = BZ2_bzDecompress(bzf->dstate);
-
-		if ((ret != BZ_OK) && (ret != BZ_STREAM_END)) {
-			bb_error_msg_and_die("Error decompressing");
-		}
-
-		if (ret == BZ_STREAM_END) {
-			bzerr = BZ_STREAM_END;
-			return(count - strm->avail_out);
-		}
-		if (strm->avail_out == 0) {
-			bzerr = BZ_OK;
-			return(count);
-		}
-	}
-	return(0);
-}
-
-extern void BZ2_bzReadOpen(int fd, void *unused, int nUnused)
-{
-	DState *s;
-
-	bzf = xmalloc(sizeof(bzFile));
-	bzf->initialisedOk = FALSE;
-	bzf->fd        = fd;
-	bzf->bufN      = 0;
-
-	s = xmalloc(sizeof(DState));
-	bzf->dstate = s;
-	s->state = BZ_X_MAGIC_1;
-	s->bsLive = 0;
-	s->bsBuff = 0;
-	s->calculatedCombinedCRC = 0;
-	s->tt = NULL;
-	s->currBlockNo = 0;
-
-	while (nUnused > 0) {
-		bzf->buf[bzf->bufN] = *((unsigned char *)(unused));
-		bzf->bufN++;
-		unused = ((void *)( 1 + ((unsigned char *)(unused))  ));
-		nUnused--;
-	}
-	s->strm.avail_in = bzf->bufN;
-	s->strm.next_in  = bzf->buf;
-
-	bzf->initialisedOk = TRUE;
-
-	return;
-}
+#define BZ2_BUFSIZ 5000
 
 extern unsigned char uncompressStream(int src_fd, int dst_fd)
 {
-	unsigned char unused[BZ_MAX_UNUSED];
-	unsigned char *unusedTmp;
-	unsigned char obuf[5000];
-	int nread;
-	int nUnused;
-	int streamNo;
-	int i;
-
-	nUnused = 0;
-	streamNo = 0;
-
-	while(1) {
-		BZ2_bzReadOpen(src_fd, unused, nUnused);
-		streamNo++;
-
-		while (bzerr == BZ_OK) {
-			nread = read_bz2(src_fd, obuf, 5000);
-			if (bzerr == BZ_DATA_ERROR_MAGIC) {
-				bb_error_msg_and_die("invalid magic");
-			}
-			if (((bzerr == BZ_OK) || (bzerr == BZ_STREAM_END)) && (nread > 0)) {
-				if (write(dst_fd, obuf, nread) != nread) {
-					BZ2_bzReadClose();
+	DState *decomp_state;
+	bz_stream *strm;
+	char *in_buf, *out_buf;
+	int temp,ret=BZ_OK;
+
+	/* Note: the decompression state is potentially multiple megabytes.  Don't
+	   memset it on alloc, there are huge embedded arrays that may be sparsely
+	   used.  The I/O buffers don't care about being zeroed either. */
+
+	decomp_state = xmalloc(sizeof(DState)+2*BZ2_BUFSIZ);
+	in_buf=((char*)decomp_state)+sizeof(DState);
+	out_buf=in_buf+BZ2_BUFSIZ;
+
+	/* If !avail_in, next_in will get set by read, so save a byte or two by not
+	   initializing it here. */
+	strm = &(decomp_state->strm);
+	strm->avail_in = 0;
+	strm->next_out = out_buf;
+	strm->avail_out = BZ2_BUFSIZ;
+
+	/* Read through all the compressed data blocks in the file. */
+
+	for(;;) {
+		/* Initialize decompression state for start of new compressed block. */
+
+		decomp_state->state = BZ_X_MAGIC_1;
+		decomp_state->bsLive = 0;
+		decomp_state->bsBuff = 0;
+		decomp_state->calculatedCombinedCRC = 0;
+		decomp_state->tt = NULL;
+		decomp_state->currBlockNo = 0;
+
+		/* Decompress block. */
+
+		for(;;) {
+
+			/* If our output space isn't empty, write it to dst_fd. */
+
+			temp = strm->next_out-out_buf;
+			if(temp) {
+				if(write(dst_fd, out_buf, temp) != temp)
 					bb_perror_msg_and_die("Couldnt write to file");
-				}
+				strm->next_out = out_buf;
+				strm->avail_out = BZ2_BUFSIZ;
 			}
-		}
-		nUnused = bzf->dstate->strm.avail_in;
-		unusedTmp = bzf->dstate->strm.next_in;
 
-		for (i = 0; i < nUnused; i++) {
-			unused[i] = unusedTmp[i];
-		}
-		BZ2_bzReadClose();
-		if (nUnused == 0) {
-			break;
+			/* If we're out of input data, read more from src_fd. */
+			if(!strm->avail_in) {
+				temp = bb_xread(src_fd, in_buf, BZ2_BUFSIZ);
+				if(!temp) goto free_and_return;
+				decomp_state->strm.next_in = in_buf;
+				decomp_state->strm.avail_in = temp;
+			}
+
+			/* Decompress input buf to output buf until one or the other runs
+			   out, or we get an error. */
+			ret = BZ2_bzDecompress(decomp_state);
+			if(ret!=BZ_OK && ret!=BZ_STREAM_END) goto free_and_return;
 		}
 	}
 
-	close(src_fd);
-	if (dst_fd != fileno(stdout)) {
-		close(dst_fd);
-	}
+free_and_return:
+	/* Decompression is done, for whatever reason.  Free memory and return. */
+
+	free(decomp_state);
+	if(ret==BZ_DATA_ERROR_MAGIC)
+		bb_error_msg_and_die("Non-bzip data found");
+	if(ret!=BZ_STREAM_END && ret!=BZ_OK)
+		bb_error_msg_and_die("Error decompressing");
+
 	return TRUE;
 }
-




More information about the busybox mailing list