[git commit] bzip2: fix two crashes on corrupted archives

Denys Vlasenko vda.linux at googlemail.com
Sun Apr 8 18:05:04 UTC 2018


commit: https://git.busybox.net/busybox/commit/?id=38ccd6af8abbafff98d458a1c62909acfc09a514
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

As it turns out, longjmp'ing into freed stack is not healthy...

function                                             old     new   delta
unpack_usage_messages                                  -      97     +97
unpack_bz2_stream                                    369     409     +40
get_next_block                                      1667    1677     +10
get_bits                                             156     155      -1
start_bunzip                                         212     183     -29
bb_show_usage                                        181     120     -61
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 2/3 up/down: 147/-91)            Total: 56 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 archival/libarchive/decompress_bunzip2.c |  78 ++++++++++++++++++++++---------
 archival/libarchive/decompress_gunzip.c  |   1 -
 coreutils/test.c                         |   1 -
 include/bb_archive.h                     |   2 +-
 libbb/appletlib.c                        |  17 +++++--
 miscutils/bbconfig.c                     |  19 ++++++--
 shell/ash.c                              |   1 -
 testsuite/bunzip2.tests                  |  16 +++++++
 testsuite/bz2_issue_11.bz2               | Bin 0 -> 12000 bytes
 testsuite/bz2_issue_12.bz2               | Bin 0 -> 11000 bytes
 10 files changed, 99 insertions(+), 36 deletions(-)

diff --git a/archival/libarchive/decompress_bunzip2.c b/archival/libarchive/decompress_bunzip2.c
index bec89edd3..7ef4e035f 100644
--- a/archival/libarchive/decompress_bunzip2.c
+++ b/archival/libarchive/decompress_bunzip2.c
@@ -100,7 +100,7 @@ struct bunzip_data {
 	unsigned dbufSize;
 
 	/* For I/O error handling */
-	jmp_buf jmpbuf;
+	jmp_buf *jmpbuf;
 
 	/* Big things go last (register-relative addressing can be larger for big offsets) */
 	uint32_t crc32Table[256];
@@ -127,7 +127,7 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 			/* if "no input fd" case: in_fd == -1, read fails, we jump */
 			bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE);
 			if (bd->inbufCount <= 0)
-				longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
+				longjmp(*bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF);
 			bd->inbufPos = 0;
 		}
 
@@ -151,12 +151,12 @@ static unsigned get_bits(bunzip_data *bd, int bits_wanted)
 
 	return bits;
 }
+//#define get_bits(bd, n) (dbg("%d:get_bits()", __LINE__), get_bits(bd, n))
 
 /* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */
 static int get_next_block(bunzip_data *bd)
 {
-	struct group_data *hufGroup;
-	int groupCount, *base, *limit, selector,
+	int groupCount, selector,
 		i, j, symCount, symTotal, nSelectors, byteCount[256];
 	uint8_t uc, symToByte[256], mtfSymbol[256], *selectors;
 	uint32_t *dbuf;
@@ -179,15 +179,19 @@ static int get_next_block(bunzip_data *bd)
 	i = get_bits(bd, 24);
 	j = get_bits(bd, 24);
 	bd->headerCRC = get_bits(bd, 32);
-	if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK;
-	if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA;
+	if ((i == 0x177245) && (j == 0x385090))
+		return RETVAL_LAST_BLOCK;
+	if ((i != 0x314159) || (j != 0x265359))
+		return RETVAL_NOT_BZIP_DATA;
 
 	/* We can add support for blockRandomised if anybody complains.  There was
 	   some code for this in busybox 1.0.0-pre3, but nobody ever noticed that
 	   it didn't actually work. */
-	if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT;
+	if (get_bits(bd, 1))
+		return RETVAL_OBSOLETE_INPUT;
 	origPtr = get_bits(bd, 24);
-	if (origPtr > bd->dbufSize) return RETVAL_DATA_ERROR;
+	if (origPtr > bd->dbufSize)
+		return RETVAL_DATA_ERROR;
 
 	/* mapping table: if some byte values are never used (encoding things
 	   like ascii text), the compression code removes the gaps to have fewer
@@ -231,13 +235,21 @@ static int get_next_block(bunzip_data *bd)
 		/* Get next value */
 		int n = 0;
 		while (get_bits(bd, 1)) {
-			if (n >= groupCount) return RETVAL_DATA_ERROR;
+			if (n >= groupCount)
+				return RETVAL_DATA_ERROR;
 			n++;
 		}
 		/* Decode MTF to get the next selector */
 		tmp_byte = mtfSymbol[n];
 		while (--n >= 0)
 			mtfSymbol[n + 1] = mtfSymbol[n];
+//We catch it later, in the second loop where we use selectors[i].
+//Maybe this is a better place, though?
+//		if (tmp_byte >= groupCount) {
+//			dbg("%d: selectors[%d]:%d groupCount:%d",
+//					__LINE__, i, tmp_byte, groupCount);
+//			return RETVAL_DATA_ERROR;
+//		}
 		mtfSymbol[0] = selectors[i] = tmp_byte;
 	}
 
@@ -248,6 +260,8 @@ static int get_next_block(bunzip_data *bd)
 		uint8_t length[MAX_SYMBOLS];
 		/* 8 bits is ALMOST enough for temp[], see below */
 		unsigned temp[MAX_HUFCODE_BITS+1];
+		struct group_data *hufGroup;
+		int *base, *limit;
 		int minLen, maxLen, pp, len_m1;
 
 		/* Read Huffman code lengths for each symbol.  They're stored in
@@ -283,8 +297,10 @@ static int get_next_block(bunzip_data *bd)
 		/* Find largest and smallest lengths in this group */
 		minLen = maxLen = length[0];
 		for (i = 1; i < symCount; i++) {
-			if (length[i] > maxLen) maxLen = length[i];
-			else if (length[i] < minLen) minLen = length[i];
+			if (length[i] > maxLen)
+				maxLen = length[i];
+			else if (length[i] < minLen)
+				minLen = length[i];
 		}
 
 		/* Calculate permute[], base[], and limit[] tables from length[].
@@ -320,7 +336,8 @@ static int get_next_block(bunzip_data *bd)
 		/* Count symbols coded for at each bit length */
 		/* NB: in pathological cases, temp[8] can end ip being 256.
 		 * That's why uint8_t is too small for temp[]. */
-		for (i = 0; i < symCount; i++) temp[length[i]]++;
+		for (i = 0; i < symCount; i++)
+			temp[length[i]]++;
 
 		/* Calculate limit[] (the largest symbol-coding value at each bit
 		 * length, which is (previous limit<<1)+symbols at this level), and
@@ -363,12 +380,22 @@ static int get_next_block(bunzip_data *bd)
 
 	runPos = dbufCount = selector = 0;
 	for (;;) {
+		struct group_data *hufGroup;
+		int *base, *limit;
 		int nextSym;
+		uint8_t ngrp;
 
 		/* Fetch next Huffman coding group from list. */
 		symCount = GROUP_SIZE - 1;
-		if (selector >= nSelectors) return RETVAL_DATA_ERROR;
-		hufGroup = bd->groups + selectors[selector++];
+		if (selector >= nSelectors)
+			return RETVAL_DATA_ERROR;
+		ngrp = selectors[selector++];
+		if (ngrp >= groupCount) {
+			dbg("%d selectors[%d]:%d groupCount:%d",
+				__LINE__, selector-1, ngrp, groupCount);
+			return RETVAL_DATA_ERROR;
+		}
+		hufGroup = bd->groups + ngrp;
 		base = hufGroup->base - 1;
 		limit = hufGroup->limit - 1;
 
@@ -403,7 +430,8 @@ static int get_next_block(bunzip_data *bd)
 		}
 		/* Figure how many bits are in next symbol and unget extras */
 		i = hufGroup->minLen;
-		while (nextSym > limit[i]) ++i;
+		while (nextSym > limit[i])
+			++i;
 		j = hufGroup->maxLen - i;
 		if (j < 0)
 			return RETVAL_DATA_ERROR;
@@ -671,7 +699,10 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
    should work for NOFORK applets too, we must be extremely careful to not leak
    any allocations! */
-int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
+int FAST_FUNC start_bunzip(
+		void *jmpbuf,
+		bunzip_data **bdp,
+		int in_fd,
 		const void *inbuf, int len)
 {
 	bunzip_data *bd;
@@ -683,11 +714,14 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 
 	/* Figure out how much data to allocate */
 	i = sizeof(bunzip_data);
-	if (in_fd != -1) i += IOBUF_SIZE;
+	if (in_fd != -1)
+		i += IOBUF_SIZE;
 
 	/* Allocate bunzip_data.  Most fields initialize to zero. */
 	bd = *bdp = xzalloc(i);
 
+	bd->jmpbuf = jmpbuf;
+
 	/* Setup input buffer */
 	bd->in_fd = in_fd;
 	if (-1 == in_fd) {
@@ -702,10 +736,6 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
 	/* Init the CRC32 table (big endian) */
 	crc32_filltable(bd->crc32Table, 1);
 
-	/* Setup for I/O error handling via longjmp */
-	i = setjmp(bd->jmpbuf);
-	if (i) return i;
-
 	/* Ensure that file starts with "BZh['1'-'9']." */
 	/* Update: now caller verifies 1st two bytes, makes .gz/.bz2
 	 * integration easier */
@@ -752,8 +782,12 @@ unpack_bz2_stream(transformer_state_t *xstate)
 	outbuf = xmalloc(IOBUF_SIZE);
 	len = 0;
 	while (1) { /* "Process one BZ... stream" loop */
+		jmp_buf jmpbuf;
 
-		i = start_bunzip(&bd, xstate->src_fd, outbuf + 2, len);
+		/* Setup for I/O error handling via longjmp */
+		i = setjmp(jmpbuf);
+		if (i == 0)
+			i = start_bunzip(&jmpbuf, &bd, xstate->src_fd, outbuf + 2, len);
 
 		if (i == 0) {
 			while (1) { /* "Produce some output bytes" loop */
diff --git a/archival/libarchive/decompress_gunzip.c b/archival/libarchive/decompress_gunzip.c
index 9a58d10d4..7f9046b82 100644
--- a/archival/libarchive/decompress_gunzip.c
+++ b/archival/libarchive/decompress_gunzip.c
@@ -32,7 +32,6 @@
  *
  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
-#include <setjmp.h>
 #include "libbb.h"
 #include "bb_archive.h"
 
diff --git a/coreutils/test.c b/coreutils/test.c
index a8286525a..824ce3b5a 100644
--- a/coreutils/test.c
+++ b/coreutils/test.c
@@ -76,7 +76,6 @@
 //usage:       "1\n"
 
 #include "libbb.h"
-#include <setjmp.h>
 
 /* This is a NOFORK applet. Be very careful! */
 
diff --git a/include/bb_archive.h b/include/bb_archive.h
index a5c61e95b..b437f1920 100644
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@@ -210,7 +210,7 @@ const llist_t *find_list_entry2(const llist_t *list, const char *filename) FAST_
 
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
+int start_bunzip(void *, bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
 /* NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes
  * in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0: */
 int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index 022455da4..769b7881c 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -102,14 +102,21 @@ static const char *unpack_usage_messages(void)
 	char *outbuf = NULL;
 	bunzip_data *bd;
 	int i;
+	jmp_buf jmpbuf;
 
-	i = start_bunzip(&bd,
+	/* Setup for I/O error handling via longjmp */
+	i = setjmp(jmpbuf);
+	if (i == 0) {
+		i = start_bunzip(&jmpbuf,
+			&bd,
 			/* src_fd: */ -1,
 			/* inbuf:  */ packed_usage,
-			/* len:    */ sizeof(packed_usage));
-	/* read_bunzip can longjmp to start_bunzip, and ultimately
-	 * end up here with i != 0 on read data errors! Not trivial */
-	if (!i) {
+			/* len:    */ sizeof(packed_usage)
+		);
+	}
+	/* read_bunzip can longjmp and end up here with i != 0
+	 * on read data errors! Not trivial */
+	if (i == 0) {
 		/* Cannot use xmalloc: will leak bd in NOFORK case! */
 		outbuf = malloc_or_warn(sizeof(UNPACKED_USAGE));
 		if (outbuf)
diff --git a/miscutils/bbconfig.c b/miscutils/bbconfig.c
index 9ab57876e..501349548 100644
--- a/miscutils/bbconfig.c
+++ b/miscutils/bbconfig.c
@@ -44,13 +44,22 @@ int bbconfig_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
 {
 #if ENABLE_FEATURE_COMPRESS_BBCONFIG
 	bunzip_data *bd;
-	int i = start_bunzip(&bd,
+	int i;
+	jmp_buf jmpbuf;
+
+	/* Setup for I/O error handling via longjmp */
+	i = setjmp(jmpbuf);
+	if (i == 0) {
+		i = start_bunzip(&jmpbuf,
+			&bd,
 			/* src_fd: */ -1,
 			/* inbuf:  */ bbconfig_config_bz2,
-			/* len:    */ sizeof(bbconfig_config_bz2));
-	/* read_bunzip can longjmp to start_bunzip, and ultimately
-	 * end up here with i != 0 on read data errors! Not trivial */
-	if (!i) {
+			/* len:    */ sizeof(bbconfig_config_bz2)
+		);
+	}
+	/* read_bunzip can longjmp and end up here with i != 0
+	 * on read data errors! Not trivial */
+	if (i == 0) {
 		/* Cannot use xmalloc: will leak bd in NOFORK case! */
 		char *outbuf = malloc_or_warn(sizeof(bbconfig_config));
 		if (outbuf) {
diff --git a/shell/ash.c b/shell/ash.c
index 56fba4a57..24958c0fc 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -177,7 +177,6 @@
 
 #define JOBS ENABLE_ASH_JOB_CONTROL
 
-#include <setjmp.h>
 #include <fnmatch.h>
 #include <sys/times.h>
 #include <sys/utsname.h> /* for setting $HOSTNAME */
diff --git a/testsuite/bunzip2.tests b/testsuite/bunzip2.tests
index fcfce1a31..edb332748 100755
--- a/testsuite/bunzip2.tests
+++ b/testsuite/bunzip2.tests
@@ -552,6 +552,22 @@ if test "${0##*/}" = "bunzip2.tests"; then
 	echo "FAIL: $unpack: pbzip_4m_zeros file"
 	FAILCOUNT=$((FAILCOUNT + 1))
     fi
+
+    errout="`${bb}bunzip2 <bz2_issue_11.bz2 2>&1 >/dev/null`"
+    if test x"$errout:$?" = x"bunzip2: bunzip error -5:1"; then
+	echo "PASS: $unpack: bz2_issue_11.bz2 corrupted example"
+    else
+	echo "FAIL: $unpack: bz2_issue_11.bz2 corrupted example"
+	FAILCOUNT=$((FAILCOUNT + 1))
+    fi
+
+    errout="`${bb}bunzip2 <bz2_issue_12.bz2 2>&1 >/dev/null`"
+    if test x"$errout:$?" = x"bunzip2: bunzip error -3:1"; then
+	echo "PASS: $unpack: bz2_issue_12.bz2 corrupted example"
+    else
+	echo "FAIL: $unpack: bz2_issue_12.bz2 corrupted example"
+	FAILCOUNT=$((FAILCOUNT + 1))
+    fi
 fi
 
 exit $((FAILCOUNT <= 255 ? FAILCOUNT : 255))
diff --git a/testsuite/bz2_issue_11.bz2 b/testsuite/bz2_issue_11.bz2
new file mode 100644
index 000000000..62b252046
Binary files /dev/null and b/testsuite/bz2_issue_11.bz2 differ
diff --git a/testsuite/bz2_issue_12.bz2 b/testsuite/bz2_issue_12.bz2
new file mode 100644
index 000000000..4215f08d6
Binary files /dev/null and b/testsuite/bz2_issue_12.bz2 differ


More information about the busybox-cvs mailing list