[git commit master] decompress_unxz: newer version, one which can unpack SHA-256 protected files

Denys Vlasenko vda.linux at googlemail.com
Tue Jun 1 12:41:39 UTC 2010


commit: http://git.busybox.net/busybox/commit/?id=716f3f612e62c55edd052b505a86e4e2e09074a5
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
check_sizes                                            -      16     +16
crc32_table                                            -       4      +4
index_update                                          47      40      -7
crc32_validate                                       110      93     -17
dec_vli                                              197     165     -32
unpack_xz_stream                                    4284    4014    -270
------------------------------------------------------------------------------
(add/remove: 2/0 grow/shrink: 0/4 up/down: 20/-326)          Total: -306 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 archival/libunarchive/decompress_unxz.c    |   18 +++--
 archival/libunarchive/unxz/xz.h            |   58 +++++++++------
 archival/libunarchive/unxz/xz_config.h     |    6 +-
 archival/libunarchive/unxz/xz_dec_bcj.c    |   10 ++-
 archival/libunarchive/unxz/xz_dec_stream.c |  113 ++++++++++++++++++++--------
 archival/libunarchive/unxz/xz_private.h    |    2 +-
 archival/libunarchive/unxz/xz_stream.h     |   15 +++-
 7 files changed, 154 insertions(+), 68 deletions(-)

diff --git a/archival/libunarchive/decompress_unxz.c b/archival/libunarchive/decompress_unxz.c
index 924a525..374b76d 100644
--- a/archival/libunarchive/decompress_unxz.c
+++ b/archival/libunarchive/decompress_unxz.c
@@ -16,9 +16,13 @@
 #define XZ_FUNC FAST_FUNC
 #define XZ_EXTERN static
 
-#define xz_crc32_init(table) crc32_filltable(table, /*endian:*/ 0)
-static uint32_t xz_crc32(uint32_t *crc32_table,
-		const uint8_t *buf, size_t size, uint32_t crc)
+/* Skip check (rather than fail) of unsupported hash functions */
+#define XZ_DEC_ANY_CHECK  1
+
+/* We use our own crc32 function */
+#define XZ_INTERNAL_CRC32 0
+static uint32_t *crc32_table;
+static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc)
 {
 	crc = ~crc;
 
@@ -29,8 +33,8 @@ static uint32_t xz_crc32(uint32_t *crc32_table,
 
 	return ~crc;
 }
-#define xz_crc32 xz_crc32
 
+/* We use arch-optimized unaligned accessors */
 #define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); })
 #define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); })
 #define put_unaligned_le32(val, buf) move_to_unaligned16(buf, SWAP_LE32(val))
@@ -64,8 +68,10 @@ unpack_xz_stream(int src_fd, int dst_fd)
 	iobuf.out = membuf + IN_SIZE;
 	iobuf.out_size = OUT_SIZE;
 
+	if (!crc32_table)
+		crc32_table = crc32_filltable(NULL, /*endian:*/ 0);
+
 	state = xz_dec_init(64*1024); /* initial dict of 64k */
-	xz_crc32_init(state->crc32_table);
 
 	while (1) {
 		enum xz_ret r;
@@ -102,7 +108,7 @@ unpack_xz_stream(int src_fd, int dst_fd)
 		) {
 			break;
 		}
-		if (r != XZ_OK) {
+		if (r != XZ_OK && r != XZ_UNSUPPORTED_CHECK) {
 			bb_error_msg("corrupted data");
 			total = -1;
 			break;
diff --git a/archival/libunarchive/unxz/xz.h b/archival/libunarchive/unxz/xz.h
index dbb9ba9..eb82706 100644
--- a/archival/libunarchive/unxz/xz.h
+++ b/archival/libunarchive/unxz/xz.h
@@ -31,20 +31,29 @@
 
 /**
  * enum xz_ret - Return codes
- * @XZ_OK:              Everything is OK so far. More input or more output
- *                      space is required to continue.
- * @XZ_STREAM_END:      Operation finished successfully.
- * @XZ_MEMLIMIT_ERROR:  Not enough memory was preallocated at decoder
- *                      initialization time.
- * @XZ_FORMAT_ERROR:    File format was not recognized (wrong magic bytes).
- * @XZ_OPTIONS_ERROR:   This implementation doesn't support the requested
- *                      compression options. In the decoder this means that
- *                      the header CRC32 matches, but the header itself
- *                      specifies something that we don't support.
- * @XZ_DATA_ERROR:      Compressed data is corrupt.
- * @XZ_BUF_ERROR:       Cannot make any progress. Details are slightly
- *                      different between multi-call and single-call mode;
- *                      more information below.
+ * @XZ_OK:                  Everything is OK so far. More input or more
+ *                          output space is required to continue.
+ * @XZ_STREAM_END:          Operation finished successfully.
+ * @XZ_UNSUPPORTED_CHECK:   Integrity check type is not supported. Decoding
+ *                          is still possible in multi-call mode by simply
+ *                          calling xz_dec_run() again.
+ *                          NOTE: This return value is used only if
+ *                          XZ_DEC_ANY_CHECK was defined at build time,
+ *                          which is not used in the kernel. Unsupported
+ *                          check types return XZ_OPTIONS_ERROR if
+ *                          XZ_DEC_ANY_CHECK was not defined at build time.
+ * @XZ_MEMLIMIT_ERROR:      Not enough memory was preallocated at decoder
+ *                          initialization time.
+ * @XZ_FORMAT_ERROR:        File format was not recognized (wrong magic
+ *                          bytes).
+ * @XZ_OPTIONS_ERROR:       This implementation doesn't support the requested
+ *                          compression options. In the decoder this means
+ *                          that the header CRC32 matches, but the header
+ *                          itself specifies something that we don't support.
+ * @XZ_DATA_ERROR:          Compressed data is corrupt.
+ * @XZ_BUF_ERROR:           Cannot make any progress. Details are slightly
+ *                          different between multi-call and single-call
+ *                          mode; more information below.
  *
  * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls
  * to XZ code cannot consume any input and cannot produce any new output.
@@ -62,6 +71,7 @@
 enum xz_ret {
 	XZ_OK,
 	XZ_STREAM_END,
+	XZ_UNSUPPORTED_CHECK,
 	XZ_MEMLIMIT_ERROR,
 	XZ_FORMAT_ERROR,
 	XZ_OPTIONS_ERROR,
@@ -129,7 +139,7 @@ struct xz_dec;
  *
  * Because the output buffer is used as the workspace, streams encoded using
  * a big dictionary are not a problem in single-call. It is enough that the
- * output buffer is is big enough to hold the actual uncompressed data; it
+ * output buffer is big enough to hold the actual uncompressed data; it
  * can be smaller than the dictionary size stored in the stream headers.
  *
  * On success, xz_dec_init() returns a pointer to struct xz_dec, which is
@@ -186,23 +196,27 @@ XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s);
  * CRC32 module is used instead, and users of this module don't need to
  * care about the functions below.
  */
-#if !defined(__KERNEL__) || defined(XZ_INTERNAL_CRC32)
+#ifndef XZ_INTERNAL_CRC32
+#	ifdef __KERNEL__
+#		define XZ_INTERNAL_CRC32 0
+#	else
+#		define XZ_INTERNAL_CRC32 1
+#	endif
+#endif
+
+#if XZ_INTERNAL_CRC32
 /*
  * This must be called before any other xz_* function to initialize
  * the CRC32 lookup table.
  */
-#ifndef xz_crc32_init
-XZ_EXTERN void XZ_FUNC xz_crc32_init(uint32_t *crc32_table);
-#endif
+XZ_EXTERN void XZ_FUNC xz_crc32_init(void);
 
 /*
  * Update CRC32 value using the polynomial from IEEE-802.3. To start a new
  * calculation, the third argument must be zero. To continue the calculation,
  * the previously returned value is passed as the third argument.
  */
-#ifndef xz_crc32
-XZ_EXTERN uint32_t XZ_FUNC xz_crc32(uint32_t *crc32_table,
+XZ_EXTERN uint32_t XZ_FUNC xz_crc32(
 		const uint8_t *buf, size_t size, uint32_t crc);
 #endif
 #endif
-#endif
diff --git a/archival/libunarchive/unxz/xz_config.h b/archival/libunarchive/unxz/xz_config.h
index 3259815..ff90eff 100644
--- a/archival/libunarchive/unxz/xz_config.h
+++ b/archival/libunarchive/unxz/xz_config.h
@@ -43,7 +43,7 @@
  * becomes slow.
  *
  * NOTE: System headers on GNU/Linux may #define this macro already,
- * so if you want to change it, it you need to #undef it first.
+ * so if you want to change it, you need to #undef it first.
  */
 #ifndef __always_inline
 #	ifdef __GNUC__
@@ -114,6 +114,8 @@ static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf)
  * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr))
  * could save a few bytes in code size.
  */
-#define get_le32 get_unaligned_le32
+#ifndef get_le32
+#	define get_le32 get_unaligned_le32
+#endif
 
 #endif
diff --git a/archival/libunarchive/unxz/xz_dec_bcj.c b/archival/libunarchive/unxz/xz_dec_bcj.c
index d4b6ef7..09162b5 100644
--- a/archival/libunarchive/unxz/xz_dec_bcj.c
+++ b/archival/libunarchive/unxz/xz_dec_bcj.c
@@ -10,6 +10,12 @@
 
 #include "xz_private.h"
 
+/*
+ * The rest of the file is inside this ifdef. It makes things a little more
+ * convenient when building without support for any BCJ filters.
+ */
+#ifdef XZ_DEC_BCJ
+
 struct xz_dec_bcj {
 	/* Type of the BCJ filter being used */
 	enum {
@@ -331,7 +337,6 @@ static noinline_for_stack size_t XZ_FUNC bcj_sparc(
 }
 #endif
 
-#ifdef XZ_DEC_BCJ
 /*
  * Apply the selected BCJ filter. Update *pos and s->pos to match the amount
  * of data that got filtered.
@@ -388,9 +393,7 @@ static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s,
 	*pos += filtered;
 	s->pos += filtered;
 }
-#endif
 
-#ifdef XZ_DEC_BCJ
 /*
  * Flush pending filtered data from temp to the output buffer.
  * Move the remaining mixture of possibly filtered and unfiltered
@@ -557,4 +560,5 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
 
 	return XZ_OK;
 }
+
 #endif
diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c
index 121c3b5..21db283 100644
--- a/archival/libunarchive/unxz/xz_dec_stream.c
+++ b/archival/libunarchive/unxz/xz_dec_stream.c
@@ -45,8 +45,8 @@ struct xz_dec {
 	/* CRC32 value in Block or Index */
 	uint32_t crc32;
 
-	/* True if CRC32 is calculated from uncompressed data */
-	uint8_t crc_type;
+	/* Type of the integrity check calculated from uncompressed data */
+	enum xz_check check_type;
 
 	/* True if we are operating in single-call mode. */
 	bool single_call;
@@ -134,9 +134,19 @@ struct xz_dec {
 	struct xz_dec_bcj *bcj;
 	bool bcj_active;
 #endif
+};
 
-	uint32_t crc32_table[256];
+#ifdef XZ_DEC_ANY_CHECK
+/* Sizes of the Check field with different Check IDs */
+static const uint8_t check_sizes[16] = {
+	0,
+	4, 4, 4,
+	8, 8, 8,
+	16, 16, 16,
+	32, 32, 32,
+	64, 64, 64
 };
+#endif
 
 /*
  * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller
@@ -231,9 +241,8 @@ static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
 				> s->block_header.uncompressed)
 		return XZ_DATA_ERROR;
 
-	if (s->crc_type == 0x01)
-		s->crc32 = xz_crc32(s->crc32_table,
-				b->out + s->out_start,
+	if (s->check_type == XZ_CHECK_CRC32)
+		s->crc32 = xz_crc32(b->out + s->out_start,
 				b->out_pos - s->out_start, s->crc32);
 
 	if (ret == XZ_STREAM_END) {
@@ -249,15 +258,16 @@ static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b)
 
 		s->block.hash.unpadded += s->block_header.size
 				+ s->block.compressed;
-		if (s->crc_type == 0x01)
+
+#ifdef XZ_DEC_ANY_CHECK
+		s->block.hash.unpadded += check_sizes[s->check_type];
+#else
+		if (s->check_type == XZ_CHECK_CRC32)
 			s->block.hash.unpadded += 4;
-		if (s->crc_type == 0x04) /* CRC64 */
-			s->block.hash.unpadded += 8;
-		if (s->crc_type == 0x0A) /* SHA-256 */
-			s->block.hash.unpadded += 32;
+#endif
 
 		s->block.hash.uncompressed += s->block.uncompressed;
-		s->block.hash.crc32 = xz_crc32(s->crc32_table,
+		s->block.hash.crc32 = xz_crc32(
 				(const uint8_t *)&s->block.hash,
 				sizeof(s->block.hash), s->block.hash.crc32);
 
@@ -272,7 +282,7 @@ static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b)
 {
 	size_t in_used = b->in_pos - s->in_start;
 	s->index.size += in_used;
-	s->crc32 = xz_crc32(s->crc32_table, b->in + s->in_start, in_used, s->crc32);
+	s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32);
 }
 
 /*
@@ -316,7 +326,7 @@ static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b)
 
 		case SEQ_INDEX_UNCOMPRESSED:
 			s->index.hash.uncompressed += s->vli;
-			s->index.hash.crc32 = xz_crc32(s->crc32_table,
+			s->index.hash.crc32 = xz_crc32(
 					(const uint8_t *)&s->index.hash,
 					sizeof(s->index.hash),
 					s->index.hash.crc32);
@@ -352,31 +362,58 @@ static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b)
 	return XZ_STREAM_END;
 }
 
+#ifdef XZ_DEC_ANY_CHECK
+/*
+ * Skip over the Check field when the Check ID is not supported.
+ * Returns true once the whole Check field has been skipped over.
+ */
+static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b)
+{
+	while (s->pos < check_sizes[s->check_type]) {
+		if (b->in_pos == b->in_size)
+			return false;
+
+		++b->in_pos;
+		++s->pos;
+	}
+
+	s->pos = 0;
+
+	return true;
+}
+#endif
+
 /* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
 static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s)
 {
 	if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE))
 		return XZ_FORMAT_ERROR;
 
-	if (xz_crc32(s->crc32_table, s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
+	if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0)
 			!= get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2))
 		return XZ_DATA_ERROR;
 
+	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0)
+		return XZ_OPTIONS_ERROR;
+
 	/*
-	 * Decode the Stream Flags field. Of integrity checks, we support
-	 * only none (Check ID = 0) and CRC32 (Check ID = 1).
-	 * We also accept CRC64 and SHA-256, but they will not be verified.
+	 * Of integrity checks, we support only none (Check ID = 0) and
+	 * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined,
+	 * we will accept other check types too, but then the check won't
+	 * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given.
 	 */
-	if (s->temp.buf[HEADER_MAGIC_SIZE] != 0
-			|| (s->temp.buf[HEADER_MAGIC_SIZE + 1] > 1
-			    && s->temp.buf[HEADER_MAGIC_SIZE + 1] != 0x04 /* CRC64 */
-			    && s->temp.buf[HEADER_MAGIC_SIZE + 1] != 0x0A /* SHA-256 */
-			)
-	) {
+	s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
+
+#ifdef XZ_DEC_ANY_CHECK
+	if (s->check_type > XZ_CHECK_MAX)
 		return XZ_OPTIONS_ERROR;
-	}
 
-	s->crc_type = s->temp.buf[HEADER_MAGIC_SIZE + 1];
+	if (s->check_type > XZ_CHECK_CRC32)
+		return XZ_UNSUPPORTED_CHECK;
+#else
+	if (s->check_type > XZ_CHECK_CRC32)
+		return XZ_OPTIONS_ERROR;
+#endif
 
 	return XZ_OK;
 }
@@ -387,7 +424,7 @@ static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
 	if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE))
 		return XZ_DATA_ERROR;
 
-	if (xz_crc32(s->crc32_table, s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
+	if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf))
 		return XZ_DATA_ERROR;
 
 	/*
@@ -398,7 +435,7 @@ static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s)
 	if ((s->index.size >> 2) != get_le32(s->temp.buf + 4))
 		return XZ_DATA_ERROR;
 
-	if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->crc_type)
+	if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type)
 		return XZ_DATA_ERROR;
 
 	/*
@@ -418,7 +455,7 @@ static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s)
 	 * eight bytes so this is safe.
 	 */
 	s->temp.size -= 4;
-	if (xz_crc32(s->crc32_table, s->temp.buf, s->temp.size, 0)
+	if (xz_crc32(s->temp.buf, s->temp.size, 0)
 			!= get_le32(s->temp.buf + s->temp.size))
 		return XZ_DATA_ERROR;
 
@@ -533,12 +570,19 @@ static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
 			if (!fill_temp(s, b))
 				return XZ_OK;
 
+			/*
+			 * If dec_stream_header() returns
+			 * XZ_UNSUPPORTED_CHECK, it is still possible
+			 * to continue decoding if working in multi-call
+			 * mode. Thus, update s->sequence before calling
+			 * dec_stream_header().
+			 */
+			s->sequence = SEQ_BLOCK_START;
+
 			ret = dec_stream_header(s);
 			if (ret != XZ_OK)
 				return ret;
 
-			s->sequence = SEQ_BLOCK_START;
-
 		case SEQ_BLOCK_START:
 			/* We need one byte of input to continue. */
 			if (b->in_pos == b->in_size)
@@ -600,11 +644,16 @@ static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b)
 			s->sequence = SEQ_BLOCK_CHECK;
 
 		case SEQ_BLOCK_CHECK:
-			if (s->crc_type == 0x01) {
+			if (s->check_type == XZ_CHECK_CRC32) {
 				ret = crc32_validate(s, b);
 				if (ret != XZ_STREAM_END)
 					return ret;
 			}
+#ifdef XZ_DEC_ANY_CHECK
+			else if (!check_skip(s, b)) {
+				return XZ_OK;
+			}
+#endif
 
 			s->sequence = SEQ_BLOCK_START;
 			break;
diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h
index 9da8d70..f4e0b40 100644
--- a/archival/libunarchive/unxz/xz_private.h
+++ b/archival/libunarchive/unxz/xz_private.h
@@ -112,9 +112,9 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset(
  */
 XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s,
 		struct xz_dec_lzma2 *lzma2, struct xz_buf *b);
-#endif
 
 /* Free the memory allocated for the BCJ filters. */
 #define xz_dec_bcj_end(s) kfree(s)
+#endif
 
 #endif
diff --git a/archival/libunarchive/unxz/xz_stream.h b/archival/libunarchive/unxz/xz_stream.h
index efbe75a..36f2a7c 100644
--- a/archival/libunarchive/unxz/xz_stream.h
+++ b/archival/libunarchive/unxz/xz_stream.h
@@ -10,10 +10,10 @@
 #ifndef XZ_STREAM_H
 #define XZ_STREAM_H
 
-#if defined(__KERNEL__) && !defined(XZ_INTERNAL_CRC32)
+#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32
 #	include <linux/crc32.h>
 #	undef crc32
-#	define xz_crc32(crc32_table, buf, size, crc) \
+#	define xz_crc32(buf, size, crc) \
 		(~crc32_le(~(uint32_t)(crc), buf, size))
 #endif
 
@@ -43,4 +43,15 @@ typedef uint64_t vli_type;
 /* Maximum encoded size of a VLI */
 #define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7)
 
+/* Integrity Check types */
+enum xz_check {
+	XZ_CHECK_NONE = 0,
+	XZ_CHECK_CRC32 = 1,
+	XZ_CHECK_CRC64 = 4,
+	XZ_CHECK_SHA256 = 10
+};
+
+/* Maximum possible Check ID */
+#define XZ_CHECK_MAX 15
+
 #endif
-- 
1.6.3.3



More information about the busybox-cvs mailing list