[git commit] preparatory cleanups for seamless uncompression improvements

Denys Vlasenko vda.linux at googlemail.com
Tue Mar 6 15:23:50 UTC 2012


commit: http://git.busybox.net/busybox/commit/?id=59655077c5bf176f01d8d277665ebb92263704ed
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

unpack_gz_stream_with_info: fix buggy error check
man: fix possible accesses past the end of a string
move seamless uncompression helpers from read_printf.c to open_transformer.c

function                                             old     new   delta
show_manpage                                         153     212     +59
unpack_gz_stream_with_info                           520     539     +19

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 archival/libarchive/decompress_uncompress.c |   12 +-
 archival/libarchive/decompress_unzip.c      |   26 +++--
 archival/libarchive/open_transformer.c      |  140 +++++++++++++++++++++++++++
 include/libbb.h                             |    8 ++
 libbb/read_printf.c                         |  139 --------------------------
 miscutils/man.c                             |   41 ++++-----
 6 files changed, 187 insertions(+), 179 deletions(-)

diff --git a/archival/libarchive/decompress_uncompress.c b/archival/libarchive/decompress_uncompress.c
index c6040d0..289f9e2 100644
--- a/archival/libarchive/decompress_uncompress.c
+++ b/archival/libarchive/decompress_uncompress.c
@@ -73,7 +73,7 @@
  */
 
 IF_DESKTOP(long long) int FAST_FUNC
-unpack_Z_stream(int fd_in, int fd_out)
+unpack_Z_stream(int src_fd, int dst_fd)
 {
 	IF_DESKTOP(long long total_written = 0;)
 	IF_DESKTOP(long long) int retval = -1;
@@ -105,14 +105,14 @@ unpack_Z_stream(int fd_in, int fd_out)
 
 	inbuf = xzalloc(IBUFSIZ + 64);
 	outbuf = xzalloc(OBUFSIZ + 2048);
-	htab = xzalloc(HSIZE);  /* wsn't zeroed out before, maybe can xmalloc? */
+	htab = xzalloc(HSIZE);  /* wasn't zeroed out before, maybe can xmalloc? */
 	codetab = xzalloc(HSIZE * sizeof(codetab[0]));
 
 	insize = 0;
 
 	/* xread isn't good here, we have to return - caller may want
 	 * to do some cleanup (e.g. delete incomplete unpacked file etc) */
-	if (full_read(fd_in, inbuf, 1) != 1) {
+	if (full_read(src_fd, inbuf, 1) != 1) {
 		bb_error_msg("short read");
 		goto err;
 	}
@@ -162,7 +162,7 @@ unpack_Z_stream(int fd_in, int fd_out)
 		}
 
 		if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) {
-			rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ);
+			rsize = safe_read(src_fd, inbuf + insize, IBUFSIZ);
 			if (rsize < 0)
 				bb_error_msg_and_die(bb_msg_read_error);
 			insize += rsize;
@@ -268,7 +268,7 @@ unpack_Z_stream(int fd_in, int fd_out)
 						}
 
 						if (outpos >= OBUFSIZ) {
-							xwrite(fd_out, outbuf, outpos);
+							xwrite(dst_fd, outbuf, outpos);
 							IF_DESKTOP(total_written += outpos;)
 							outpos = 0;
 						}
@@ -296,7 +296,7 @@ unpack_Z_stream(int fd_in, int fd_out)
 	} while (rsize > 0);
 
 	if (outpos > 0) {
-		xwrite(fd_out, outbuf, outpos);
+		xwrite(dst_fd, outbuf, outpos);
 		IF_DESKTOP(total_written += outpos;)
 	}
 
diff --git a/archival/libarchive/decompress_unzip.c b/archival/libarchive/decompress_unzip.c
index aa5d22d..50873e3 100644
--- a/archival/libarchive/decompress_unzip.c
+++ b/archival/libarchive/decompress_unzip.c
@@ -1182,33 +1182,37 @@ static int check_header_gzip(STATE_PARAM unpack_info_t *info)
 }
 
 IF_DESKTOP(long long) int FAST_FUNC
-unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
+unpack_gz_stream_with_info(int src_fd, int dst_fd, unpack_info_t *info)
 {
 	uint32_t v32;
-	IF_DESKTOP(long long) int n;
+	IF_DESKTOP(long long) int total, n;
 	DECLARE_STATE;
 
-	n = 0;
+	total = 0;
 
 	ALLOC_STATE;
 	to_read = -1;
 //	bytebuffer_max = 0x8000;
 	bytebuffer = xmalloc(bytebuffer_max);
-	gunzip_src_fd = in;
+	gunzip_src_fd = src_fd;
 
  again:
 	if (!check_header_gzip(PASS_STATE info)) {
 		bb_error_msg("corrupted data");
-		n = -1;
+		total = -1;
 		goto ret;
 	}
-	n += inflate_unzip_internal(PASS_STATE in, out);
-	if (n < 0)
+
+	n = inflate_unzip_internal(PASS_STATE src_fd, dst_fd);
+	if (n < 0) {
+		total = -1;
 		goto ret;
+	}
+	total += n;
 
 	if (!top_up(PASS_STATE 8)) {
 		bb_error_msg("corrupted data");
-		n = -1;
+		total = -1;
 		goto ret;
 	}
 
@@ -1216,7 +1220,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
 	v32 = buffer_read_le_u32(PASS_STATE_ONLY);
 	if ((~gunzip_crc) != v32) {
 		bb_error_msg("crc error");
-		n = -1;
+		total = -1;
 		goto ret;
 	}
 
@@ -1224,7 +1228,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
 	v32 = buffer_read_le_u32(PASS_STATE_ONLY);
 	if ((uint32_t)gunzip_bytes_out != v32) {
 		bb_error_msg("incorrect length");
-		n = -1;
+		total = -1;
 	}
 
 	if (!top_up(PASS_STATE 2))
@@ -1242,7 +1246,7 @@ unpack_gz_stream_with_info(int in, int out, unpack_info_t *info)
  ret:
 	free(bytebuffer);
 	DEALLOC_STATE;
-	return n;
+	return total;
 }
 
 IF_DESKTOP(long long) int FAST_FUNC
diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c
index aa8c102..743ffee 100644
--- a/archival/libarchive/open_transformer.c
+++ b/archival/libarchive/open_transformer.c
@@ -6,6 +6,16 @@
 #include "libbb.h"
 #include "bb_archive.h"
 
+#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \
+	|| ENABLE_FEATURE_SEAMLESS_BZ2 \
+	|| ENABLE_FEATURE_SEAMLESS_GZ \
+	/* || ENABLE_FEATURE_SEAMLESS_Z */ \
+)
+
+#if ZIPPED
+# include "bb_archive.h"
+#endif
+
 /* transformer(), more than meets the eye */
 /*
  * On MMU machine, the transform_prog is removed by macro magic
@@ -52,3 +62,133 @@ void FAST_FUNC open_transformer(int fd,
 	close(fd_pipe.wr); /* don't want to write to the child */
 	xmove_fd(fd_pipe.rd, fd);
 }
+
+
+/* Used by e.g. rpm which gives us a fd without filename,
+ * thus we can't guess the format from filename's extension.
+ */
+#if ZIPPED
+void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/)
+{
+	const int fail_if_not_detected = 1;
+	union {
+		uint8_t b[4];
+		uint16_t b16[2];
+		uint32_t b32[1];
+	} magic;
+	int offset = -2;
+# if BB_MMU
+	IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
+	enum { xformer_prog = 0 };
+# else
+	enum { xformer = 0 };
+	const char *xformer_prog;
+# endif
+
+	/* .gz and .bz2 both have 2-byte signature, and their
+	 * unpack_XXX_stream wants this header skipped. */
+	xread(fd, magic.b16, sizeof(magic.b16[0]));
+	if (ENABLE_FEATURE_SEAMLESS_GZ
+	 && magic.b16[0] == GZIP_MAGIC
+	) {
+# if BB_MMU
+		xformer = unpack_gz_stream;
+# else
+		xformer_prog = "gunzip";
+# endif
+		goto found_magic;
+	}
+	if (ENABLE_FEATURE_SEAMLESS_BZ2
+	 && magic.b16[0] == BZIP2_MAGIC
+	) {
+# if BB_MMU
+		xformer = unpack_bz2_stream;
+# else
+		xformer_prog = "bunzip2";
+# endif
+		goto found_magic;
+	}
+	if (ENABLE_FEATURE_SEAMLESS_XZ
+	 && magic.b16[0] == XZ_MAGIC1
+	) {
+		offset = -6;
+		xread(fd, magic.b32, sizeof(magic.b32[0]));
+		if (magic.b32[0] == XZ_MAGIC2) {
+# if BB_MMU
+			xformer = unpack_xz_stream;
+			/* unpack_xz_stream wants fd at position 6, no need to seek */
+			//xlseek(fd, offset, SEEK_CUR);
+# else
+			xformer_prog = "unxz";
+# endif
+			goto found_magic;
+		}
+	}
+
+	/* No known magic seen */
+	if (fail_if_not_detected)
+		bb_error_msg_and_die("no gzip"
+			IF_FEATURE_SEAMLESS_BZ2("/bzip2")
+			IF_FEATURE_SEAMLESS_XZ("/xz")
+			" magic");
+	xlseek(fd, offset, SEEK_CUR);
+	return;
+
+ found_magic:
+# if !BB_MMU
+	/* NOMMU version of open_transformer execs
+	 * an external unzipper that wants
+	 * file position at the start of the file */
+	xlseek(fd, offset, SEEK_CUR);
+# endif
+	open_transformer(fd, xformer, xformer_prog);
+}
+#endif /* ZIPPED */
+
+int FAST_FUNC open_zipped(const char *fname)
+{
+#if !ZIPPED
+	return open(fname, O_RDONLY);
+#else
+	char *sfx;
+	int fd;
+
+	fd = open(fname, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	sfx = strrchr(fname, '.');
+	if (sfx) {
+		sfx++;
+		if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0)
+			/* .lzma has no header/signature, just trust it */
+			open_transformer(fd, unpack_lzma_stream, "unlzma");
+		else
+		if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0)
+		 || (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0)
+		 || (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0)
+		) {
+			setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/);
+		}
+	}
+
+	return fd;
+#endif
+}
+
+void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
+{
+	int fd;
+	char *image;
+
+	fd = open_zipped(fname);
+	if (fd < 0)
+		return NULL;
+
+	image = xmalloc_read(fd, maxsz_p);
+	if (!image)
+		bb_perror_msg("read error from '%s'", fname);
+	close(fd);
+
+	return image;
+}
diff --git a/include/libbb.h b/include/libbb.h
index f743bdf..c896e54 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -713,6 +713,14 @@ extern void *xmalloc_read(int fd, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
 extern void *xmalloc_open_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
 /* Never returns NULL */
 extern void *xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p) FAST_FUNC RETURNS_MALLOC;
+
+#define SEAMLESS_COMPRESSION (0 \
+ || ENABLE_FEATURE_SEAMLESS_XZ \
+ || ENABLE_FEATURE_SEAMLESS_LZMA \
+ || ENABLE_FEATURE_SEAMLESS_BZ2 \
+ || ENABLE_FEATURE_SEAMLESS_GZ \
+ || ENABLE_FEATURE_SEAMLESS_Z)
+
 /* Autodetects gzip/bzip2 formats. fd may be in the middle of the file! */
 #if ENABLE_FEATURE_SEAMLESS_LZMA \
  || ENABLE_FEATURE_SEAMLESS_BZ2 \
diff --git a/libbb/read_printf.c b/libbb/read_printf.c
index 0bbf780..5ed6e36 100644
--- a/libbb/read_printf.c
+++ b/libbb/read_printf.c
@@ -8,16 +8,6 @@
  */
 #include "libbb.h"
 
-#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \
-	|| ENABLE_FEATURE_SEAMLESS_BZ2 \
-	|| ENABLE_FEATURE_SEAMLESS_GZ \
-	/* || ENABLE_FEATURE_SEAMLESS_Z */ \
-)
-
-#if ZIPPED
-# include "bb_archive.h"
-#endif
-
 
 /* Suppose that you are a shell. You start child processes.
  * They work and eventually exit. You want to get user input.
@@ -244,132 +234,3 @@ void* FAST_FUNC xmalloc_xopen_read_close(const char *filename, size_t *maxsz_p)
 		bb_perror_msg_and_die("can't read '%s'", filename);
 	return buf;
 }
-
-/* Used by e.g. rpm which gives us a fd without filename,
- * thus we can't guess the format from filename's extension.
- */
-#if ZIPPED
-void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/)
-{
-	const int fail_if_not_detected = 1;
-	union {
-		uint8_t b[4];
-		uint16_t b16[2];
-		uint32_t b32[1];
-	} magic;
-	int offset = -2;
-# if BB_MMU
-	IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd);
-	enum { xformer_prog = 0 };
-# else
-	enum { xformer = 0 };
-	const char *xformer_prog;
-# endif
-
-	/* .gz and .bz2 both have 2-byte signature, and their
-	 * unpack_XXX_stream wants this header skipped. */
-	xread(fd, magic.b16, sizeof(magic.b16[0]));
-	if (ENABLE_FEATURE_SEAMLESS_GZ
-	 && magic.b16[0] == GZIP_MAGIC
-	) {
-# if BB_MMU
-		xformer = unpack_gz_stream;
-# else
-		xformer_prog = "gunzip";
-# endif
-		goto found_magic;
-	}
-	if (ENABLE_FEATURE_SEAMLESS_BZ2
-	 && magic.b16[0] == BZIP2_MAGIC
-	) {
-# if BB_MMU
-		xformer = unpack_bz2_stream;
-# else
-		xformer_prog = "bunzip2";
-# endif
-		goto found_magic;
-	}
-	if (ENABLE_FEATURE_SEAMLESS_XZ
-	 && magic.b16[0] == XZ_MAGIC1
-	) {
-		offset = -6;
-		xread(fd, magic.b32, sizeof(magic.b32[0]));
-		if (magic.b32[0] == XZ_MAGIC2) {
-# if BB_MMU
-			xformer = unpack_xz_stream;
-			/* unpack_xz_stream wants fd at position 6, no need to seek */
-			//xlseek(fd, offset, SEEK_CUR);
-# else
-			xformer_prog = "unxz";
-# endif
-			goto found_magic;
-		}
-	}
-
-	/* No known magic seen */
-	if (fail_if_not_detected)
-		bb_error_msg_and_die("no gzip"
-			IF_FEATURE_SEAMLESS_BZ2("/bzip2")
-			IF_FEATURE_SEAMLESS_XZ("/xz")
-			" magic");
-	xlseek(fd, offset, SEEK_CUR);
-	return;
-
- found_magic:
-# if !BB_MMU
-	/* NOMMU version of open_transformer execs
-	 * an external unzipper that wants
-	 * file position at the start of the file */
-	xlseek(fd, offset, SEEK_CUR);
-# endif
-	open_transformer(fd, xformer, xformer_prog);
-}
-#endif /* ZIPPED */
-
-int FAST_FUNC open_zipped(const char *fname)
-{
-#if !ZIPPED
-	return open(fname, O_RDONLY);
-#else
-	char *sfx;
-	int fd;
-
-	fd = open(fname, O_RDONLY);
-	if (fd < 0)
-		return fd;
-
-	sfx = strrchr(fname, '.');
-	if (sfx) {
-		sfx++;
-		if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0)
-			/* .lzma has no header/signature, just trust it */
-			open_transformer(fd, unpack_lzma_stream, "unlzma");
-		else
-		if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0)
-		 || (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0)
-		 || (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0)
-		) {
-			setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/);
-		}
-	}
-
-	return fd;
-#endif
-}
-
-void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p)
-{
-	int fd;
-	char *image;
-
-	fd = open_zipped(fname);
-	if (fd < 0)
-		return NULL;
-
-	image = xmalloc_read(fd, maxsz_p);
-	if (!image)
-		bb_perror_msg("read error from '%s'", fname);
-	close(fd);
-
-	return image;
-}
diff --git a/miscutils/man.c b/miscutils/man.c
index 3bf7e84..6114663 100644
--- a/miscutils/man.c
+++ b/miscutils/man.c
@@ -30,16 +30,6 @@ echo ".pl \n(nlu+10"
 
 */
 
-#if ENABLE_FEATURE_SEAMLESS_LZMA
-#define Z_SUFFIX ".lzma"
-#elif ENABLE_FEATURE_SEAMLESS_BZ2
-#define Z_SUFFIX ".bz2"
-#elif ENABLE_FEATURE_SEAMLESS_GZ
-#define Z_SUFFIX ".gz"
-#else
-#define Z_SUFFIX ""
-#endif
-
 static int show_manpage(const char *pager, char *man_filename, int man, int level);
 
 static int run_pipe(const char *pager, char *man_filename, int man, int level)
@@ -102,7 +92,7 @@ static int run_pipe(const char *pager, char *man_filename, int man, int level)
 
 		/* Links do not have .gz extensions, even if manpage
 		 * is compressed */
-		man_filename = xasprintf("%s/%s" Z_SUFFIX, man_filename, linkname);
+		man_filename = xasprintf("%s/%s", man_filename, linkname);
 		free(line);
 		/* Note: we leak "new" man_filename string as well... */
 		if (show_manpage(pager, man_filename, man, level + 1))
@@ -124,32 +114,37 @@ static int run_pipe(const char *pager, char *man_filename, int man, int level)
 	return 1;
 }
 
-/* man_filename is of the form "/dir/dir/dir/name.s" Z_SUFFIX */
+/* man_filename is of the form "/dir/dir/dir/name.s" */
 static int show_manpage(const char *pager, char *man_filename, int man, int level)
 {
+#if SEAMLESS_COMPRESSION
+	/* We leak this allocation... */
+	char *filename_with_zext = xasprintf("%s.lzma", man_filename);
+	char *ext = strrchr(filename_with_zext, '.') + 1;
+#endif
+
 #if ENABLE_FEATURE_SEAMLESS_LZMA
+	if (run_pipe(pager, filename_with_zext, man, level))
+		return 1;
+#endif
+#if ENABLE_FEATURE_SEAMLESS_XZ
+	strcpy(ext, "xz");
 	if (run_pipe(pager, man_filename, man, level))
 		return 1;
 #endif
-
 #if ENABLE_FEATURE_SEAMLESS_BZ2
-#if ENABLE_FEATURE_SEAMLESS_LZMA
-	strcpy(strrchr(man_filename, '.') + 1, "bz2");
-#endif
+	strcpy(ext, "bz2");
 	if (run_pipe(pager, man_filename, man, level))
 		return 1;
 #endif
-
 #if ENABLE_FEATURE_SEAMLESS_GZ
-#if ENABLE_FEATURE_SEAMLESS_LZMA || ENABLE_FEATURE_SEAMLESS_BZ2
-	strcpy(strrchr(man_filename, '.') + 1, "gz");
-#endif
+	strcpy(ext, "gz");
 	if (run_pipe(pager, man_filename, man, level))
 		return 1;
 #endif
 
-#if ENABLE_FEATURE_SEAMLESS_LZMA || ENABLE_FEATURE_SEAMLESS_BZ2 || ENABLE_FEATURE_SEAMLESS_GZ
-	*strrchr(man_filename, '.') = '\0';
+#if SEAMLESS_COMPRESSION
+	ext[-1] = '\0';
 #endif
 	if (run_pipe(pager, man_filename, man, level))
 		return 1;
@@ -262,7 +257,7 @@ int man_main(int argc UNUSED_PARAM, char **argv)
 				/* Search for cat, then man page */
 				while (cat0man1 < 2) {
 					int found_here;
-					man_filename = xasprintf("%s/%s%.*s/%s.%.*s" Z_SUFFIX,
+					man_filename = xasprintf("%s/%s%.*s/%s.%.*s",
 							cur_path,
 							"cat\0man" + (cat0man1 * 4),
 							sect_len, cur_sect,


More information about the busybox-cvs mailing list