[git commit] tar: add support for --strip-components=N

Denys Vlasenko vda.linux at googlemail.com
Wed Oct 21 23:07:13 UTC 2015


commit: http://git.busybox.net/busybox/commit/?id=6c563e370d0f2f3cf36f3b274e8fe1392ca7125f
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
data_extract_all                                     882     995    +113
tar_longopts                                         290     309     +19
tar_main                                             938     942      +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 136/0)             Total: 136 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 archival/libarchive/data_extract_all.c |  108 ++++++++++++++++++++++---------
 archival/libarchive/get_header_tar.c   |    1 +
 archival/tar.c                         |   80 +++++++++++++++++++----
 include/bb_archive.h                   |    3 +
 testsuite/tar.tests                    |   10 +++
 5 files changed, 156 insertions(+), 46 deletions(-)

diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c
index 45776dc..bd51d2a 100644
--- a/archival/libarchive/data_extract_all.c
+++ b/archival/libarchive/data_extract_all.c
@@ -8,9 +8,17 @@
 
 void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 {
+
 	file_header_t *file_header = archive_handle->file_header;
 	int dst_fd;
 	int res;
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+	char *dst_name;
+	char *dst_link;
+#else
+# define dst_name (file_header->name)
+# define dst_link (file_header->link_target)
+#endif
 
 #if ENABLE_FEATURE_TAR_SELINUX
 	char *sctx = archive_handle->tar__sctx[PAX_NEXT_FILE];
@@ -23,11 +31,47 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 	}
 #endif
 
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+	dst_name = file_header->name;
+	dst_link = file_header->link_target;
+	if (archive_handle->tar__strip_components) {
+		unsigned n = archive_handle->tar__strip_components;
+		do {
+			dst_name = strchr(dst_name, '/');
+			if (!dst_name || dst_name[1] == '\0') {
+				data_skip(archive_handle);
+				return;
+			}
+			dst_name++;
+			/*
+			 * Link target is shortened only for hardlinks:
+			 * softlinks are restored unchanged.
+			 */
+			if (S_ISREG(file_header->mode)
+			 && file_header->size == 0
+			 && dst_link
+			) {
+// GNU tar 1.26 does not check that we reached end of link name:
+// if "dir/hardlink" is hardlinked to "file",
+// tar xvf a.tar --strip-components=1 says:
+//  tar: hardlink: Cannot hard link to '': No such file or directory
+// and continues processing. We silently skip such entries.
+				dst_link = strchr(dst_link, '/');
+				if (!dst_link || dst_link[1] == '\0') {
+					data_skip(archive_handle);
+					return;
+				}
+				dst_link++;
+			}
+		} while (--n != 0);
+	}
+#endif
+
 	if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
-		char *slash = strrchr(file_header->name, '/');
+		char *slash = strrchr(dst_name, '/');
 		if (slash) {
 			*slash = '\0';
-			bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
+			bb_make_directory(dst_name, -1, FILEUTILS_RECUR);
 			*slash = '/';
 		}
 	}
@@ -38,8 +82,8 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 			/* Is it hardlink?
 			 * We encode hard links as regular files of size 0 with a symlink */
 			if (S_ISREG(file_header->mode)
-			 && file_header->link_target
 			 && file_header->size == 0
+			 && dst_link
 			) {
 				/* Ugly special case:
 				 * tar cf t.tar hardlink1 hardlink2 hardlink1
@@ -48,22 +92,22 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 				 * hardlink2 -> hardlink1
 				 * hardlink1 -> hardlink1 <== !!!
 				 */
-				if (strcmp(file_header->link_target, file_header->name) == 0)
+				if (strcmp(dst_link, dst_name) == 0)
 					goto ret;
 			}
 			/* Proceed with deleting */
-			if (unlink(file_header->name) == -1
+			if (unlink(dst_name) == -1
 			 && errno != ENOENT
 			) {
 				bb_perror_msg_and_die("can't remove old file %s",
-						file_header->name);
+						dst_name);
 			}
 		}
 	}
 	else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
 		/* Remove the existing entry if its older than the extracted entry */
 		struct stat existing_sb;
-		if (lstat(file_header->name, &existing_sb) == -1) {
+		if (lstat(dst_name, &existing_sb) == -1) {
 			if (errno != ENOENT) {
 				bb_perror_msg_and_die("can't stat old file");
 			}
@@ -73,30 +117,30 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 			 && !S_ISDIR(file_header->mode)
 			) {
 				bb_error_msg("%s not created: newer or "
-					"same age file exists", file_header->name);
+					"same age file exists", dst_name);
 			}
 			data_skip(archive_handle);
 			goto ret;
 		}
-		else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
+		else if ((unlink(dst_name) == -1) && (errno != EISDIR)) {
 			bb_perror_msg_and_die("can't remove old file %s",
-					file_header->name);
+					dst_name);
 		}
 	}
 
 	/* Handle hard links separately
 	 * We encode hard links as regular files of size 0 with a symlink */
 	if (S_ISREG(file_header->mode)
-	 && file_header->link_target
 	 && file_header->size == 0
+	 && dst_link
 	) {
-		/* hard link */
-		res = link(file_header->link_target, file_header->name);
-		if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
+		/* Hard link */
+		res = link(dst_link, dst_name);
+		if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
 			bb_perror_msg("can't create %slink "
 					"from %s to %s", "hard",
-					file_header->name,
-					file_header->link_target);
+					dst_name,
+					dst_link);
 		}
 		/* Hardlinks have no separate mode/ownership, skip chown/chmod */
 		goto ret;
@@ -106,17 +150,17 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 	switch (file_header->mode & S_IFMT) {
 	case S_IFREG: {
 		/* Regular file */
-		char *dst_name;
+		char *dst_nameN;
 		int flags = O_WRONLY | O_CREAT | O_EXCL;
 		if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
 			flags = O_WRONLY | O_CREAT | O_TRUNC;
-		dst_name = file_header->name;
+		dst_nameN = dst_name;
 #ifdef ARCHIVE_REPLACE_VIA_RENAME
 		if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME)
 			/* rpm-style temp file name */
-			dst_name = xasprintf("%s;%x", dst_name, (int)getpid());
+			dst_nameN = xasprintf("%s;%x", dst_name, (int)getpid());
 #endif
-		dst_fd = xopen3(dst_name,
+		dst_fd = xopen3(dst_nameN,
 			flags,
 			file_header->mode
 			);
@@ -124,32 +168,32 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 		close(dst_fd);
 #ifdef ARCHIVE_REPLACE_VIA_RENAME
 		if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) {
-			xrename(dst_name, file_header->name);
-			free(dst_name);
+			xrename(dst_nameN, dst_name);
+			free(dst_nameN);
 		}
 #endif
 		break;
 	}
 	case S_IFDIR:
-		res = mkdir(file_header->name, file_header->mode);
+		res = mkdir(dst_name, file_header->mode);
 		if ((res == -1)
 		 && (errno != EISDIR) /* btw, Linux doesn't return this */
 		 && (errno != EEXIST)
 		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
 		) {
-			bb_perror_msg("can't make dir %s", file_header->name);
+			bb_perror_msg("can't make dir %s", dst_name);
 		}
 		break;
 	case S_IFLNK:
 		/* Symlink */
 //TODO: what if file_header->link_target == NULL (say, corrupted tarball?)
-		res = symlink(file_header->link_target, file_header->name);
-		if ((res == -1)
+		res = symlink(file_header->link_target, dst_name);
+		if (res != 0
 		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
 		) {
 			bb_perror_msg("can't create %slink "
 				"from %s to %s", "sym",
-				file_header->name,
+				dst_name,
 				file_header->link_target);
 		}
 		break;
@@ -157,11 +201,11 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 	case S_IFBLK:
 	case S_IFCHR:
 	case S_IFIFO:
-		res = mknod(file_header->name, file_header->mode, file_header->device);
+		res = mknod(dst_name, file_header->mode, file_header->device);
 		if ((res == -1)
 		 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
 		) {
-			bb_perror_msg("can't create node %s", file_header->name);
+			bb_perror_msg("can't create node %s", dst_name);
 		}
 		break;
 	default:
@@ -186,20 +230,20 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
 			}
 #endif
 			/* GNU tar 1.15.1 uses chown, not lchown */
-			chown(file_header->name, uid, gid);
+			chown(dst_name, uid, gid);
 		}
 		/* uclibc has no lchmod, glibc is even stranger -
 		 * it has lchmod which seems to do nothing!
 		 * so we use chmod... */
 		if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
-			chmod(file_header->name, file_header->mode);
+			chmod(dst_name, file_header->mode);
 		}
 		if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
 			struct timeval t[2];
 
 			t[1].tv_sec = t[0].tv_sec = file_header->mtime;
 			t[1].tv_usec = t[0].tv_usec = 0;
-			utimes(file_header->name, t);
+			utimes(dst_name, t);
 		}
 	}
 
diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c
index fb68673..ac2be72 100644
--- a/archival/libarchive/get_header_tar.c
+++ b/archival/libarchive/get_header_tar.c
@@ -418,6 +418,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
 
 	/* Everything up to and including last ".." component is stripped */
 	overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
+//TODO: do the same for file_header->link_target?
 
 	/* Strip trailing '/' in directories */
 	/* Must be done after mode is set as '/' is used to check if it's a directory */
diff --git a/archival/tar.c b/archival/tar.c
index aa03ba9..566ba34 100644
--- a/archival/tar.c
+++ b/archival/tar.c
@@ -152,9 +152,12 @@
 # define FNM_LEADING_DIR 0
 #endif
 
-
-//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
-#define DBG(...) ((void)0)
+#if 0
+# define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
+#else
+# define DBG(...) ((void)0)
+#endif
+#define DBG_OPTION_PARSING 0
 
 
 #define block_buf bb_common_bufsiz1
@@ -855,6 +858,7 @@ enum {
 	IF_FEATURE_SEAMLESS_Z(   OPTBIT_COMPRESS    ,)
 	IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,)
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS
+	OPTBIT_STRIP_COMPONENTS,
 	OPTBIT_NORECURSION,
 	IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND   ,)
 	OPTBIT_NUMERIC_OWNER,
@@ -879,12 +883,13 @@ enum {
 	OPT_GZIP         = IF_FEATURE_SEAMLESS_GZ(  (1 << OPTBIT_GZIP        )) + 0, // z
 	OPT_XZ           = IF_FEATURE_SEAMLESS_XZ(  (1 << OPTBIT_XZ          )) + 0, // J
 	OPT_COMPRESS     = IF_FEATURE_SEAMLESS_Z(   (1 << OPTBIT_COMPRESS    )) + 0, // Z
-	OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
-	OPT_NORECURSION     = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION    )) + 0, // no-recursion
-	OPT_2COMMAND        = IF_FEATURE_TAR_TO_COMMAND(  (1 << OPTBIT_2COMMAND       )) + 0, // to-command
-	OPT_NUMERIC_OWNER   = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER  )) + 0, // numeric-owner
-	OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
-	OPT_OVERWRITE       = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE      )) + 0, // overwrite
+	OPT_NOPRESERVE_TIME  = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
+	OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components
+	OPT_NORECURSION      = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION    )) + 0, // no-recursion
+	OPT_2COMMAND         = IF_FEATURE_TAR_TO_COMMAND(  (1 << OPTBIT_2COMMAND       )) + 0, // to-command
+	OPT_NUMERIC_OWNER    = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER  )) + 0, // numeric-owner
+	OPT_NOPRESERVE_PERM  = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
+	OPT_OVERWRITE        = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE      )) + 0, // overwrite
 
 	OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS),
 };
@@ -928,6 +933,7 @@ static const char tar_longopts[] ALIGN1 =
 # if ENABLE_FEATURE_TAR_NOPRESERVE_TIME
 	"touch\0"               No_argument       "m"
 # endif
+	"strip-components\0"	Required_argument "\xf9"
 	"no-recursion\0"	No_argument       "\xfa"
 # if ENABLE_FEATURE_TAR_TO_COMMAND
 	"to-command\0"		Required_argument "\xfb"
@@ -973,11 +979,15 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
 		"tt:vv:" // count -t,-v
 		IF_FEATURE_TAR_FROM("X::T::") // cumulative lists
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
-		"\xff::" // cumulative lists for --exclude
+		"\xff::" // --exclude=PATTERN is a list
 #endif
 		IF_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd
 		IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive
-		IF_NOT_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive
+		IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+		":\xf9+" // --strip-components=NUM
+#endif
+	;
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS
 	applet_long_options = tar_longopts;
 #endif
@@ -1018,10 +1028,14 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
 		IF_FEATURE_SEAMLESS_XZ(  "J"   )
 		IF_FEATURE_SEAMLESS_Z(   "Z"   )
 		IF_FEATURE_TAR_NOPRESERVE_TIME("m")
+		IF_FEATURE_TAR_LONG_OPTIONS("\xf9:") // --strip-components
 		, &base_dir // -C dir
 		, &tar_filename // -f filename
 		IF_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T
 		IF_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+		, &tar_handle->tar__strip_components // --strip-components
+#endif
 		IF_FEATURE_TAR_TO_COMMAND(, &(tar_handle->tar__to_command)) // --to-command
 #if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
 		, &excludes // --exclude
@@ -1029,11 +1043,49 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
 		, &verboseFlag // combined count for -t and -v
 		, &verboseFlag // combined count for -t and -v
 		);
-	//bb_error_msg("opt:%08x", opt);
+#if DBG_OPTION_PARSING
+	bb_error_msg("opt: 0x%08x", opt);
+# define showopt(o) bb_error_msg("opt & %s(%x): %x", #o, o, opt & o);
+	showopt(OPT_TEST            );
+	showopt(OPT_EXTRACT         );
+	showopt(OPT_BASEDIR         );
+	showopt(OPT_TARNAME         );
+	showopt(OPT_2STDOUT         );
+	showopt(OPT_NOPRESERVE_OWNER);
+	showopt(OPT_P               );
+	showopt(OPT_VERBOSE         );
+	showopt(OPT_KEEP_OLD        );
+	showopt(OPT_CREATE          );
+	showopt(OPT_DEREFERENCE     );
+	showopt(OPT_BZIP2           );
+	showopt(OPT_LZMA            );
+	showopt(OPT_INCLUDE_FROM    );
+	showopt(OPT_EXCLUDE_FROM    );
+	showopt(OPT_GZIP            );
+	showopt(OPT_XZ              );
+	showopt(OPT_COMPRESS        );
+	showopt(OPT_NOPRESERVE_TIME );
+	showopt(OPT_STRIP_COMPONENTS);
+	showopt(OPT_NORECURSION     );
+	showopt(OPT_2COMMAND        );
+	showopt(OPT_NUMERIC_OWNER   );
+	showopt(OPT_NOPRESERVE_PERM );
+	showopt(OPT_OVERWRITE       );
+	showopt(OPT_ANY_COMPRESS    );
+	bb_error_msg("base_dir:'%s'", base_dir);
+	bb_error_msg("tar_filename:'%s'", tar_filename);
+	bb_error_msg("verboseFlag:%d", verboseFlag);
+	bb_error_msg("tar_handle->tar__to_command:'%s'", tar_handle->tar__to_command);
+	bb_error_msg("tar_handle->tar__strip_components:%u", tar_handle->tar__strip_components);
+	return 0;
+# undef showopt
+#endif
 	argv += optind;
 
-	if (verboseFlag) tar_handle->action_header = header_verbose_list;
-	if (verboseFlag == 1) tar_handle->action_header = header_list;
+	if (verboseFlag)
+		tar_handle->action_header = header_verbose_list;
+	if (verboseFlag == 1)
+		tar_handle->action_header = header_list;
 
 	if (opt & OPT_EXTRACT)
 		tar_handle->action_data = data_extract_all;
diff --git a/include/bb_archive.h b/include/bb_archive.h
index 2329d02..10969b5 100644
--- a/include/bb_archive.h
+++ b/include/bb_archive.h
@@ -77,6 +77,9 @@ typedef struct archive_handle_t {
 	off_t offset;
 
 	/* Archiver specific. Can make it a union if it ever gets big */
+#if ENABLE_FEATURE_TAR_LONG_OPTIONS
+	unsigned tar__strip_components;
+#endif
 #define PAX_NEXT_FILE 0
 #define PAX_GLOBAL    1
 #if ENABLE_TAR || ENABLE_DPKG || ENABLE_DPKG_DEB
diff --git a/testsuite/tar.tests b/testsuite/tar.tests
index 4929f4e..383a464 100755
--- a/testsuite/tar.tests
+++ b/testsuite/tar.tests
@@ -53,6 +53,15 @@ dd if=/dev/zero bs=512 count=20 2>/dev/null | tar xvf - 2>&1; echo $?
 "" ""
 SKIP=
 
+# "tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input":
+# GNU tar 1.26 records as hardlinks:
+#  input_hard2 -> input_hard1
+#  input_hard1 -> input_hard1 (!!!)
+#  input_dir/file -> input_dir/file
+#  input -> input
+# As of 1.24.0, we don't record the last two: for them, nlink==1
+# and we check for "hardlink"ness only on files with nlink!=1
+# We also don't use "hrw-r--r--" notation for hardlinks in "tar tv" listing.
 optional FEATURE_TAR_CREATE FEATURE_LS_SORTFILES
 testing "tar hardlinks and repeated files" '\
 rm -rf input_* test.tar 2>/dev/null
@@ -64,6 +73,7 @@ chmod -R 644 *
 chmod    755 input_dir
 tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input
 tar tvf test.tar | sed "s/.*[0-9] input/input/"
+rm -rf input_dir
 tar xf test.tar 2>&1
 echo Ok: $?
 ls -l . input_dir/* | grep input_ | sed "s/\\(^[^ ]*\\) .* input/\\1 input/"


More information about the busybox-cvs mailing list