[git commit master 1/1] xargs: simplify logic

Denys Vlasenko vda.linux at googlemail.com
Sun Jun 13 22:57:05 UTC 2010


commit: http://git.busybox.net/busybox/commit/?id=7a4021debaa1f89c0ee2ad41f446a8234f8261c7
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

Removed double-buffering in a linked list.

function                                             old     new   delta
store_param                                            -      56     +56
xargs_main                                           871     840     -31
process_stdin                                        458     396     -62
process0_stdin                                       267     143    -124
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/3 up/down: 56/-217)          Total: -161 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 findutils/xargs.c |  383 +++++++++++++++++++++++++---------------------------
 1 files changed, 184 insertions(+), 199 deletions(-)

diff --git a/findutils/xargs.c b/findutils/xargs.c
index 9988e3d..857773d 100644
--- a/findutils/xargs.c
+++ b/findutils/xargs.c
@@ -59,10 +59,6 @@
 //config:	  are not special.
 
 #include "libbb.h"
-
-/* This is a NOEXEC applet. Be very careful! */
-
-
 /* COMPAT:  SYSV version defaults size (and has a max value of) to 470.
    We try to make it as large as possible. */
 #if !defined(ARG_MAX) && defined(_SC_ARG_MAX)
@@ -72,6 +68,12 @@
 # define ARG_MAX 470
 #endif
 
+/* This is a NOEXEC applet. Be very careful! */
+
+
+//#define dbg_msg(...) bb_error_msg(__VA_ARGS__)
+#define dbg_msg(...) ((void)0)
+
 
 #ifdef TEST
 # ifndef ENABLE_FEATURE_XARGS_SUPPORT_CONFIRMATION
@@ -88,26 +90,36 @@
 # endif
 #endif
 
+
+struct globals {
+	char **args;
+	const char *eof_str;
+	int idx;
+} FIX_ALIASING;
+#define G (*(struct globals*)&bb_common_bufsiz1)
+#define INIT_G() do { } while (0)
+
+
 /*
  * This function has special algorithm.
  * Don't use fork and include to main!
  */
-static int xargs_exec(char **args)
+static int xargs_exec(void)
 {
 	int status;
 
-	status = spawn_and_wait(args);
+	status = spawn_and_wait(G.args);
 	if (status < 0) {
-		bb_simple_perror_msg(args[0]);
+		bb_simple_perror_msg(G.args[0]);
 		return errno == ENOENT ? 127 : 126;
 	}
 	if (status == 255) {
-		bb_error_msg("%s: exited with status 255; aborting", args[0]);
+		bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
 		return 124;
 	}
 	if (status >= 0x180) {
 		bb_error_msg("%s: terminated by signal %d",
-			args[0], status - 0x180);
+			G.args[0], status - 0x180);
 		return 125;
 	}
 	if (status)
@@ -115,51 +127,59 @@ static int xargs_exec(char **args)
 	return 0;
 }
 
-
-typedef struct xlist_t {
-	struct xlist_t *link;
-	size_t length; /* length of xstr[] including NUL */
-	char xstr[1];
-} xlist_t;
-
 /* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space.
  * "\t\n\v\f\r" happen to have ASCII codes 9,10,11,12,13.
  */
 #define ISSPACE(a) ({ unsigned char xargs__isspace = (a) - 9; xargs__isspace == (' ' - 9) || xargs__isspace <= (13 - 9); })
 
+static void store_param(char *s)
+{
+	/* Grow by 256 elements at once */
+	if (!(G.idx & 0xff)) { /* G.idx == N*256 */
+		/* Enlarge, make G.args[(N+1)*256 - 1] last valid idx */
+		G.args = xrealloc(G.args, sizeof(G.args[0]) * (G.idx + 0x100));
+	}
+	G.args[G.idx++] = s;
+}
+
+/* process[0]_stdin:
+ * Read characters into buf[n_max_chars+1], and when parameter delimiter
+ * is seen, store the address of a new parameter to args[].
+ * If reading discovers that last chars do not form the complete
+ * parameter, the pointer to the first such "tail character" is returned.
+ * (buf has extra byte at the end to accomodate terminating NUL
+ * of "tail characters" string).
+ * Otherwise, the returned pointer points to NUL byte.
+ * The args[] vector is NULL-terminated.
+ * On entry, buf[] may contain some "seed chars" which are to become
+ * the beginning of the first parameter.
+ */
+
 #if ENABLE_FEATURE_XARGS_SUPPORT_QUOTES
-static xlist_t* process_stdin(xlist_t *list_arg,
-	const char *eof_str, size_t n_max_chars, char *buf)
+static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf)
 {
 #define NORM      0
 #define QUOTE     1
 #define BACKSLASH 2
 #define SPACE     4
-	char *s = NULL;         /* start of the word */
-	char *p = NULL;         /* pointer to end of the word */
+	char *s;                /* start of the word */
+	char *p;                /* pointer to end of the word */
 	char q = '\0';          /* quote char */
 	char state = NORM;
-	char eof_str_detected = 0;
-	size_t line_l = 0;      /* size of loaded args */
-	xlist_t *cur;
-	xlist_t *prev;
-
-	prev = cur = list_arg;
-	while (cur) {
-		prev = cur;
-		line_l += cur->length;
-		cur = cur->link;
-	}
+
+	s = buf;
+	p = s + strlen(buf);
+
+	/* "goto ret" is used instead of "break" to make control flow
+	 * more obvious: */
 
 	while (1) {
 		int c = getchar();
 		if (c == EOF) {
-			if (s)
-				goto unexpected_eof;
-			break;
+			if (p != s)
+				goto close_word;
+			goto ret;
 		}
-		if (eof_str_detected) /* skip till EOF */
-			continue;
 		if (state == BACKSLASH) {
 			state = NORM;
 			goto set;
@@ -171,15 +191,13 @@ static xlist_t* process_stdin(xlist_t *list_arg,
 			state = NORM;
 		} else { /* if (state == NORM) */
 			if (ISSPACE(c)) {
-				if (s) {
- unexpected_eof:
+				if (p != s) {
+ close_word:
 					state = SPACE;
 					c = '\0';
 					goto set;
 				}
 			} else {
-				if (s == NULL)
-					s = p = buf;
 				if (c == '\\') {
 					state = BACKSLASH;
 				} else if (c == '\'' || c == '"') {
@@ -187,8 +205,6 @@ static xlist_t* process_stdin(xlist_t *list_arg,
 					state = QUOTE;
 				} else {
  set:
-					if ((size_t)(p - buf) >= n_max_chars)
-						bb_error_msg_and_die("argument line too long");
 					*p++ = c;
 				}
 			}
@@ -199,149 +215,128 @@ static xlist_t* process_stdin(xlist_t *list_arg,
 					q == '\'' ? "single" : "double");
 			}
 			/* A full word is loaded */
-			if (eof_str) {
-				eof_str_detected = (strcmp(s, eof_str) == 0);
-			}
-			if (!eof_str_detected) {
-				size_t length = (p - buf);
-				/* Dont xzalloc - it can be quite big */
-				cur = xmalloc(offsetof(xlist_t, xstr) + length);
-				cur->link = NULL;
-				cur->length = length;
-				memcpy(cur->xstr, s, length);
-				if (prev == NULL) {
-					list_arg = cur;
-				} else {
-					prev->link = cur;
+			if (G.eof_str) {
+				if (strcmp(s, G.eof_str) == 0) {
+					while (getchar() != EOF)
+						continue;
+					p = s;
+					goto ret;
 				}
-				prev = cur;
-				line_l += length;
-				if (line_l >= n_max_chars) /* limit memory usage */
-					break;
 			}
-			s = NULL;
+			n_max_chars -= (p - s);
+			/* if (n_max_chars < 0) impossible */
+			store_param(s);
+			dbg_msg("args[]:'%s'", s);
+			s = p;
+			n_max_arg--;
+			if (n_max_arg == 0 || n_max_chars == 0) {
+				goto ret;
+			}
 			state = NORM;
+		} else /* state != SPACE */
+		if (p - s >= n_max_chars) {
+			dbg_msg("s:'%s' p-s:%d n_max_chars:%d", s, (int)(p-s), n_max_chars);
+			goto ret;
 		}
 	}
-	return list_arg;
+ ret:
+	*p = '\0';
+	store_param(NULL);
+	dbg_msg("return:'%s'", s);
+	return s;
 }
 #else
 /* The variant does not support single quotes, double quotes or backslash */
-static xlist_t* process_stdin(xlist_t *list_arg,
-		const char *eof_str, size_t n_max_chars, char *buf)
+static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf)
 {
-	char eof_str_detected = 0;
-	char *s = NULL;         /* start of the word */
-	char *p = NULL;         /* pointer to end of the word */
-	size_t line_l = 0;      /* size of loaded args */
-	xlist_t *cur;
-	xlist_t *prev;
-
-	prev = cur = list_arg;
-	while (cur) {
-		prev = cur;
-		line_l += cur->length;
-		cur = cur->link;
-	}
+	char *s;                /* start of the word */
+	char *p;                /* pointer to end of the word */
+
+	s = buf;
+	p = s + strlen(buf);
 
 	while (1) {
 		int c = getchar();
 		if (c == EOF) {
-			if (s == NULL)
-				break;
-		}
-		if (eof_str_detected) { /* skip till EOF */
-			continue;
+			if (p == s)
+				goto ret;
 		}
 		if (c == EOF || ISSPACE(c)) {
-			if (s == NULL)
+			if (p == s)
 				continue;
 			c = EOF;
 		}
-		if (s == NULL)
-			s = p = buf;
-		if ((size_t)(p - buf) >= n_max_chars)
-			bb_error_msg_and_die("argument line too long");
 		*p++ = (c == EOF ? '\0' : c);
 		if (c == EOF) { /* word's delimiter or EOF detected */
 			/* A full word is loaded */
-			if (eof_str) {
-				eof_str_detected = (strcmp(s, eof_str) == 0);
-			}
-			if (!eof_str_detected) {
-				size_t length = (p - buf);
-				/* Dont xzalloc - it can be quite big */
-				cur = xmalloc(offsetof(xlist_t, xstr) + length);
-				cur->link = NULL;
-				cur->length = length;
-				memcpy(cur->xstr, s, length);
-				if (prev == NULL) {
-					list_arg = cur;
-				} else {
-					prev->link = cur;
+			if (G.eof_str) {
+				if (strcmp(s, G.eof_str) == 0) {
+					while (getchar() != EOF)
+						continue;
+					p = s;
+					goto ret;
 				}
-				prev = cur;
-				line_l += length;
-				if (line_l >= n_max_chars) /* limit memory usage */
-					break;
 			}
-			s = NULL;
+			n_max_chars -= (p - s);
+			/* if (n_max_chars < 0) impossible */
+			store_param(s);
+			dbg_msg("args[]:'%s'", s);
+			s = p;
+			n_max_arg--;
+			if (n_max_arg == 0 || n_max_chars == 0) {
+				goto ret;
+			}
+		} else /* c != EOF */
+		if (p - s >= n_max_chars) {
+			goto ret;
 		}
 	}
-	return list_arg;
+ ret:
+	*p = '\0';
+	store_param(NULL);
+	dbg_msg("return:'%s'", s);
+	return s;
 }
 #endif /* FEATURE_XARGS_SUPPORT_QUOTES */
 
 #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM
-static xlist_t* process0_stdin(xlist_t *list_arg,
-		const char *eof_str UNUSED_PARAM, size_t n_max_chars, char *buf)
+static char* FAST_FUNC process0_stdin(int n_max_chars, int n_max_arg, char *buf)
 {
-	char *s = NULL;         /* start of the word */
-	char *p = NULL;         /* pointer to end of the word */
-	size_t line_l = 0;      /* size of loaded args */
-	xlist_t *cur;
-	xlist_t *prev;
-
-	prev = cur = list_arg;
-	while (cur) {
-		prev = cur;
-		line_l += cur->length;
-		cur = cur->link;
-	}
+	char *s;                /* start of the word */
+	char *p;                /* pointer to end of the word */
+
+	s = buf;
+	p = s + strlen(buf);
 
 	while (1) {
 		int c = getchar();
 		if (c == EOF) {
-			if (s == NULL)
-				break;
+			if (p == s)
+				goto ret;
 			c = '\0';
 		}
-		if (s == NULL)
-			s = p = buf;
-		if ((size_t)(p - buf) >= n_max_chars)
-			bb_error_msg_and_die("argument line too long");
 		*p++ = c;
 		if (c == '\0') {   /* word's delimiter or EOF detected */
 			/* A full word is loaded */
-			size_t length = (p - buf);
-			/* Dont xzalloc - it can be quite big */
-			cur = xmalloc(offsetof(xlist_t, xstr) + length);
-			cur->link = NULL;
-			cur->length = length;
-			memcpy(cur->xstr, s, length);
-			if (prev == NULL) {
-				list_arg = cur;
-			} else {
-				prev->link = cur;
+			n_max_chars -= (p - s);
+			/* if (n_max_chars < 0) impossible */
+			store_param(s);
+			dbg_msg("args[]:'%s'", s);
+			n_max_arg--;
+			s = p;
+			if (n_max_arg == 0 || n_max_chars == 0) {
+				goto ret;
 			}
-			prev = cur;
-			line_l += length;
-			if (line_l >= n_max_chars) /* limit memory usage */
-				break;
-			s = NULL;
+		} else /* c != '\0' */
+		if (p - s >= n_max_chars) {
+			goto ret;
 		}
 	}
-	return list_arg;
+ ret:
+	*p = '\0';
+	store_param(NULL);
+	dbg_msg("return:'%s'", s);
+	return s;
 }
 #endif /* FEATURE_XARGS_SUPPORT_ZERO_TERM */
 
@@ -397,28 +392,30 @@ enum {
 int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int xargs_main(int argc, char **argv)
 {
-	xlist_t *list = NULL;
+	int i;
 	int child_error = 0;
 	char *max_args;
 	char *max_chars;
 	char *buf;
-	int n_max_arg;
-	const char *eof_str = NULL;
 	unsigned opt;
-	size_t n_max_chars;
+	int n_max_chars;
+	int n_max_arg;
 #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM
-	xlist_t* (*read_args)(xlist_t*, const char*, size_t, char*) = process_stdin;
+	char* FAST_FUNC (*read_args)(int, int, char*) = process_stdin;
 #else
 #define read_args process_stdin
 #endif
 
-	opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &eof_str, &eof_str);
+	INIT_G();
+
+	G.eof_str = NULL;
+	opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &G.eof_str, &G.eof_str);
 
 	/* -E ""? You may wonder why not just omit -E?
 	 * This is used for portability:
 	 * old xargs was using "_" as default for -E / -e */
-	if ((opt & OPT_EOF_STRING1) && eof_str[0] == '\0')
-		eof_str = NULL;
+	if ((opt & OPT_EOF_STRING1) && G.eof_str[0] == '\0')
+		G.eof_str = NULL;
 
 	if (opt & OPT_ZEROTERM)
 		IF_FEATURE_XARGS_SUPPORT_ZERO_TERM(read_args = process0_stdin);
@@ -451,93 +448,81 @@ int xargs_main(int argc, char **argv)
 		n_max_chars = 20 * 1024;
 
 	if (opt & OPT_UPTO_SIZE) {
-		int i;
 		size_t n_chars = 0;
-		n_max_chars = xatoul_range(max_chars, 1, INT_MAX);
+		n_max_chars = xatou_range(max_chars, 1, INT_MAX);
 		for (i = 0; argv[i]; i++) {
 			n_chars += strlen(argv[i]) + 1;
 		}
 		n_max_chars -= n_chars;
-		if ((ssize_t)n_max_chars <= 0) {
+		if (n_max_chars <= 0) {
 			bb_error_msg_and_die("can't fit single argument within argument list size limit");
 		}
 	}
 
-	buf = xmalloc(n_max_chars);
+	buf = xzalloc(n_max_chars + 1);
 
 	if (opt & OPT_UPTO_NUMBER) {
-		n_max_arg = xatoul_range(max_args, 1, INT_MAX);
+		n_max_arg = xatou_range(max_args, 1, INT_MAX);
 		if (n_max_arg < n_max_chars)
 			goto skip;
 	}
 	n_max_arg = n_max_chars;
  skip:
 
-	while ((list = read_args(list, eof_str, n_max_chars, buf)) != NULL
-	 ||    !(opt & OPT_NO_EMPTY)
-	) {
-		char **args;
-		xlist_t *cur;
-		int i, n;
-		size_t n_chars;
+	/* Allocate pointers for execvp */
+	/* We can statically allocate (argc + n_max_arg + 1) elements
+	 * and do not bother with resizing args[], but on 64-bit machines
+	 * this results in args[] vector which is ~8 times bigger
+	 * than n_max_chars! That is, with n_max_chars == 20k,
+	 * args[] will take 160k (!), which will most likely be
+	 * almost entirely unused.
+	 */
+	/* See store_param() for matching 256-step growth logic */
+	G.args = xmalloc(sizeof(G.args[0]) * ((argc + 0xff) & ~0xff));
 
-		opt |= OPT_NO_EMPTY;
+	/* Store the command to be executed, part 1 */
+	for (i = 0; argv[i]; i++)
+		G.args[i] = argv[i];
 
-		/* take args from list, not exceeding arg and char limits */
-		n_chars = 0;
-		n = 0;
-		for (cur = list; cur; cur = cur->link) {
-			n_chars += cur->length;
-			if (n_chars > n_max_chars || n >= n_max_arg) {
-				if (opt & OPT_TERMINATE)
-					bb_error_msg_and_die("argument list too long");
-				break;
-			}
-			n++;
-		}
+	while (1) {
+		char *rem;
+
+		G.idx = argc;
+		rem = read_args(n_max_chars, n_max_arg, buf);
 
-		/* allocate pointers for execvp */
-		args = xzalloc(sizeof(args[0]) * (argc + n + 1));
-
-		/* store the command to be executed
-		 * (taken from the command line) */
-		for (i = 0; argv[i]; i++)
-			args[i] = argv[i];
-		/* (taken from stdin) */
-		for (cur = list; n; cur = cur->link) {
-			args[i++] = cur->xstr;
-			n--;
+		if (!G.args[argc]) {
+			if (*rem != '\0')
+				bb_error_msg_and_die("argument line too long");
+			if (opt & OPT_NO_EMPTY)
+				break;
 		}
+		opt |= OPT_NO_EMPTY;
 
 		if (opt & (OPT_INTERACTIVE | OPT_VERBOSE)) {
-			for (i = 0; args[i]; i++) {
+			for (i = 0; G.args[i]; i++) {
 				if (i)
 					bb_putchar_stderr(' ');
-				fputs(args[i], stderr);
+				fputs(G.args[i], stderr);
 			}
 			if (!(opt & OPT_INTERACTIVE))
 				bb_putchar_stderr('\n');
 		}
 
 		if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) {
-			child_error = xargs_exec(args);
-		}
-
-		/* remove list elements which we consumed */
-		for (i = argc; args[i]; i++) {
-			cur = list;
-			list = list->link;
-			free(cur);
+			child_error = xargs_exec();
 		}
-		free(args);
 
 		if (child_error > 0 && child_error != 123) {
 			break;
 		}
+
+		overlapping_strcpy(buf, rem);
 	} /* while */
 
-	if (ENABLE_FEATURE_CLEAN_UP)
+	if (ENABLE_FEATURE_CLEAN_UP) {
+		free(G.args);
 		free(buf);
+	}
 
 	return child_error;
 }
-- 
1.7.1



More information about the busybox-cvs mailing list