[PATCH 1/1] xargs: support -P <N> (parallel execution)
walter harms
wharms at bfs.de
Fri Aug 25 08:25:14 UTC 2017
Am 24.08.2017 13:19, schrieb Denys Vlasenko:
> On Wed, Aug 23, 2017 at 5:25 PM, Johannes Schindelin
> <johannes.schindelin at gmx.de> wrote:
>> The GNU variant of xargs supports a special, non-POSIX extension to run
>> processes in parallel, triggered by the -P <N> option.
>>
>> This feature comes in handy e.g. when running Git's test suite outside
>> of the development environment using only BusyBox (which does not
>> support `make` nor `prove`, Git's go-to solutions for running the test
>> suite in parallel).
>>
>> It so just happens that this patch also addresses the feature requested
>> in https://bugs.busybox.net/show_bug.cgi?id=9511.
>>
>> Signed-off-by: Johannes Schindelin <johannes.schindelin at gmx.de>
>> ---
>> findutils/xargs.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>> 1 file changed, 111 insertions(+), 1 deletion(-)
>
> function old new delta
> xargs_main 787 1174 +387
> packed_usage 31757 31769 +12
> ------------------------------------------------------------------------------
> (add/remove: 0/0 grow/shrink: 2/0 up/down: 399/0) Total: 399 bytes
>
>
> How about this version:
>
> function old new delta
> xargs_exec - 294 +294
> packed_usage 31757 31772 +15
> xargs_main 787 719 -68
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 1/1 up/down: 309/-68) Total: 241 bytes
>
>
> diff --git a/findutils/xargs.c b/findutils/xargs.c
> index b4e3db0..77e01ef 100644
> --- a/findutils/xargs.c
> +++ b/findutils/xargs.c
> @@ -59,6 +59,11 @@
> //config: depends on XARGS
> //config: help
> //config: Support -I STR and -i[STR] options.
> +//config:
> +//config:config FEATURE_XARGS_SUPPORT_PARALLEL
> +//config: bool "Enable -P N: processes to run in parallel"
> +//config: default y
> +//config: depends on XARGS
>
> //applet:IF_XARGS(APPLET_NOEXEC(xargs, xargs, BB_DIR_USR_BIN,
> BB_SUID_DROP, xargs))
>
> @@ -99,38 +104,121 @@ struct globals {
> #endif
> const char *eof_str;
> int idx;
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> + int running_procs;
> + int max_procs;
> +#endif
> + smalluint xargs_exitcode;
> } FIX_ALIASING;
> #define G (*(struct globals*)bb_common_bufsiz1)
> #define INIT_G() do { \
> setup_common_bufsiz(); \
> G.eof_str = NULL; /* need to clear by hand because we are NOEXEC
> applet */ \
> G.idx = 0; \
> + IF_FEATURE_XARGS_SUPPORT_PARALLEL(G.running_procs = 0;) \
> + IF_FEATURE_XARGS_SUPPORT_PARALLEL(G.max_procs = 1;) \
> + G.xargs_exitcode = 0; \
> IF_FEATURE_XARGS_SUPPORT_REPL_STR(G.repl_str = "{}";) \
> IF_FEATURE_XARGS_SUPPORT_REPL_STR(G.eol_ch = '\n';) \
> } while (0)
>
>
> +/*
> + * Returns 0 if xargs should continue (but may set G.xargs_exitcode to 123).
> + * Else sets G.xargs_exitcode to error code and returns nonzero.
> + *
> + * If G.max_procs == 0, performs final waitpid() loop for all children.
> + */
> static int xargs_exec(void)
> {
> int status;
>
> +#if !ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> status = spawn_and_wait(G.args);
> +#else
> + if (G.max_procs == 1) {
> + status = spawn_and_wait(G.args);
> + } else {
> + pid_t pid;
> + int wstat;
> + again:
> + if (G.running_procs >= G.max_procs)
> + pid = safe_waitpid(-1, &wstat, 0);
> + else
> + pid = wait_any_nohang(&wstat);
> + if (pid > 0) {
> + /* We may have children we don't know about:
> + * sh -c 'sleep 1 & exec xargs ...'
> + * Do not make G.running_procs go negative.
> + */
> + if (G.running_procs != 0)
> + G.running_procs--;
> + status = WIFSIGNALED(wstat)
> + ? 0x180 + WTERMSIG(wstat)
> + : WEXITSTATUS(wstat);
> + if (status > 0 && status < 255) {
> + /* See below why 123 does not abort */
> + G.xargs_exitcode = 123;
> + status = 0;
> + }
> + if (status == 0)
> + goto again; /* maybe we have more children? */
> + /* else: "bad" status, will bail out */
> + } else if (G.max_procs != 0) {
> + /* Not in final waitpid() loop,
> + * and G.running_procs < G.max_procs: start more procs
> + */
> + status = spawn(G.args);
> + /* here "status" actually holds pid, or -1 */
> + if (status > 0) {
> + G.running_procs++;
> + status = 0;
> + }
> + /* else: status == -1 (failed to fork or exec) */
> + } else {
> + /* final waitpid() loop: must be ECHILD "no more children" */
> + status = 0;
> + }
maybe you can make it a function returning status ?
that would improve readability.
re,
wh
> + }
> +#endif
> + /* Manpage:
> + * """xargs exits with the following status:
> + * 0 if it succeeds
> + * 123 if any invocation of the command exited with status 1-125
> + * 124 if the command exited with status 255
> + * ("""If any invocation of the command exits with a status of 255,
> + * xargs will stop immediately without reading any further input.
> + * An error message is issued on stderr when this happens.""")
> + * 125 if the command is killed by a signal
> + * 126 if the command cannot be run
> + * 127 if the command is not found
> + * 1 if some other error occurred."""
> + */
> if (status < 0) {
> bb_simple_perror_msg(G.args[0]);
> - return errno == ENOENT ? 127 : 126;
> - }
> - if (status == 255) {
> - bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
> - return 124;
> + status = (errno == ENOENT) ? 127 : 126;
> }
> - if (status >= 0x180) {
> + else if (status >= 0x180) {
> bb_error_msg("'%s' terminated by signal %d",
> G.args[0], status - 0x180);
> - return 125;
> + status = 125;
> }
> - if (status)
> - return 123;
> - return 0;
> + else if (status != 0) {
> + if (status == 255) {
> + bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
> + return 124;
> + }
> + /* "123 if any invocation of the command exited with status 1-125"
> + * This implies that nonzero exit code is remembered,
> + * but does not cause xargs to stop: we return 0.
> + */
> + G.xargs_exitcode = 123;
> + status = 0;
> + }
> +
> + if (status != 0)
> + G.xargs_exitcode = status;
> + return status;
> }
>
> /* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space.
> @@ -436,6 +524,9 @@ static int xargs_ask_confirmation(void)
> //usage: IF_FEATURE_XARGS_SUPPORT_REPL_STR(
> //usage: "\n -I STR Replace STR within PROG ARGS with input line"
> //usage: )
> +//usage: IF_FEATURE_XARGS_SUPPORT_PARALLEL(
> +//usage: "\n -P N Run up to N PROGs in parallel"
> +//usage: )
> //usage: IF_FEATURE_XARGS_SUPPORT_TERMOPT(
> //usage: "\n -x Exit if size is exceeded"
> //usage: )
> @@ -473,14 +564,14 @@ enum {
> IF_FEATURE_XARGS_SUPPORT_CONFIRMATION("p") \
> IF_FEATURE_XARGS_SUPPORT_TERMOPT( "x") \
> IF_FEATURE_XARGS_SUPPORT_ZERO_TERM( "0") \
> - IF_FEATURE_XARGS_SUPPORT_REPL_STR( "I:i::")
> + IF_FEATURE_XARGS_SUPPORT_REPL_STR( "I:i::") \
> + IF_FEATURE_XARGS_SUPPORT_PARALLEL( "P:+")
>
> int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
> int xargs_main(int argc UNUSED_PARAM, char **argv)
> {
> int initial_idx;
> int i;
> - int child_error = 0;
> char *max_args;
> char *max_chars;
> char *buf;
> @@ -500,8 +591,14 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
> "no-run-if-empty\0" No_argument "r",
> &max_args, &max_chars, &G.eof_str, &G.eof_str
> IF_FEATURE_XARGS_SUPPORT_REPL_STR(, &G.repl_str, &G.repl_str)
> + IF_FEATURE_XARGS_SUPPORT_PARALLEL(, &G.max_procs)
> );
>
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> + if (G.max_procs <= 0) /* -P0 means "run lots of them" */
> + G.max_procs = 100; /* let's not go crazy high */
> +#endif
> +
> /* -E ""? You may wonder why not just omit -E?
> * This is used for portability:
> * old xargs was using "_" as default for -E / -e */
> @@ -620,11 +717,8 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
> }
>
> if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) {
> - child_error = xargs_exec();
> - }
> -
> - if (child_error > 0 && child_error != 123) {
> - break;
> + if (xargs_exec() != 0)
> + break; /* G.xargs_exitcode is set by xargs_exec() */
> }
>
> overlapping_strcpy(buf, rem);
> @@ -635,7 +729,12 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
> free(buf);
> }
>
> - return child_error;
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> + G.max_procs = 0;
> + xargs_exec(); /* final waitpid() loop */
> +#endif
> +
> + return G.xargs_exitcode;
> }
> _______________________________________________
> busybox mailing list
> busybox at busybox.net
> http://lists.busybox.net/mailman/listinfo/busybox
>
More information about the busybox
mailing list