[PATCH 1/1] xargs: support -P <N> (parallel execution)

walter harms wharms at bfs.de
Fri Aug 25 08:25:14 UTC 2017



Am 24.08.2017 13:19, schrieb Denys Vlasenko:
> On Wed, Aug 23, 2017 at 5:25 PM, Johannes Schindelin
> <johannes.schindelin at gmx.de> wrote:
>> The GNU variant of xargs supports a special, non-POSIX extension to run
>> processes in parallel, triggered by the -P <N> option.
>>
>> This feature comes in handy e.g. when running Git's test suite outside
>> of the development environment using only BusyBox (which does not
>> support `make` nor `prove`, Git's go-to solutions for running the test
>> suite in parallel).
>>
>> It so just happens that this patch also addresses the feature requested
>> in https://bugs.busybox.net/show_bug.cgi?id=9511.
>>
>> Signed-off-by: Johannes Schindelin <johannes.schindelin at gmx.de>
>> ---
>>  findutils/xargs.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 111 insertions(+), 1 deletion(-)
> 
> function                                             old     new   delta
> xargs_main                                           787    1174    +387
> packed_usage                                       31757   31769     +12
> ------------------------------------------------------------------------------
> (add/remove: 0/0 grow/shrink: 2/0 up/down: 399/0)             Total: 399 bytes
> 
> 
> How about this version:
> 
> function                                             old     new   delta
> xargs_exec                                             -     294    +294
> packed_usage                                       31757   31772     +15
> xargs_main                                           787     719     -68
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 1/1 up/down: 309/-68)           Total: 241 bytes
> 
> 
> diff --git a/findutils/xargs.c b/findutils/xargs.c
> index b4e3db0..77e01ef 100644
> --- a/findutils/xargs.c
> +++ b/findutils/xargs.c
> @@ -59,6 +59,11 @@
>  //config:    depends on XARGS
>  //config:    help
>  //config:    Support -I STR and -i[STR] options.
> +//config:
> +//config:config FEATURE_XARGS_SUPPORT_PARALLEL
> +//config:    bool "Enable -P N: processes to run in parallel"
> +//config:    default y
> +//config:    depends on XARGS
> 
>  //applet:IF_XARGS(APPLET_NOEXEC(xargs, xargs, BB_DIR_USR_BIN,
> BB_SUID_DROP, xargs))
> 
> @@ -99,38 +104,121 @@ struct globals {
>  #endif
>      const char *eof_str;
>      int idx;
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> +    int running_procs;
> +    int max_procs;
> +#endif
> +    smalluint xargs_exitcode;
>  } FIX_ALIASING;
>  #define G (*(struct globals*)bb_common_bufsiz1)
>  #define INIT_G() do { \
>      setup_common_bufsiz(); \
>      G.eof_str = NULL; /* need to clear by hand because we are NOEXEC
> applet */ \
>      G.idx = 0; \
> +    IF_FEATURE_XARGS_SUPPORT_PARALLEL(G.running_procs = 0;) \
> +    IF_FEATURE_XARGS_SUPPORT_PARALLEL(G.max_procs = 1;) \
> +    G.xargs_exitcode = 0; \
>      IF_FEATURE_XARGS_SUPPORT_REPL_STR(G.repl_str = "{}";) \
>      IF_FEATURE_XARGS_SUPPORT_REPL_STR(G.eol_ch = '\n';) \
>  } while (0)
> 
> 
> +/*
> + * Returns 0 if xargs should continue (but may set G.xargs_exitcode to 123).
> + * Else sets G.xargs_exitcode to error code and returns nonzero.
> + *
> + * If G.max_procs == 0, performs final waitpid() loop for all children.
> + */
>  static int xargs_exec(void)
>  {
>      int status;
> 
> +#if !ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
>      status = spawn_and_wait(G.args);
> +#else
> +    if (G.max_procs == 1) {
> +        status = spawn_and_wait(G.args);
> +    } else {


> +        pid_t pid;
> +        int wstat;
> + again:
> +        if (G.running_procs >= G.max_procs)
> +            pid = safe_waitpid(-1, &wstat, 0);
> +        else
> +            pid = wait_any_nohang(&wstat);
> +        if (pid > 0) {
> +            /* We may have children we don't know about:
> +             * sh -c 'sleep 1 & exec xargs ...'
> +             * Do not make G.running_procs go negative.
> +             */
> +            if (G.running_procs != 0)
> +                G.running_procs--;
> +            status = WIFSIGNALED(wstat)
> +                ? 0x180 + WTERMSIG(wstat)
> +                : WEXITSTATUS(wstat);
> +            if (status > 0 && status < 255) {
> +                /* See below why 123 does not abort */
> +                G.xargs_exitcode = 123;
> +                status = 0;
> +            }
> +            if (status == 0)
> +                goto again; /* maybe we have more children? */
> +            /* else: "bad" status, will bail out */
> +        } else if (G.max_procs != 0) {
> +            /* Not in final waitpid() loop,
> +             * and G.running_procs < G.max_procs: start more procs
> +             */
> +            status = spawn(G.args);
> +            /* here "status" actually holds pid, or -1 */
> +            if (status > 0) {
> +                G.running_procs++;
> +                status = 0;
> +            }
> +            /* else: status == -1 (failed to fork or exec) */
> +        } else {
> +            /* final waitpid() loop: must be ECHILD "no more children" */
> +            status = 0;
> +        }

maybe you can make it a function returning status ?
that would improve readability.

re,
 wh


> +    }



> +#endif
> +    /* Manpage:
> +     * """xargs exits with the following status:
> +     * 0 if it succeeds
> +     * 123 if any invocation of the command exited with status 1-125
> +     * 124 if the command exited with status 255
> +     *     ("""If any invocation of the command exits with a status of 255,
> +     *     xargs will stop immediately without reading any further input.
> +     *     An error message is issued on stderr when this happens.""")
> +     * 125 if the command is killed by a signal
> +     * 126 if the command cannot be run
> +     * 127 if the command is not found
> +     * 1 if some other error occurred."""
> +     */
>      if (status < 0) {
>          bb_simple_perror_msg(G.args[0]);
> -        return errno == ENOENT ? 127 : 126;
> -    }
> -    if (status == 255) {
> -        bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
> -        return 124;
> +        status = (errno == ENOENT) ? 127 : 126;
>      }
> -    if (status >= 0x180) {
> +    else if (status >= 0x180) {
>          bb_error_msg("'%s' terminated by signal %d",
>              G.args[0], status - 0x180);
> -        return 125;
> +        status = 125;
>      }
> -    if (status)
> -        return 123;
> -    return 0;
> +    else if (status != 0) {
> +        if (status == 255) {
> +            bb_error_msg("%s: exited with status 255; aborting", G.args[0]);
> +            return 124;
> +        }
> +        /* "123 if any invocation of the command exited with status 1-125"
> +         * This implies that nonzero exit code is remembered,
> +         * but does not cause xargs to stop: we return 0.
> +         */
> +        G.xargs_exitcode = 123;
> +        status = 0;
> +    }
> +
> +    if (status != 0)
> +        G.xargs_exitcode = status;
> +    return status;
>  }
> 
>  /* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space.
> @@ -436,6 +524,9 @@ static int xargs_ask_confirmation(void)
>  //usage:    IF_FEATURE_XARGS_SUPPORT_REPL_STR(
>  //usage:     "\n    -I STR    Replace STR within PROG ARGS with input line"
>  //usage:    )
> +//usage:    IF_FEATURE_XARGS_SUPPORT_PARALLEL(
> +//usage:     "\n    -P N    Run up to N PROGs in parallel"
> +//usage:    )
>  //usage:    IF_FEATURE_XARGS_SUPPORT_TERMOPT(
>  //usage:     "\n    -x    Exit if size is exceeded"
>  //usage:    )
> @@ -473,14 +564,14 @@ enum {
>      IF_FEATURE_XARGS_SUPPORT_CONFIRMATION("p") \
>      IF_FEATURE_XARGS_SUPPORT_TERMOPT(     "x") \
>      IF_FEATURE_XARGS_SUPPORT_ZERO_TERM(   "0") \
> -    IF_FEATURE_XARGS_SUPPORT_REPL_STR(    "I:i::")
> +    IF_FEATURE_XARGS_SUPPORT_REPL_STR(    "I:i::") \
> +    IF_FEATURE_XARGS_SUPPORT_PARALLEL(    "P:+")
> 
>  int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
>  int xargs_main(int argc UNUSED_PARAM, char **argv)
>  {
>      int initial_idx;
>      int i;
> -    int child_error = 0;
>      char *max_args;
>      char *max_chars;
>      char *buf;
> @@ -500,8 +591,14 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
>          "no-run-if-empty\0" No_argument "r",
>          &max_args, &max_chars, &G.eof_str, &G.eof_str
>          IF_FEATURE_XARGS_SUPPORT_REPL_STR(, &G.repl_str, &G.repl_str)
> +        IF_FEATURE_XARGS_SUPPORT_PARALLEL(, &G.max_procs)
>      );
> 
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> +    if (G.max_procs <= 0) /* -P0 means "run lots of them" */
> +        G.max_procs = 100; /* let's not go crazy high */
> +#endif
> +
>      /* -E ""? You may wonder why not just omit -E?
>       * This is used for portability:
>       * old xargs was using "_" as default for -E / -e */
> @@ -620,11 +717,8 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
>          }
> 
>          if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) {
> -            child_error = xargs_exec();
> -        }
> -
> -        if (child_error > 0 && child_error != 123) {
> -            break;
> +            if (xargs_exec() != 0)
> +                break; /* G.xargs_exitcode is set by xargs_exec() */
>          }
> 
>          overlapping_strcpy(buf, rem);
> @@ -635,7 +729,12 @@ int xargs_main(int argc UNUSED_PARAM, char **argv)
>          free(buf);
>      }
> 
> -    return child_error;
> +#if ENABLE_FEATURE_XARGS_SUPPORT_PARALLEL
> +    G.max_procs = 0;
> +    xargs_exec(); /* final waitpid() loop */
> +#endif
> +
> +    return G.xargs_exitcode;
>  }
> _______________________________________________
> busybox mailing list
> busybox at busybox.net
> http://lists.busybox.net/mailman/listinfo/busybox
> 


More information about the busybox mailing list