[PATCH] libbb: in xmalloc_fgetline, use getline if enabled
tito
farmatito at tiscali.it
Sun May 28 19:53:39 UTC 2023
On Sun, 28 May 2023 19:01:52 +0200
Elvira Khabirova <lineprinter0 at gmail.com> wrote:
> When xmalloc_fgetline was introduced, getline(3) was a GNU extension
> and was not necessarily implemented in libcs. Since then,
> it's become a part of POSIX.1-2008 and is now implemented in
> glibc, uClibc-ng, and musl.
>
> Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
> The issue with that approach is that bb_get_chunk_from_file stops
> both at \n and at \0. This introduces unexpected behavior when tools
> are presented with inputs containing \0. For example, in a comparison
> of GNU core utils cut with busybox cut:
>
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
> 00000000 00 01 00 0a |....|
> 00000004
> % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
> 00000000 0a 01 0a 03 04 05 0a |.......|
> 00000007
>
> Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
> busybox cut interprets the input as three lines instead of one.
>
> Also, since xmalloc_fgetline never returned strings with embedded \0,
> cut_file expects strlen of the returned string to match the string's
> total length.
>
> To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
> that fully matches the behavior of xmalloc_fgetline,
> but also returns the amount of bytes read.
>
> Add a configuration option, FEATURE_USE_GETLINE, and enable it
> by default. Use getline in xmalloc_fgetline_n if the feature is enabled.
>
> Change the behavior of cut_file to use the values returned
> by the new function instead of calling strlen.
>
> Call xmalloc_fgetline_n from xmalloc_fgetline.
>
> Add a test for the previously mentioned case.
>
> With FEATURE_USE_GETLINE:
>
> function old new delta
> xmalloc_fgetline_n - 173 +173
> xmalloc_fgetline 85 58 -27
> cut_main 1406 1367 -39
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66) Total: 107 bytes
>
> Without FEATURE_USE_GETLINE:
>
> function old new delta
> xmalloc_fgetline_n - 41 +41
> xmalloc_fgetline 85 58 -27
> cut_main 1406 1367 -39
> ------------------------------------------------------------------------------
> (add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66) Total: -25 bytes
>
> Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
> Signed-off-by: Elvira Khabirova <lineprinter0 at gmail.com>
> ---
> coreutils/cut.c | 4 ++--
> include/libbb.h | 2 ++
> libbb/Config.src | 11 +++++++++++
> libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++-----
> testsuite/cut.tests | 2 ++
> 5 files changed, 49 insertions(+), 7 deletions(-)
>
> diff --git a/coreutils/cut.c b/coreutils/cut.c
> index 55bdd9386..7c87467ca 100644
> --- a/coreutils/cut.c
> +++ b/coreutils/cut.c
> @@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
> const struct cut_list *cut_lists, unsigned nlists)
> {
> char *line;
> + size_t linelen = 0;
> unsigned linenum = 0; /* keep these zero-based to be consistent */
> regex_t reg;
> int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
> @@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
> if (shoe) xregcomp(®, delim, REG_EXTENDED);
>
> /* go through every line in the file */
> - while ((line = xmalloc_fgetline(file)) != NULL) {
> + while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) {
>
> /* set up a list so we can keep track of what's been printed */
> - int linelen = strlen(line);
> char *printed = xzalloc(linelen + 1);
> char *orig_line = line;
> unsigned cl_pos = 0;
> diff --git a/include/libbb.h b/include/libbb.h
> index 6191debb1..73d16647a 100644
> --- a/include/libbb.h
> +++ b/include/libbb.h
> @@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FA
> extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
> /* Chops off '\n' from the end, unlike fgets: */
> extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
> +/* Same as xmalloc_fgetline but returns number of bytes read: */
> +extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC RETURNS_MALLOC;
> /* Same, but doesn't try to conserve space (may have some slack after the end) */
> /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
>
> diff --git a/libbb/Config.src b/libbb/Config.src
> index b980f19a9..7a37929d6 100644
> --- a/libbb/Config.src
> +++ b/libbb/Config.src
> @@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
> will be used instead (which gives wrong results if date/time
> is reset).
>
> +config FEATURE_USE_GETLINE
> + bool "Use getline library function"
> + default y
> + help
> + When enabled, busybox will use the libc getline() function
> + instead of getc loops to read data from files.
> + Using getline provides better behavior when utilities
> + encounter NUL bytes in their inputs.
> + getline() was originally a GNU extension, but now is a part
> + of POSIX.1-2008 and is implemented in contemporary libcs.
> +
> config IOCTL_HEX2STR_ERROR
> bool "Use ioctl names rather than hex values in error messages"
> default y
> diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
> index 903ff1fb6..096bbd66f 100644
> --- a/libbb/get_line_from_file.c
> +++ b/libbb/get_line_from_file.c
> @@ -12,9 +12,9 @@
>
> char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
> {
> - int ch;
> - size_t idx = 0;
> char *linebuf = NULL;
> + size_t idx = 0;
> + int ch;
>
> while ((ch = getc(file)) != EOF) {
> /* grow the line buffer as necessary */
> @@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
> char* FAST_FUNC xmalloc_fgetline(FILE *file)
> {
> size_t i;
> - char *c = bb_get_chunk_from_file(file, &i);
>
> - if (i && c[--i] == '\n')
> - c[i] = '\0';
> + return xmalloc_fgetline_n(file, &i);
> +}
> +
> +/* Get line. Remove trailing \n. Return the number of bytes read. */
> +char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n)
> +{
> + char *c = NULL;
> +#if ENABLE_FEATURE_USE_GETLINE
> + size_t idx = 0;
> + ssize_t nread;
> +
> + nread = getline(&c, &idx, file);
> + if (nread < 0) {
> + if (errno == ENOMEM)
> + bb_die_memory_exhausted();
> + free(c);
> + c = NULL;
> + *n = 0;
> + } else {
> + *n = nread;
> + }
> +#else
> + c = bb_get_chunk_from_file(file, n);
> +#endif
> +
> + if (*n) {
> + if (c[*n - 1] == '\n') {
> + c[--*n] = '\0';
> + }
> + }
>
> return c;
> }
> diff --git a/testsuite/cut.tests b/testsuite/cut.tests
> index 2458c019c..0fd731688 100755
> --- a/testsuite/cut.tests
> +++ b/testsuite/cut.tests
> @@ -81,4 +81,6 @@ SKIP=
> testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n"
> testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
>
> +testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" "\x00\x01\x00\x03\x04\x05\x06"
> +
> exit $FAILCOUNT
Hi,
what is this change for?
- int ch;
- size_t idx = 0;
char *linebuf = NULL;
+ size_t idx = 0;
+ int ch;
Ciao,
Tito
More information about the busybox
mailing list