[PATCH] libbb: in xmalloc_fgetline, use getline if enabled

Elvira Khabirova lineprinter0 at gmail.com
Sun May 28 17:01:52 UTC 2023


When xmalloc_fgetline was introduced, getline(3) was a GNU extension
and was not necessarily implemented in libcs. Since then,
it's become a part of POSIX.1-2008 and is now implemented in
glibc, uClibc-ng, and musl.

Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
The issue with that approach is that bb_get_chunk_from_file stops
both at \n and at \0. This introduces unexpected behavior when tools
are presented with inputs containing \0. For example, in a comparison
of GNU core utils cut with busybox cut:

% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
00000000  00 01 00 0a                                       |....|
00000004
% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
00000000  0a 01 0a 03 04 05 0a                              |.......|
00000007

Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
busybox cut interprets the input as three lines instead of one.

Also, since xmalloc_fgetline never returned strings with embedded \0,
cut_file expects strlen of the returned string to match the string's
total length.

To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
that fully matches the behavior of xmalloc_fgetline,
but also returns the amount of bytes read.

Add a configuration option, FEATURE_USE_GETLINE, and enable it
by default. Use getline in xmalloc_fgetline_n if the feature is enabled.

Change the behavior of cut_file to use the values returned
by the new function instead of calling strlen.

Call xmalloc_fgetline_n from xmalloc_fgetline.

Add a test for the previously mentioned case.

With FEATURE_USE_GETLINE:

function                                             old     new   delta
xmalloc_fgetline_n                                     -     173    +173
xmalloc_fgetline                                      85      58     -27
cut_main                                            1406    1367     -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66)           Total: 107 bytes

Without FEATURE_USE_GETLINE:

function                                             old     new   delta
xmalloc_fgetline_n                                     -      41     +41
xmalloc_fgetline                                      85      58     -27
cut_main                                            1406    1367     -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66)            Total: -25 bytes

Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
Signed-off-by: Elvira Khabirova <lineprinter0 at gmail.com>
---
 coreutils/cut.c            |  4 ++--
 include/libbb.h            |  2 ++
 libbb/Config.src           | 11 +++++++++++
 libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++-----
 testsuite/cut.tests        |  2 ++
 5 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/coreutils/cut.c b/coreutils/cut.c
index 55bdd9386..7c87467ca 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 		const struct cut_list *cut_lists, unsigned nlists)
 {
 	char *line;
+	size_t linelen = 0;
 	unsigned linenum = 0;	/* keep these zero-based to be consistent */
 	regex_t reg;
 	int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
@@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 	if (shoe) xregcomp(&reg, delim, REG_EXTENDED);
 
 	/* go through every line in the file */
-	while ((line = xmalloc_fgetline(file)) != NULL) {
+	while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) {
 
 		/* set up a list so we can keep track of what's been printed */
-		int linelen = strlen(line);
 		char *printed = xzalloc(linelen + 1);
 		char *orig_line = line;
 		unsigned cl_pos = 0;
diff --git a/include/libbb.h b/include/libbb.h
index 6191debb1..73d16647a 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FA
 extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
 /* Chops off '\n' from the end, unlike fgets: */
 extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
+/* Same as xmalloc_fgetline but returns number of bytes read: */
+extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC RETURNS_MALLOC;
 /* Same, but doesn't try to conserve space (may have some slack after the end) */
 /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
 
diff --git a/libbb/Config.src b/libbb/Config.src
index b980f19a9..7a37929d6 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
 	will be used instead (which gives wrong results if date/time
 	is reset).
 
+config FEATURE_USE_GETLINE
+	bool "Use getline library function"
+	default y
+	help
+	When enabled, busybox will use the libc getline() function
+	instead of getc loops to read data from files.
+	Using getline provides better behavior when utilities
+	encounter NUL bytes in their inputs.
+	getline() was originally a GNU extension, but now is a part
+	of POSIX.1-2008 and is implemented in contemporary libcs.
+
 config IOCTL_HEX2STR_ERROR
 	bool "Use ioctl names rather than hex values in error messages"
 	default y
diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
index 903ff1fb6..096bbd66f 100644
--- a/libbb/get_line_from_file.c
+++ b/libbb/get_line_from_file.c
@@ -12,9 +12,9 @@
 
 char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
 {
-	int ch;
-	size_t idx = 0;
 	char *linebuf = NULL;
+	size_t idx = 0;
+	int ch;
 
 	while ((ch = getc(file)) != EOF) {
 		/* grow the line buffer as necessary */
@@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
 char* FAST_FUNC xmalloc_fgetline(FILE *file)
 {
 	size_t i;
-	char *c = bb_get_chunk_from_file(file, &i);
 
-	if (i && c[--i] == '\n')
-		c[i] = '\0';
+	return xmalloc_fgetline_n(file, &i);
+}
+
+/* Get line.  Remove trailing \n.  Return the number of bytes read. */
+char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n)
+{
+	char *c = NULL;
+#if ENABLE_FEATURE_USE_GETLINE
+	size_t idx = 0;
+	ssize_t nread;
+
+	nread = getline(&c, &idx, file);
+	if (nread < 0) {
+		if (errno == ENOMEM)
+			bb_die_memory_exhausted();
+		free(c);
+		c = NULL;
+		*n = 0;
+	} else {
+		*n = nread;
+	}
+#else
+	c = bb_get_chunk_from_file(file, n);
+#endif
+
+	if (*n) {
+		if (c[*n - 1] == '\n') {
+			c[--*n] = '\0';
+		}
+	}
 
 	return c;
 }
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index 2458c019c..0fd731688 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -81,4 +81,6 @@ SKIP=
 testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n"
 testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
 
+testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" "\x00\x01\x00\x03\x04\x05\x06"
+
 exit $FAILCOUNT
-- 
2.34.1



More information about the busybox mailing list