[PATCH] libbb: in xmalloc_fgetline, use getline if enabled
Elvira Khabirova
lineprinter0 at gmail.com
Sun May 28 17:01:52 UTC 2023
When xmalloc_fgetline was introduced, getline(3) was a GNU extension
and was not necessarily implemented in libcs. Since then,
it's become a part of POSIX.1-2008 and is now implemented in
glibc, uClibc-ng, and musl.
Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
The issue with that approach is that bb_get_chunk_from_file stops
both at \n and at \0. This introduces unexpected behavior when tools
are presented with inputs containing \0. For example, in a comparison
of GNU core utils cut with busybox cut:
% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
00000000 00 01 00 0a |....|
00000004
% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
00000000 0a 01 0a 03 04 05 0a |.......|
00000007
Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
busybox cut interprets the input as three lines instead of one.
Also, since xmalloc_fgetline never returned strings with embedded \0,
cut_file expects strlen of the returned string to match the string's
total length.
To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
that fully matches the behavior of xmalloc_fgetline,
but also returns the amount of bytes read.
Add a configuration option, FEATURE_USE_GETLINE, and enable it
by default. Use getline in xmalloc_fgetline_n if the feature is enabled.
Change the behavior of cut_file to use the values returned
by the new function instead of calling strlen.
Call xmalloc_fgetline_n from xmalloc_fgetline.
Add a test for the previously mentioned case.
With FEATURE_USE_GETLINE:
function old new delta
xmalloc_fgetline_n - 173 +173
xmalloc_fgetline 85 58 -27
cut_main 1406 1367 -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66) Total: 107 bytes
Without FEATURE_USE_GETLINE:
function old new delta
xmalloc_fgetline_n - 41 +41
xmalloc_fgetline 85 58 -27
cut_main 1406 1367 -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66) Total: -25 bytes
Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
Signed-off-by: Elvira Khabirova <lineprinter0 at gmail.com>
---
coreutils/cut.c | 4 ++--
include/libbb.h | 2 ++
libbb/Config.src | 11 +++++++++++
libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++-----
testsuite/cut.tests | 2 ++
5 files changed, 49 insertions(+), 7 deletions(-)
diff --git a/coreutils/cut.c b/coreutils/cut.c
index 55bdd9386..7c87467ca 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
const struct cut_list *cut_lists, unsigned nlists)
{
char *line;
+ size_t linelen = 0;
unsigned linenum = 0; /* keep these zero-based to be consistent */
regex_t reg;
int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
@@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
if (shoe) xregcomp(®, delim, REG_EXTENDED);
/* go through every line in the file */
- while ((line = xmalloc_fgetline(file)) != NULL) {
+ while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) {
/* set up a list so we can keep track of what's been printed */
- int linelen = strlen(line);
char *printed = xzalloc(linelen + 1);
char *orig_line = line;
unsigned cl_pos = 0;
diff --git a/include/libbb.h b/include/libbb.h
index 6191debb1..73d16647a 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FA
extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
/* Chops off '\n' from the end, unlike fgets: */
extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
+/* Same as xmalloc_fgetline but returns number of bytes read: */
+extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC RETURNS_MALLOC;
/* Same, but doesn't try to conserve space (may have some slack after the end) */
/* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
diff --git a/libbb/Config.src b/libbb/Config.src
index b980f19a9..7a37929d6 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
will be used instead (which gives wrong results if date/time
is reset).
+config FEATURE_USE_GETLINE
+ bool "Use getline library function"
+ default y
+ help
+ When enabled, busybox will use the libc getline() function
+ instead of getc loops to read data from files.
+ Using getline provides better behavior when utilities
+ encounter NUL bytes in their inputs.
+ getline() was originally a GNU extension, but now is a part
+ of POSIX.1-2008 and is implemented in contemporary libcs.
+
config IOCTL_HEX2STR_ERROR
bool "Use ioctl names rather than hex values in error messages"
default y
diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
index 903ff1fb6..096bbd66f 100644
--- a/libbb/get_line_from_file.c
+++ b/libbb/get_line_from_file.c
@@ -12,9 +12,9 @@
char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
{
- int ch;
- size_t idx = 0;
char *linebuf = NULL;
+ size_t idx = 0;
+ int ch;
while ((ch = getc(file)) != EOF) {
/* grow the line buffer as necessary */
@@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
char* FAST_FUNC xmalloc_fgetline(FILE *file)
{
size_t i;
- char *c = bb_get_chunk_from_file(file, &i);
- if (i && c[--i] == '\n')
- c[i] = '\0';
+ return xmalloc_fgetline_n(file, &i);
+}
+
+/* Get line. Remove trailing \n. Return the number of bytes read. */
+char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n)
+{
+ char *c = NULL;
+#if ENABLE_FEATURE_USE_GETLINE
+ size_t idx = 0;
+ ssize_t nread;
+
+ nread = getline(&c, &idx, file);
+ if (nread < 0) {
+ if (errno == ENOMEM)
+ bb_die_memory_exhausted();
+ free(c);
+ c = NULL;
+ *n = 0;
+ } else {
+ *n = nread;
+ }
+#else
+ c = bb_get_chunk_from_file(file, n);
+#endif
+
+ if (*n) {
+ if (c[*n - 1] == '\n') {
+ c[--*n] = '\0';
+ }
+ }
return c;
}
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index 2458c019c..0fd731688 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -81,4 +81,6 @@ SKIP=
testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n"
testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
+testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" "\x00\x01\x00\x03\x04\x05\x06"
+
exit $FAILCOUNT
--
2.34.1
More information about the busybox
mailing list