[git commit] cut: fix "-s" flag to omit blank lines
Denys Vlasenko
vda.linux at googlemail.com
Tue Dec 10 00:27:04 UTC 2024
commit: https://git.busybox.net/busybox/commit/?id=50e2b59370542097eb0efd79cc8d3d39ee52bf82
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master
Using cut with the delimiter flag ("-d") with the "-s" flag to only
output lines containing the delimiter will print blank lines. This
deviates from the behavior of cut provided by GNU Coreutils. Blank lines should
be omitted if "-s" is used with "-d".
This change introduces a somewhat naive, yet efficient solution, where
line length is checked before looping through bytes. If line length is
zero and the "-s" flag is used, the code will jump to parsing the next
line to avoid printing a newline character.
function old new delta
cut_main 1196 1185 -11
Signed-off-by: Colin McAllister <colinmca242 at gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
coreutils/cut.c | 20 +++++++++++++++-----
testsuite/cut.tests | 9 +++++++++
2 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/coreutils/cut.c b/coreutils/cut.c
index d129f9b9d..b7fe11126 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -152,11 +152,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
unsigned uu = 0, start = 0, end = 0, out = 0;
int dcount = 0;
+ /* Blank line? Check -s (later check for -s does not catch empty lines) */
+ if (linelen == 0) {
+ if (option_mask32 & CUT_OPT_SUPPRESS_FLGS)
+ goto next_line;
+ }
+
/* Loop through bytes, finding next delimiter */
for (;;) {
/* End of current range? */
if (end == linelen || dcount > cut_lists[cl_pos].endpos) {
- if (++cl_pos >= nlists) break;
+ if (++cl_pos >= nlists)
+ break;
if (option_mask32 & CUT_OPT_NOSORT_FLGS)
start = dcount = uu = 0;
end = 0;
@@ -175,15 +182,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
if (shoe) {
regmatch_t rr = {-1, -1};
- if (!regexec(&reg, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
+ if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
end = uu + rr.rm_so;
uu += rr.rm_eo;
} else {
uu = linelen;
continue;
}
- } else if (line[end = uu++] != *delim)
- continue;
+ } else {
+ end = uu++;
+ if (line[end] != *delim)
+ continue;
+ }
/* Got delimiter. Loop if not yet within range. */
if (dcount++ < cut_lists[cl_pos].startpos) {
@@ -192,7 +202,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
}
}
if (end != start || !shoe)
- printf("%s%.*s", out++ ? odelim : "", end-start, line + start);
+ printf("%s%.*s", out++ ? odelim : "", end - start, line + start);
start = uu;
if (!dcount)
break;
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index 2458c019c..0b401bc00 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -65,6 +65,15 @@ testing "cut with -d -f( ) -s" "cut -d' ' -f3 -s input && echo yes" "yes\n" "$in
testing "cut with -d -f(a) -s" "cut -da -f3 -s input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
testing "cut with -d -f(a) -s -n" "cut -da -f3 -s -n input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
+input="\
+
+foo bar baz
+
+bing bong boop
+
+"
+testing "cut with -d -s omits blank lines" "cut -d' ' -f2 -s input" "bar\nbong\n" "$input" ""
+
# substitute for awk
optional FEATURE_CUT_REGEX
testing "cut -DF" "cut -DF 2,7,5" \
More information about the busybox-cvs
mailing list