[git commit] cut: fix "-s" flag to omit blank lines

Denys Vlasenko vda.linux at googlemail.com
Tue Dec 10 00:27:04 UTC 2024


commit: https://git.busybox.net/busybox/commit/?id=50e2b59370542097eb0efd79cc8d3d39ee52bf82
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

Using cut with the delimiter flag ("-d") together with the "-s" flag,
which should output only lines containing the delimiter, prints blank
lines. This deviates from the behavior of cut provided by GNU Coreutils.
Blank lines should be omitted if "-s" is used with "-d".

This change introduces a somewhat naive, yet efficient solution, where
line length is checked before looping through bytes. If line length is
zero and the "-s" flag is used, the code will jump to parsing the next
line to avoid printing a newline character.

function                                             old     new   delta
cut_main                                            1196    1185     -11

Signed-off-by: Colin McAllister <colinmca242 at gmail.com>
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 coreutils/cut.c     | 20 +++++++++++++++-----
 testsuite/cut.tests |  9 +++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/coreutils/cut.c b/coreutils/cut.c
index d129f9b9d..b7fe11126 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -152,11 +152,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 			unsigned uu = 0, start = 0, end = 0, out = 0;
 			int dcount = 0;
 
+			/* Blank line? Check -s (later check for -s does not catch empty lines) */
+			if (linelen == 0) {
+				if (option_mask32 & CUT_OPT_SUPPRESS_FLGS)
+					goto next_line;
+			}
+
 			/* Loop through bytes, finding next delimiter */
 			for (;;) {
 				/* End of current range? */
 				if (end == linelen || dcount > cut_lists[cl_pos].endpos) {
-					if (++cl_pos >= nlists) break;
+					if (++cl_pos >= nlists)
+						break;
 					if (option_mask32 & CUT_OPT_NOSORT_FLGS)
 						start = dcount = uu = 0;
 					end = 0;
@@ -175,15 +182,18 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 					if (shoe) {
 						regmatch_t rr = {-1, -1};
 
-						if (!regexec(&reg, line+uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
+						if (!regexec(&reg, line + uu, 1, &rr, REG_NOTBOL|REG_NOTEOL)) {
 							end = uu + rr.rm_so;
 							uu += rr.rm_eo;
 						} else {
 							uu = linelen;
 							continue;
 						}
-					} else if (line[end = uu++] != *delim)
-						continue;
+					} else {
+						end = uu++;
+						if (line[end] != *delim)
+							continue;
+					}
 
 					/* Got delimiter. Loop if not yet within range. */
 					if (dcount++ < cut_lists[cl_pos].startpos) {
@@ -192,7 +202,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 					}
 				}
 				if (end != start || !shoe)
-					printf("%s%.*s", out++ ? odelim : "", end-start, line + start);
+					printf("%s%.*s", out++ ? odelim : "", end - start, line + start);
 				start = uu;
 				if (!dcount)
 					break;
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index 2458c019c..0b401bc00 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -65,6 +65,15 @@ testing "cut with -d -f( ) -s" "cut -d' ' -f3 -s input && echo yes" "yes\n" "$in
 testing "cut with -d -f(a) -s" "cut -da -f3 -s input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
 testing "cut with -d -f(a) -s -n" "cut -da -f3 -s -n input" "n\nsium:Jim\n\ncion:Ed\n" "$input" ""
 
+input="\
+
+foo bar baz
+
+bing bong boop
+
+"
+testing "cut with -d -s omits blank lines" "cut -d' ' -f2 -s input" "bar\nbong\n" "$input" ""
+
 # substitute for awk
 optional FEATURE_CUT_REGEX
 testing "cut -DF" "cut -DF 2,7,5" \


More information about the busybox-cvs mailing list