[git commit] cut: fix -d$'\n' --output-delimiter=@@ behavior

Denys Vlasenko vda.linux at googlemail.com
Tue Dec 10 20:01:52 UTC 2024


commit: https://git.busybox.net/busybox/commit/?id=9e364b16d1020cb7b8f8f4d4f3fe1833496b7a12
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
cut_main                                            1261    1353     +92
packed_usage                                       34925   34901     -24
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 92/-24)             Total: 68 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 coreutils/cut.c     | 34 +++++++++++++++++++++++++++-------
 testsuite/cut.tests | 26 ++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 7 deletions(-)

diff --git a/coreutils/cut.c b/coreutils/cut.c
index 9f5b649d8..2d0a6237c 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -43,11 +43,19 @@
 //usage:     )
 //usage:     "\n	-s	Drop lines with no delimiter (else print them in full)"
 //usage:     "\n	-D	Don't sort/collate sections or match -f"IF_FEATURE_CUT_REGEX("F")" lines without delimeter"
+//usage:     IF_LONG_OPTS(
+//usage:     IF_FEATURE_CUT_REGEX(
+//usage:     "\n	--output-delimiter SEP Output field delimeter (default = -d for -f, one space for -F)"
+//usage:     ) IF_NOT_FEATURE_CUT_REGEX(
+//usage:     "\n	--output-delimiter SEP Output field delimeter (default = -d)"
+//usage:     )
+//usage:     ) IF_NOT_LONG_OPTS(
 //usage:     IF_FEATURE_CUT_REGEX(
 //usage:     "\n	-O SEP	Output field delimeter (default = -d for -f, one space for -F)"
 //usage:     ) IF_NOT_FEATURE_CUT_REGEX(
 //usage:     "\n	-O SEP	Output field delimeter (default = -d)"
 //usage:     )
+//usage:     )
 //TODO: --output-delimiter=SEP
 //usage:     "\n	-n	Ignored"
 //(manpage:-n	with -b: don't split multibyte characters)
@@ -96,6 +104,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 {
 	char *line;
 	unsigned linenum = 0;	/* keep these zero-based to be consistent */
+	int first_print = 1;
 
 	/* go through every line in the file */
 	while ((line = xmalloc_fgetline(file)) != NULL) {
@@ -130,16 +139,16 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 			free(printed);
 		/* Cut by lines */
 		} else if (!opt_REGEX && *delim == '\n') {
-			int spos = cut_list[cl_pos].startpos;
+			unsigned spos = cut_list[cl_pos].startpos;
 
 			/* get out if we have no more lists to process or if the lines
 			 * are lower than what we're interested in */
-			if (((int)linenum < spos) || (cl_pos >= nlists))
+			if ((linenum < spos) || (cl_pos >= nlists))
 				goto next_line;
 
 			/* if the line we're looking for is lower than the one we were
 			 * passed, it means we displayed it already, so move on */
-			while (spos < (int)linenum) {
+			while (spos < linenum) {
 				spos++;
 				/* go to the next list if we're at the end of this one */
 				if (spos > cut_list[cl_pos].endpos) {
@@ -150,20 +159,23 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 					spos = cut_list[cl_pos].startpos;
 					/* get out if the current line is lower than the one
 					 * we just became interested in */
-					if ((int)linenum < spos)
+					if (linenum < spos)
 						goto next_line;
 				}
 			}
 
 			/* If we made it here, it means we've found the line we're
 			 * looking for, so print it */
-			puts(line);
+			if (first_print) {
+				first_print = 0;
+				fputs_stdout(line);
+			} else
+				printf("%s%s", odelim, line);
 			goto next_line;
 		/* Cut by fields */
 		} else {
 			unsigned next = 0, start = 0, end = 0;
 			int dcount = 0; /* we saw Nth delimiter (0 - didn't see any yet) */
-			int first_print = 1;
 
 			/* Blank line? Check -s (later check for -s does not catch empty lines) */
 			if (linelen == 0) {
@@ -173,6 +185,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 
 			if (!odelim)
 				odelim = "\t";
+			first_print = 1;
 
 			/* Loop through bytes, finding next delimiter */
 			for (;;) {
@@ -233,7 +246,10 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 						continue;
 					}
 				}
-				if (end != start || !opt_REGEX) {
+#if ENABLE_FEATURE_CUT_REGEX
+				if (end != start || !opt_REGEX)
+#endif
+				{
 					if (first_print) {
 						first_print = 0;
 						printf("%.*s", end - start, line + start);
@@ -251,6 +267,10 @@ static void cut_file(FILE *file, const char *delim, const char *odelim,
 		linenum++;
 		free(line);
 	} /* while (got line) */
+
+	/* For -d$'\n' --output-delimiter=^, the overall output is still terminated with \n, not ^ */
+	if (!opt_REGEX && *delim == '\n' && !first_print)
+		putchar('\n');
 }
 
 int cut_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index ba5f88d60..8da390cd7 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -116,4 +116,30 @@ testing "cut non-existing field" "cut -d ':' -f1,3" \
 	"1\n" \
 	"" "1:\n"
 
+# cut -d$'\n' has a special meaning: "select input lines".
+# I didn't find any documentation for this feature.
+testing "cut -dNEWLINE" \
+	"cut -d'
+' -f4,2,6-8" \
+	"2\n4\n6\n7\n" \
+	"" "1\n2\n3\n4\n5\n6\n7"
+
+testing "cut -dNEWLINE --output-delimiter" \
+	"cut -d'
+' -O@@ -f4,2,6-8" \
+	"2@@4@@6@@7\n" \
+	"" "1\n2\n3\n4\n5\n6\n7"
+
+testing "cut -dNEWLINE --output-delimiter 2" \
+	"cut -d'
+' -O@@ -f4,2,6-8" \
+	"2@@4@@6@@7\n" \
+	"" "1\n2\n3\n4\n5\n6\n7\n"
+
+testing "cut -dNEWLINE --output-delimiter EMPTY_INPUT" \
+	"cut -d'
+' -O@@ -f4,2,6-8" \
+	"" \
+	"" ""
+
 exit $FAILCOUNT


More information about the busybox-cvs mailing list