[git commit] awk: make -F STR interpret escape sequences. Closes 5126

Denys Vlasenko vda.linux at googlemail.com
Fri Jun 22 16:41:01 UTC 2012


commit: http://git.busybox.net/busybox/commit/?id=ea664dde87d8e52df7d4003d988352a66562a945
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 editors/awk.c       |   26 +++++++++++++++++++-------
 testsuite/awk.tests |    5 +++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/editors/awk.c b/editors/awk.c
index 71abca2..d69720d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -696,6 +696,10 @@ static char nextchar(char **s)
 	pps = *s;
 	if (c == '\\')
 		c = bb_process_escape_sequence((const char**)s);
+	/* Example awk statement:
+	 * s = "abc\"def"
+	 * we must treat \" as "
+	 */
 	if (c == '\\' && *s == pps) { /* unrecognized \z? */
 		c = *(*s); /* yes, fetch z */
 		if (c)
@@ -704,6 +708,15 @@ static char nextchar(char **s)
 	return c;
 }
 
+/* TODO: merge with strcpy_and_process_escape_sequences()?
+ */
+static void unescape_string_in_place(char *s1)
+{
+	char *s = s1;
+	while ((*s1 = nextchar(&s)) != '\0')
+		s1++;
+}
+
 static ALWAYS_INLINE int isalnum_(int c)
 {
 	return (isalnum(c) || c == '_');
@@ -2992,7 +3005,7 @@ static int awk_exit(int r)
  * otherwise return 0 */
 static int is_assignment(const char *expr)
 {
-	char *exprc, *val, *s, *s1;
+	char *exprc, *val;
 
 	if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
 		return FALSE;
@@ -3002,10 +3015,7 @@ static int is_assignment(const char *expr)
 	val = exprc + (val - expr);
 	*val++ = '\0';
 
-	s = s1 = val;
-	while ((*s1 = nextchar(&s)) != '\0')
-		s1++;
-
+	unescape_string_in_place(val);
 	setvar_u(newvar(exprc), val);
 	free(exprc);
 	return TRUE;
@@ -3118,8 +3128,10 @@ int awk_main(int argc, char **argv)
 	opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
 	argv += optind;
 	argc -= optind;
-	if (opt & 0x1)
-		setvar_s(intvar[FS], opt_F); // -F
+	if (opt & 0x1) { /* -F */
+		unescape_string_in_place(opt_F);
+		setvar_s(intvar[FS], opt_F);
+	}
 	while (list_v) { /* -v */
 		if (!is_assignment(llist_pop(&list_v)))
 			bb_show_usage();
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index e671907..d4c390d 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -43,6 +43,11 @@ testing "awk long field sep" "awk -F-- '{ print NF, length(\$NF), \$NF }'" \
 	"" \
 	"a--\na--b--\na--b--c--\na--b--c--d--"
 
+testing "awk -F handles escapes" "awk -F'\\x21' '{print \$1}'" \
+	"a\n" \
+	"" \
+	"a!b\n"
+
 # '@(samp|code|file)\{' is an invalid extended regex (unmatched '{'),
 # but gawk 3.1.5 does not bail out on it.
 testing "awk gsub falls back to non-extended-regex" \


More information about the busybox-cvs mailing list