[git commit master 1/1] awk: fix breakage in last commit

Denys Vlasenko vda.linux at googlemail.com
Sat Oct 23 23:58:04 UTC 2010


commit: http://git.busybox.net/busybox/commit/?id=2b299fed6a77d3aaf7e4e768fb519f2536c2eff0
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

While at it, made bb_process_escape_sequence faster (same size)

function                                             old     new   delta
nextchar                                              49      53      +4
bb_process_escape_sequence                           138     140      +2
next_token                                           838     839      +1
static.charmap                                        20      18      -2
is_assignment                                        143     135      -8
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/2 up/down: 7/-10)              Total: -3 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 editors/awk.c                   |   27 ++++++++++++----------
 libbb/process_escape_sequence.c |   46 +++++++++++++++++++-------------------
 2 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/editors/awk.c b/editors/awk.c
index fb3bf6b..9646ced 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -684,8 +684,11 @@ static char nextchar(char **s)
 	pps = *s;
 	if (c == '\\')
 		c = bb_process_escape_sequence((const char**)s);
-	if (c == '\\' && *s == pps)
-		c = *(*s)++;
+	if (c == '\\' && *s == pps) { /* unrecognized \z? */
+		c = *(*s); /* yes, fetch z */
+		if (c)
+			(*s)++; /* advance unless z = NUL */
+	}
 	return c;
 }
 
@@ -1007,9 +1010,10 @@ static uint32_t next_token(uint32_t expected)
 			/* it's a string */
 			t_string = s = ++p;
 			while (*p != '\"') {
-				char *pp = p;
+				char *pp;
 				if (*p == '\0' || *p == '\n')
 					syntax_error(EMSG_UNEXP_EOS);
+				pp = p;
 				*s++ = nextchar(&pp);
 				p = pp;
 			}
@@ -2926,22 +2930,21 @@ static int awk_exit(int r)
  * otherwise return 0 */
 static int is_assignment(const char *expr)
 {
-	char *exprc, *s, *s0, *s1;
+	char *exprc, *val, *s, *s1;
 
-	if (!isalnum_(*expr) || (s0 = strchr(expr, '=')) == NULL) {
+	if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
 		return FALSE;
 	}
 
 	exprc = xstrdup(expr);
-	s0 = exprc + (s0 - expr);
-	*s++ = '\0';
+	val = exprc + (val - expr);
+	*val++ = '\0';
 
-	s = s1 = s0;
-	while (*s)
-		*s1++ = nextchar(&s);
-	*s1 = '\0';
+	s = s1 = val;
+	while ((*s1 = nextchar(&s)) != '\0')
+		s1++;
 
-	setvar_u(newvar(exprc), s0);
+	setvar_u(newvar(exprc), val);
 	free(exprc);
 	return TRUE;
 }
diff --git a/libbb/process_escape_sequence.c b/libbb/process_escape_sequence.c
index dd6e076..7b1d97f 100644
--- a/libbb/process_escape_sequence.c
+++ b/libbb/process_escape_sequence.c
@@ -18,18 +18,8 @@
 
 char FAST_FUNC bb_process_escape_sequence(const char **ptr)
 {
-	/* bash builtin "echo -e '\ec'" interprets \e as ESC,
-	 * but coreutils "/bin/echo -e '\ec'" does not.
-	 * manpages tend to support coreutils way.
-	 * Update: coreutils added support for \e on 28 Oct 2009. */
-	static const char charmap[] ALIGN1 = {
-		'a',  'b', 'e', 'f',  'n',  'r',  't',  'v',  '\\', 0,
-		'\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\', '\\' };
-
-	const char *p;
 	const char *q;
 	unsigned num_digits;
-	unsigned r;
 	unsigned n;
 	unsigned base;
 
@@ -37,18 +27,17 @@ char FAST_FUNC bb_process_escape_sequence(const char **ptr)
 	base = 8;
 	q = *ptr;
 
-#if WANT_HEX_ESCAPES
-	if (*q == 'x') {
+	if (WANT_HEX_ESCAPES && *q == 'x') {
 		++q;
 		base = 16;
 		++num_digits;
 	}
-#endif
 
 	/* bash requires leading 0 in octal escapes:
 	 * \02 works, \2 does not (prints \ and 2).
 	 * We treat \2 as a valid octal escape sequence. */
 	do {
+		unsigned r;
 #if !WANT_HEX_ESCAPES
 		unsigned d = (unsigned char)(*q) - '0';
 #else
@@ -60,8 +49,9 @@ char FAST_FUNC bb_process_escape_sequence(const char **ptr)
 			if (WANT_HEX_ESCAPES && base == 16) {
 				--num_digits;
 				if (num_digits == 0) {
-					/* \x<bad_char> */
-					--q; /* go back to x */
+					/* \x<bad_char>: return '\',
+					 * leave ptr pointing to x */
+					return '\\';
 				}
 			}
 			break;
@@ -76,20 +66,30 @@ char FAST_FUNC bb_process_escape_sequence(const char **ptr)
 		++q;
 	} while (++num_digits < 3);
 
-	if (num_digits == 0) {	/* mnemonic escape sequence? */
-		p = charmap;
+	if (num_digits == 0) {
+		/* Not octal or hex escape sequence.
+		 * Is it one-letter one? */
+
+		/* bash builtin "echo -e '\ec'" interprets \e as ESC,
+		 * but coreutils "/bin/echo -e '\ec'" does not.
+		 * Manpages tend to support coreutils way.
+		 * Update: coreutils added support for \e on 28 Oct 2009. */
+		static const char charmap[] ALIGN1 = {
+			'a',  'b', 'e', 'f',  'n',  'r',  't',  'v',  '\\',
+			'\a', '\b', 27, '\f', '\n', '\r', '\t', '\v', '\\',
+		};
+		const char *p = charmap;
 		do {
 			if (*p == *q) {
 				q++;
 				break;
 			}
-		} while (*++p);
-		/* p points to found escape char or NUL,
+		} while (*++p != '\\');
+		/* p points to found escape char or '\',
 		 * advance it and find what it translates to.
-		 * Note that unrecognized sequence \z returns '\'
-		 * and leaves ptr pointing to z. */
-		p += sizeof(charmap) / 2;
-		n = *p;
+		 * Note that \NUL and unrecognized sequence \z return '\'
+		 * and leave ptr pointing to NUL or z. */
+		n = p[sizeof(charmap) / 2];
 	}
 
 	*ptr = q;
-- 
1.7.1



More information about the busybox-cvs mailing list