[git commit] awk: fix a SEGV

Denys Vlasenko vda.linux at googlemail.com
Sat Sep 10 22:30:56 UTC 2011


commit: http://git.busybox.net/busybox/commit/?id=7b46d11582047d0dd21b547ff4a913defe646d40
branch: http://git.busybox.net/busybox/commit/?id=refs/heads/master

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 editors/awk.c       |   71 ++++++++++++++++++++++++++++++++++++++++++++------
 testsuite/awk.tests |    3 ++
 2 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/editors/awk.c b/editors/awk.c
index 8117cab..71abca2 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -25,6 +25,7 @@
  * to perform debug printfs to stderr: */
 #define debug_printf_walker(...)  do {} while (0)
 #define debug_printf_eval(...)  do {} while (0)
+#define debug_printf_parse(...)  do {} while (0)
 
 #ifndef debug_printf_walker
 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
@@ -32,6 +33,9 @@
 #ifndef debug_printf_eval
 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
 #endif
+#ifndef debug_printf_parse
+# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
+#endif
 
 
 
@@ -435,13 +439,13 @@ struct globals {
 	smallint nextrec;
 	smallint nextfile;
 	smallint is_f0_split;
+	smallint t_rollback;
 };
 struct globals2 {
 	uint32_t t_info; /* often used */
 	uint32_t t_tclass;
 	char *t_string;
 	int t_lineno;
-	int t_rollback;
 
 	var *intvar[NUM_INTERNAL_VARS]; /* often used */
 
@@ -499,11 +503,11 @@ struct globals2 {
 #define nextrec      (G1.nextrec     )
 #define nextfile     (G1.nextfile    )
 #define is_f0_split  (G1.is_f0_split )
+#define t_rollback   (G1.t_rollback  )
 #define t_info       (G.t_info      )
 #define t_tclass     (G.t_tclass    )
 #define t_string     (G.t_string    )
 #define t_lineno     (G.t_lineno    )
-#define t_rollback   (G.t_rollback  )
 #define intvar       (G.intvar      )
 #define fsplitter    (G.fsplitter   )
 #define rsplitter    (G.rsplitter   )
@@ -1011,6 +1015,7 @@ static uint32_t next_token(uint32_t expected)
 
 		if (*p == '\0') {
 			tc = TC_EOF;
+			debug_printf_parse("%s: token found: TC_EOF\n", __func__);
 
 		} else if (*p == '\"') {
 			/* it's a string */
@@ -1026,6 +1031,7 @@ static uint32_t next_token(uint32_t expected)
 			p++;
 			*s = '\0';
 			tc = TC_STRING;
+			debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
 
 		} else if ((expected & TC_REGEXP) && *p == '/') {
 			/* it's regexp */
@@ -1048,6 +1054,7 @@ static uint32_t next_token(uint32_t expected)
 			p++;
 			*s = '\0';
 			tc = TC_REGEXP;
+			debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
 
 		} else if (*p == '.' || isdigit(*p)) {
 			/* it's a number */
@@ -1057,6 +1064,7 @@ static uint32_t next_token(uint32_t expected)
 			if (*p == '.')
 				syntax_error(EMSG_UNEXP_TOKEN);
 			tc = TC_NUMBER;
+			debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
 
 		} else {
 			/* search for something known */
@@ -1079,6 +1087,7 @@ static uint32_t next_token(uint32_t expected)
 				) {
 					/* then this is what we are looking for */
 					t_info = *ti;
+					debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
 					p += l;
 					goto token_found;
 				}
@@ -1102,14 +1111,17 @@ static uint32_t next_token(uint32_t expected)
 				p = skip_spaces(p);
 			if (*p == '(') {
 				tc = TC_FUNCTION;
+				debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
 			} else {
 				if (*p == '[') {
 					p++;
 					tc = TC_ARRAY;
-				}
+					debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
+				} else
+					debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
 			}
- token_found: ;
 		}
+ token_found:
 		g_pos = p;
 
 		/* skipping newlines in some cases */
@@ -1181,6 +1193,8 @@ static node *parse_expr(uint32_t iexp)
 	uint32_t tc, xtc;
 	var *v;
 
+	debug_printf_parse("%s(%x)\n", __func__, iexp);
+
 	sn.info = PRIMASK;
 	sn.r.n = glptr = NULL;
 	xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
@@ -1189,12 +1203,14 @@ static node *parse_expr(uint32_t iexp)
 
 		if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
 			/* input redirection (<) attached to glptr node */
+			debug_printf_parse("%s: input redir\n", __func__);
 			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
 			cn->a.n = glptr;
 			xtc = TC_OPERAND | TC_UOPPRE;
 			glptr = NULL;
 
 		} else if (tc & (TC_BINOP | TC_UOPPOST)) {
+			debug_printf_parse("%s: TC_BINOP | TC_UOPPOST\n", __func__);
 			/* for binary and postfix-unary operators, jump back over
 			 * previous operators with higher priority */
 			vn = cn;
@@ -1224,6 +1240,7 @@ static node *parse_expr(uint32_t iexp)
 			vn->a.n = cn;
 
 		} else {
+			debug_printf_parse("%s: other\n", __func__);
 			/* for operands and prefix-unary operators, attach them
 			 * to last node */
 			vn = cn;
@@ -1231,12 +1248,14 @@ static node *parse_expr(uint32_t iexp)
 			cn->a.n = vn;
 			xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
 			if (tc & (TC_OPERAND | TC_REGEXP)) {
+				debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
 				xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
 				/* one should be very careful with switch on tclass -
 				 * only simple tclasses should be used! */
 				switch (tc) {
 				case TC_VARIABLE:
 				case TC_ARRAY:
+					debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
 					cn->info = OC_VAR;
 					v = hash_search(ahash, t_string);
 					if (v != NULL) {
@@ -1253,6 +1272,7 @@ static node *parse_expr(uint32_t iexp)
 
 				case TC_NUMBER:
 				case TC_STRING:
+					debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
 					cn->info = OC_VAR;
 					v = cn->l.v = xzalloc(sizeof(var));
 					if (tc & TC_NUMBER)
@@ -1262,32 +1282,41 @@ static node *parse_expr(uint32_t iexp)
 					break;
 
 				case TC_REGEXP:
+					debug_printf_parse("%s: TC_REGEXP\n", __func__);
 					mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
 					break;
 
 				case TC_FUNCTION:
+					debug_printf_parse("%s: TC_FUNCTION\n", __func__);
 					cn->info = OC_FUNC;
 					cn->r.f = newfunc(t_string);
 					cn->l.n = condition();
 					break;
 
 				case TC_SEQSTART:
+					debug_printf_parse("%s: TC_SEQSTART\n", __func__);
 					cn = vn->r.n = parse_expr(TC_SEQTERM);
+					if (!cn)
+						syntax_error("Empty sequence");
 					cn->a.n = vn;
 					break;
 
 				case TC_GETLINE:
+					debug_printf_parse("%s: TC_GETLINE\n", __func__);
 					glptr = cn;
 					xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
 					break;
 
 				case TC_BUILTIN:
+					debug_printf_parse("%s: TC_BUILTIN\n", __func__);
 					cn->l.n = condition();
 					break;
 				}
 			}
 		}
 	}
+
+	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
 	return sn.r.n;
 }
 
@@ -1356,18 +1385,25 @@ static void chain_group(void)
 	} while (c & TC_NEWLINE);
 
 	if (c & TC_GRPSTART) {
+		debug_printf_parse("%s: TC_GRPSTART\n", __func__);
 		while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
+			debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
 			if (t_tclass & TC_NEWLINE)
 				continue;
 			rollback_token();
 			chain_group();
 		}
+		debug_printf_parse("%s: TC_GRPTERM\n", __func__);
 	} else if (c & (TC_OPSEQ | TC_OPTERM)) {
+		debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
 		rollback_token();
 		chain_expr(OC_EXEC | Vx);
-	} else {						/* TC_STATEMNT */
+	} else {
+		/* TC_STATEMNT */
+		debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
 		switch (t_info & OPCLSMASK) {
 		case ST_IF:
+			debug_printf_parse("%s: ST_IF\n", __func__);
 			n = chain_node(OC_BR | Vx);
 			n->l.n = condition();
 			chain_group();
@@ -1382,12 +1418,14 @@ static void chain_group(void)
 			break;
 
 		case ST_WHILE:
+			debug_printf_parse("%s: ST_WHILE\n", __func__);
 			n2 = condition();
 			n = chain_loop(NULL);
 			n->l.n = n2;
 			break;
 
 		case ST_DO:
+			debug_printf_parse("%s: ST_DO\n", __func__);
 			n2 = chain_node(OC_EXEC);
 			n = chain_loop(NULL);
 			n2->a.n = n->a.n;
@@ -1396,6 +1434,7 @@ static void chain_group(void)
 			break;
 
 		case ST_FOR:
+			debug_printf_parse("%s: ST_FOR\n", __func__);
 			next_token(TC_SEQSTART);
 			n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
 			if (t_tclass & TC_SEQTERM) {	/* for-in */
@@ -1421,6 +1460,7 @@ static void chain_group(void)
 
 		case OC_PRINT:
 		case OC_PRINTF:
+			debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
 			n = chain_node(t_info);
 			n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
 			if (t_tclass & TC_OUTRDR) {
@@ -1432,17 +1472,20 @@ static void chain_group(void)
 			break;
 
 		case OC_BREAK:
+			debug_printf_parse("%s: OC_BREAK\n", __func__);
 			n = chain_node(OC_EXEC);
 			n->a.n = break_ptr;
 			break;
 
 		case OC_CONTINUE:
+			debug_printf_parse("%s: OC_CONTINUE\n", __func__);
 			n = chain_node(OC_EXEC);
 			n->a.n = continue_ptr;
 			break;
 
 		/* delete, next, nextfile, return, exit */
 		default:
+			debug_printf_parse("%s: default\n", __func__);
 			chain_expr(t_info);
 		}
 	}
@@ -1460,19 +1503,24 @@ static void parse_program(char *p)
 	while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
 			TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
 
-		if (tclass & TC_OPTERM)
+		if (tclass & TC_OPTERM) {
+			debug_printf_parse("%s: TC_OPTERM\n", __func__);
 			continue;
+		}
 
 		seq = &mainseq;
 		if (tclass & TC_BEGIN) {
+			debug_printf_parse("%s: TC_BEGIN\n", __func__);
 			seq = &beginseq;
 			chain_group();
 
 		} else if (tclass & TC_END) {
+			debug_printf_parse("%s: TC_END\n", __func__);
 			seq = &endseq;
 			chain_group();
 
 		} else if (tclass & TC_FUNCDECL) {
+			debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
 			next_token(TC_FUNCTION);
 			g_pos++;
 			f = newfunc(t_string);
@@ -1490,22 +1538,27 @@ static void parse_program(char *p)
 			clear_array(ahash);
 
 		} else if (tclass & TC_OPSEQ) {
+			debug_printf_parse("%s: TC_OPSEQ\n", __func__);
 			rollback_token();
 			cn = chain_node(OC_TEST);
 			cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
 			if (t_tclass & TC_GRPSTART) {
+				debug_printf_parse("%s: TC_GRPSTART\n", __func__);
 				rollback_token();
 				chain_group();
 			} else {
+				debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
 				chain_node(OC_PRINT);
 			}
 			cn->r.n = mainseq.last;
 
 		} else /* if (tclass & TC_GRPSTART) */ {
+			debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
 			rollback_token();
 			chain_group();
 		}
 	}
+	debug_printf_parse("%s: TC_EOF\n", __func__);
 }
 
 
@@ -3000,7 +3053,7 @@ int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
 int awk_main(int argc, char **argv)
 {
 	unsigned opt;
-	char *opt_F, *opt_W;
+	char *opt_F;
 	llist_t *list_v = NULL;
 	llist_t *list_f = NULL;
 	int i, j;
@@ -3062,7 +3115,7 @@ int awk_main(int argc, char **argv)
 		}
 	}
 	opt_complementary = "v::f::"; /* -v and -f can occur multiple times */
-	opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, &opt_W);
+	opt = getopt32(argv, "F:v:f:W:", &opt_F, &list_v, &list_f, NULL);
 	argv += optind;
 	argc -= optind;
 	if (opt & 0x1)
@@ -3096,7 +3149,7 @@ int awk_main(int argc, char **argv)
 		parse_program(*argv++);
 	}
 	if (opt & 0x8) // -W
-		bb_error_msg("warning: unrecognized option '-W %s' ignored", opt_W);
+		bb_error_msg("warning: option -W is ignored");
 
 	/* fill in ARGV array */
 	setvar_i(intvar[ARGC], argc);
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 0afe9b9..5a32304 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -202,4 +202,7 @@ end d
 " \
 	"" ""
 
+testing "awk handles empty ()" \
+	"awk 'BEGIN {print()}' 2>&1" "awk: cmd. line:1: Empty sequence\n" "" ""
+
 exit $FAILCOUNT
-- 
1.7.3.4



More information about the busybox-cvs mailing list