[git commit] awk: Fix overly permissive func arg list parsing

Denys Vlasenko vda.linux at googlemail.com
Mon Jan 21 11:59:19 UTC 2019


commit: https://git.busybox.net/busybox/commit/?id=1c42c18e9601ee1416d61663f5a91874954c524d
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

Previously, the parser allowed things like 'func f(a b)' and 'func f(a,)', which GNU awk forbids.

function                                             old     new   delta
parse_program                                        327     367     +40
chain_expr                                            40      67     +27
parse_expr                                           891     915     +24
EMSG_TOO_FEW_ARGS                                     30      18     -12
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/1 up/down: 91/-12)             Total: 79 bytes

Signed-off-by: Brian Foley <bpfoley at google.com>
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 editors/awk.c       | 15 ++++++++++++++-
 testsuite/awk.tests | 12 ++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/editors/awk.c b/editors/awk.c
index 90edec82c..d25508e5d 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -1613,12 +1613,25 @@ static void parse_program(char *p)
 			f = newfunc(t_string);
 			f->body.first = NULL;
 			f->nargs = 0;
-			while (next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
+			/* Match func arg list: a comma sep list of >= 0 args, and a close paren */
+			while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
+				/* Either an empty arg list, or trailing comma from prev iter
+				 * must be followed by an arg */
+				if (f->nargs == 0 && t_tclass == TC_SEQTERM)
+					break;
+
+				/* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
+				if (t_tclass != TC_VARIABLE)
+					syntax_error(EMSG_UNEXP_TOKEN);
+
 				v = findvar(ahash, t_string);
 				v->x.aidx = f->nargs++;
 
+				/* Arg followed either by end of arg list or 1 comma */
 				if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
 					break;
+				if (t_tclass != TC_COMMA)
+					syntax_error(EMSG_UNEXP_TOKEN);
 			}
 			seq = &f->body;
 			chain_group();
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 03fedf771..0db6a26e4 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -280,6 +280,18 @@ testing "awk 'delete a[v--]' evaluates v-- once" \
 " \
 	"" ""
 
+testing "awk func arg parsing 1" \
+	"awk 'func f(,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 2" \
+	"awk 'func f(a,,b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 3" \
+	"awk 'func f(a,) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
+testing "awk func arg parsing 4" \
+	"awk 'func f(a b) { }' 2>&1" "awk: cmd. line:1: Unexpected token\n" "" ""
+
 testing "awk handles empty ()" \
 	"awk 'BEGIN {print()}' 2>&1" "awk: cmd. line:1: Empty sequence\n" "" ""
 


More information about the busybox-cvs mailing list