[git commit] awk: restore assignment precedence to be lower than ternary ?:
Denys Vlasenko
vda.linux at googlemail.com
Tue Jul 9 13:30:46 UTC 2024
commit: https://git.busybox.net/busybox/commit/?id=38335df9e9f45378c3407defd38b5b610578bdda
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master
Something is fishy with constructs like "3==v=3" in gawk:
they should not work, but do. Ignore those for now.
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
editors/awk.c | 65 ++++++++++++++++++++++++++++++++++++++++++++---------
testsuite/awk.tests | 31 ++++++++++++++++---------
2 files changed, 74 insertions(+), 22 deletions(-)
diff --git a/editors/awk.c b/editors/awk.c
index 8bc214b69..697a44c8c 100644
--- a/editors/awk.c
+++ b/editors/awk.c
@@ -433,36 +433,47 @@ static const char tokenlist[] ALIGN1 =
;
static const uint32_t tokeninfo[] ALIGN4 = {
- 0,
- 0,
+ 0, /* ( */
+ 0, /* ) */
#define TI_REGEXP OC_REGEXP
- TI_REGEXP,
+ TI_REGEXP, /* / */
+ /* >> > | */
xS|'a', xS|'w', xS|'|',
+ /* ++ -- */
OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
+ /* ++ -- $ */
TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
- OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-',
- OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&',
- OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&',
+ /* == = += -= */
+ OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
+ /* *= /= %= ^= (^ is exponentiation, NOT xor) */
+ OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
+ /* + - **= ** */
+ OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
+ /* / % ^ * */
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
+ /* != >= <= > */
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
#define TI_LESS (OC_COMPARE|VV|P(39)|2)
+ /* < !~ ~ && */
TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
#define TI_COLON (OC_COLON|xx|P(67)|':')
+ /* || ? : */
OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
#define TI_IN (OC_IN|SV|P(49))
TI_IN,
#define TI_COMMA (OC_COMMA|SS|P(80))
TI_COMMA,
#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
- TI_PGETLINE,
+ TI_PGETLINE, /* | */
+ /* + - ! */
OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
0, /* ] */
- 0,
- 0,
- 0,
+ 0, /* { */
+ 0, /* } */
+ 0, /* ; */
0, /* \n */
ST_IF, ST_DO, ST_FOR, OC_BREAK,
OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
@@ -511,6 +522,38 @@ static const uint32_t tokeninfo[] ALIGN4 = {
#undef OC_F
};
+/* The gawk 5.1.1 manpage says the precedence of comparisons and assignments is as follows:
+ * ......
+ * < > <= >= == !=
+ * ~ !~
+ * in
+ * &&
+ * ||
+ * ?:
+ * = += -= *= /= %= ^=
+ * But there are some abnormalities:
+ * awk 'BEGIN { print v=3==3,v }' - ok:
+ * 1 1
+ * awk 'BEGIN { print 3==v=3,v }' - wrong, (3==v)=3 is not a valid assignment:
+ * 1 3
+ * This also unexpectedly works: echo "foo" | awk '$1==$1="foo" {print $1}'
+ * More than one comparison op fails to parse:
+ * awk 'BEGIN { print 3==3==3 }' - syntax error (wrong, should work)
+ * awk 'BEGIN { print 3==3!=3 }' - syntax error (wrong, should work)
+ *
+ * The ternary a?b:c works as follows in gawk: "a" can't be assignment
+ * ("= has lower precedence than ?") but inside "b" or "c", assignment
+ * is higher precedence:
+ * awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w }'
+ * 5
+ * 5 1 5
+ * This differs from C and shell's "test" rules for ?: which have implicit ()
+ * around "b" in ?:, but not around "c" - they would barf on "w=5" above.
+ * gawk allows nesting of ?: - this works:
+ * u=0?v=4?5:6:w=7?8:9 means u=0?(v=4?5:6):(w=7?8:9)
+ * bbox is buggy here, requires parens: "u=0?(v=4):(w=5)"
+ */
+
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
@@ -1409,7 +1452,7 @@ static node *parse_expr(uint32_t term_tc)
vn = vn->a.n;
if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
}
- if (t_info == TI_TERNARY)
+ if (t_info == TI_TERNARY) /* "?" operator */
//TODO: why?
t_info += PRECEDENCE(6);
cn = vn->a.n->r.n = new_node(t_info);
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
index 063084a1c..be25f6696 100755
--- a/testsuite/awk.tests
+++ b/testsuite/awk.tests
@@ -5,6 +5,7 @@
. ./testing.sh
+sq="'"
# testing "description" "command" "result" "infile" "stdin"
testing "awk -F case 0" "awk -F '[#]' '{ print NF }'" "" "" ""
@@ -479,12 +480,6 @@ testing 'awk backslash+newline eaten with no trace' \
"Hello world\n" \
'' ''
-testing 'awk assign while test' \
- "awk '\$1==\$1=\"foo\" {print \$1}'" \
- "foo\n" \
- "" \
- "foo"
-
# User-supplied bug (SEGV) example, was causing use-after-realloc
testing 'awk assign while assign' \
"awk '\$5=\$\$5=\$0'; echo \$?" \
@@ -543,16 +538,30 @@ testing 'awk assign while assign' \
# If field separator FS=' ' (default), fields are split only on
# space or tab or linefeed, NOT other whitespace.
testing 'awk does not split on CR (char 13)' \
- "awk '{ \$1=\$0; print }'" \
+ 'awk '$sq'{ $1=$0; print }'$sq \
'word1 word2 word3\r word2 word3\r\n' \
'' 'word1 word2 word3\r'
-testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
- "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
- '0\n1\n2\n1\n3\n' \
+# No, it seems to be a bug in the gawk parser.
+#testing "awk = has higher precedence than == (despite what gawk manpage claims)" \
+# "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \
+# '0\n1\n2\n1\n3\n' \
+# '' ''
+#
+#testing 'awk assign while test' \
+# 'awk '$sq'$1==$1="foo" {print $1}'$sq \
+# "foo\n" \
+# "" \
+# "foo"
+
+testing "awk = and ?: precedence" \
+ 'awk '$sq'BEGIN { a=0?"bug":"ok"; print a}'$sq \
+ 'ok\n' \
'' ''
-sq="'"
+# TODO: gawk can do this: awk 'BEGIN { u=v=w=1; print u=0?v=4:w=5; print u,v,w}'
+# and even this: u=0?v=4?5:6:w=7?8:9
+
testing 'awk gensub backslashes \' \
'awk '$sq'BEGIN { s="\\"; print "s=" s; print gensub("a", s, "g", "a|a") }'$sq \
's=\\
More information about the busybox-cvs
mailing list