[BusyBox] Comments for arith.c

Aaron Lehmann aaronl at vitelus.com
Fri Aug 3 12:10:34 UTC 2001


This patch adds some sorely needed commentary to arith.c. It changes
no code.

Thanks!
-------------- next part --------------
Index: libbb/arith.c
===================================================================
RCS file: /var/cvs/busybox/libbb/arith.c,v
retrieving revision 1.2
diff -u -r1.2 arith.c
--- libbb/arith.c	2001/08/02 05:02:46	1.2
+++ libbb/arith.c	2001/08/03 18:07:40
@@ -24,19 +24,31 @@
  * as a replacement for yacc-based parsers. However, it may well be faster
  * than a comparable parser writen in yacc. The supported operators are
  * listed in #defines below. Parens, order of operations, and error handling
- * are supported. This code is threadsafe. */
+ * are supported. This code is threadsafe. The exact expression format should
+ * be that which POSIX specifies for shells. */
 
-/* To use the routine, call it with an expression string. It returns an
- * integer result. You will also need to define an "error" function
- * that takes printf arguments and _does not return_, or modify the code
- * to use another error mechanism. */
+/* The code uses a simple two-stack algorithm. See
+ * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
+ * for a detailed explaination of the infix-to-postfix algorithm on which
+ * this is based (this code differs in that it applies operators immediately
+ * to the stack instead of adding them to a queue to end up with an
+ * expression). */
 
+/* To use the routine, call it with an expression string and error return
+ * pointer */
+
 #include <stdlib.h>
 #include <string.h>
 #include "libbb.h"
 
 typedef char operator;
 
+/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
+ * precedence, and high 3 are an ID unique accross operators of that
+ * precedence. The ID portion is so that multiple operators can have the
+ * same precedence, ensuring that the leftmost one is evaluated first.
+ * Consider * and /. */
+
 #define tok_decl(prec,id) (((id)<<5)|(prec))
 #define PREC(op) ((op)&0x1F)
 
@@ -70,6 +82,8 @@
 #define TOK_DIV tok_decl(10,1)
 #define TOK_REM tok_decl(10,2)
 
+/* For now all unary operators have the same precedence, and that's used to
+ * identify them as unary operators */
 #define UNARYPREC 14
 #define TOK_BNOT tok_decl(UNARYPREC,0)
 #define TOK_NOT tok_decl(UNARYPREC,1)
@@ -79,9 +93,14 @@
 
 #define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
 #define NUMPTR (*numstackptr)
+
+/* "applying" a token means performing it on the top elements on the integer
+ * stack. For a unary operator it will only change the top element, but a
+ * binary operator will pop two arguments and push a result */
 static short arith_apply(operator op, long *numstack, long **numstackptr)
 {
-	if (NUMPTR == numstack) goto err;
+	if (NUMPTR == numstack) goto err; /* There is no operator that can work
+										 without arguments */
 	if (op == TOK_UMINUS)
 		NUMPTR[-1] *= -1;
 	else if (op == TOK_NOT)
@@ -91,8 +110,9 @@
 
 	/* Binary operators */
 	else {
-	if (NUMPTR-1 == numstack) goto err;
-	--NUMPTR;
+	if (NUMPTR-1 == numstack) goto err; /* ... and binary operators need two
+										   arguments */
+	--NUMPTR; /* ... and they pop one */
 	if (op == TOK_BOR)
 		NUMPTR[-1] |= *NUMPTR;
 	else if (op == TOK_OR)
@@ -140,7 +160,7 @@
 
 extern long arith (const char *startbuf, int *errcode)
 {
-	register char arithval;
+	register char arithval; /* Current character under analysis */
 	const char *expr = startbuf;
 
 	operator lasttok = TOK_MUL, op;
@@ -148,24 +168,34 @@
 	unsigned char prec;
 
 	long *numstack, *numstackptr;
+	/* Stack of operator tokens */
 	operator *stack = alloca(datasizes * sizeof(operator)), *stackptr = stack;
 
 	*errcode = 0;
+	/* Stack of integers */
+	/* The proof that there can be no more than strlen(startbuf)/2+1 integers
+	 * in any given correct or incorrect expression is left as an excersize to
+	 * the reader. */
 	numstack = alloca((datasizes/2+1)*sizeof(long)), numstackptr = numstack;
 
 	while ((arithval = *expr)) {
 		if (arithval == ' ' || arithval == '\n' || arithval == '\t')
-			goto prologue;
+			goto prologue; /* Skip whitespace */
 		if ((unsigned)arithval-'0' <= 9) /* isdigit */ {
 			*numstackptr++ = strtol(expr, (char **) &expr, 10);
 			lasttok = TOK_NUM;
 			continue;
 		} if (arithval == '(') {
+			/* Left parens are simply pushed onto the token stack */
 			*stackptr++ = TOK_LPAREN;
 			lasttok = TOK_LPAREN;
 			goto prologue;
 		} if (arithval == ')') {
-			lasttok = TOK_NUM;
+			lasttok = TOK_NUM; /* Any operator directly after the close paren
+								  should consider itself binary */
+			/* The algorithm employed here is simple: while we don't hit an
+			 * open paren nor the bottom of the stack, pop tokens and apply
+			 * them */
 			while (stackptr != stack) {
 				op = *--stackptr;
 				if (op == TOK_LPAREN)
@@ -228,6 +258,9 @@
 			op = TOK_DIV;
 		else if (arithval == '%')
 			op = TOK_REM;
+		/* Plus and minus are binary (not unary) _only_ if the last token was
+		 * as number, or a right paren (which pretends to be a number, since
+		 * it evaluates to one). Think about it. It makes sense. */
 		else if (arithval == '+') {
 			if (lasttok != TOK_NUM) goto prologue; /* Unary plus */
 			op = TOK_ADD;
@@ -238,21 +271,37 @@
 		else goto err; /* Unknown token */
 
 		prec = PREC(op);
-		if (prec != UNARYPREC)
+		if (prec != UNARYPREC) /* We don't want a unary operator to cause
+								  recursive descent on the stack, because
+								  there can be many in a row and it could
+								  cause an operator to be evaluated before
+								  its argument is pushed onto the integer
+								  stack */
+			/* But for binary operators, "apply" everything on the operator
+			 * stack until we find an operator with a lesser priority than the
+			 * one we have just extracted. */
+			/* Left paren is given the lowest priority so it will never be
+			 * "applied" in this way */
 			while (stackptr != stack && PREC(stackptr[-1]) >= prec) {
 				*errcode = ARITH_APPLY(*--stackptr);
 				if(*errcode) return *errcode;
 			}
+		/* Push this operator to the stack */
 		*stackptr++ = op;
+		/* and remember it */
 		lasttok = op;
 prologue: ++expr;
 	} /* yay */
 
+	/* This is only reached after all tokens have been extracted from the input
+	 * stream. If there are still tokens on the operator stack, they are to be
+	 * applied in order. At the end, there should be a final result on the
+	 * integer stack */
 	while (stackptr != stack) {
 		*errcode = ARITH_APPLY(*--stackptr);
 		if(*errcode) return *errcode;
 	}
-	if (numstackptr != numstack+1) {
+	if (numstackptr != numstack+1) { /* ... but if there isn't, it's bad */
 err: 
 	    *errcode = -1;
 	    return -1;


More information about the busybox mailing list