[BusyBox] Comments for arith.c
Aaron Lehmann
aaronl at vitelus.com
Fri Aug 3 12:10:34 UTC 2001
This patch adds some sorely needed commentary to arith.c. It changes
no code.
Thanks!
-------------- next part --------------
Index: libbb/arith.c
===================================================================
RCS file: /var/cvs/busybox/libbb/arith.c,v
retrieving revision 1.2
diff -u -r1.2 arith.c
--- libbb/arith.c 2001/08/02 05:02:46 1.2
+++ libbb/arith.c 2001/08/03 18:07:40
@@ -24,19 +24,31 @@
* as a replacement for yacc-based parsers. However, it may well be faster
* than a comparable parser writen in yacc. The supported operators are
* listed in #defines below. Parens, order of operations, and error handling
- * are supported. This code is threadsafe. */
+ * are supported. This code is threadsafe. The exact expression format should
+ * be that which POSIX specifies for shells. */
-/* To use the routine, call it with an expression string. It returns an
- * integer result. You will also need to define an "error" function
- * that takes printf arguments and _does not return_, or modify the code
- * to use another error mechanism. */
+/* The code uses a simple two-stack algorithm. See
+ * http://www.onthenet.com.au/~grahamis/int2008/week02/lect02.html
+ * for a detailed explaination of the infix-to-postfix algorithm on which
+ * this is based (this code differs in that it applies operators immediately
+ * to the stack instead of adding them to a queue to end up with an
+ * expression). */
+/* To use the routine, call it with an expression string and error return
+ * pointer */
+
#include <stdlib.h>
#include <string.h>
#include "libbb.h"
typedef char operator;
+/* An operator's token id is a bit of a bitfield. The lower 5 bits are the
+ * precedence, and high 3 are an ID unique accross operators of that
+ * precedence. The ID portion is so that multiple operators can have the
+ * same precedence, ensuring that the leftmost one is evaluated first.
+ * Consider * and /. */
+
#define tok_decl(prec,id) (((id)<<5)|(prec))
#define PREC(op) ((op)&0x1F)
@@ -70,6 +82,8 @@
#define TOK_DIV tok_decl(10,1)
#define TOK_REM tok_decl(10,2)
+/* For now all unary operators have the same precedence, and that's used to
+ * identify them as unary operators */
#define UNARYPREC 14
#define TOK_BNOT tok_decl(UNARYPREC,0)
#define TOK_NOT tok_decl(UNARYPREC,1)
@@ -79,9 +93,14 @@
#define ARITH_APPLY(op) arith_apply(op, numstack, &numstackptr)
#define NUMPTR (*numstackptr)
+
+/* "applying" a token means performing it on the top elements on the integer
+ * stack. For a unary operator it will only change the top element, but a
+ * binary operator will pop two arguments and push a result */
static short arith_apply(operator op, long *numstack, long **numstackptr)
{
- if (NUMPTR == numstack) goto err;
+ if (NUMPTR == numstack) goto err; /* There is no operator that can work
+ without arguments */
if (op == TOK_UMINUS)
NUMPTR[-1] *= -1;
else if (op == TOK_NOT)
@@ -91,8 +110,9 @@
/* Binary operators */
else {
- if (NUMPTR-1 == numstack) goto err;
- --NUMPTR;
+ if (NUMPTR-1 == numstack) goto err; /* ... and binary operators need two
+ arguments */
+ --NUMPTR; /* ... and they pop one */
if (op == TOK_BOR)
NUMPTR[-1] |= *NUMPTR;
else if (op == TOK_OR)
@@ -140,7 +160,7 @@
extern long arith (const char *startbuf, int *errcode)
{
- register char arithval;
+ register char arithval; /* Current character under analysis */
const char *expr = startbuf;
operator lasttok = TOK_MUL, op;
@@ -148,24 +168,34 @@
unsigned char prec;
long *numstack, *numstackptr;
+ /* Stack of operator tokens */
operator *stack = alloca(datasizes * sizeof(operator)), *stackptr = stack;
*errcode = 0;
+ /* Stack of integers */
+ /* The proof that there can be no more than strlen(startbuf)/2+1 integers
+ * in any given correct or incorrect expression is left as an excersize to
+ * the reader. */
numstack = alloca((datasizes/2+1)*sizeof(long)), numstackptr = numstack;
while ((arithval = *expr)) {
if (arithval == ' ' || arithval == '\n' || arithval == '\t')
- goto prologue;
+ goto prologue; /* Skip whitespace */
if ((unsigned)arithval-'0' <= 9) /* isdigit */ {
*numstackptr++ = strtol(expr, (char **) &expr, 10);
lasttok = TOK_NUM;
continue;
} if (arithval == '(') {
+ /* Left parens are simply pushed onto the token stack */
*stackptr++ = TOK_LPAREN;
lasttok = TOK_LPAREN;
goto prologue;
} if (arithval == ')') {
- lasttok = TOK_NUM;
+ lasttok = TOK_NUM; /* Any operator directly after the close paren
+ should consider itself binary */
+ /* The algorithm employed here is simple: while we don't hit an
+ * open paren nor the bottom of the stack, pop tokens and apply
+ * them */
while (stackptr != stack) {
op = *--stackptr;
if (op == TOK_LPAREN)
@@ -228,6 +258,9 @@
op = TOK_DIV;
else if (arithval == '%')
op = TOK_REM;
+ /* Plus and minus are binary (not unary) _only_ if the last token was
+ * as number, or a right paren (which pretends to be a number, since
+ * it evaluates to one). Think about it. It makes sense. */
else if (arithval == '+') {
if (lasttok != TOK_NUM) goto prologue; /* Unary plus */
op = TOK_ADD;
@@ -238,21 +271,37 @@
else goto err; /* Unknown token */
prec = PREC(op);
- if (prec != UNARYPREC)
+ if (prec != UNARYPREC) /* We don't want a unary operator to cause
+ recursive descent on the stack, because
+ there can be many in a row and it could
+ cause an operator to be evaluated before
+ its argument is pushed onto the integer
+ stack */
+ /* But for binary operators, "apply" everything on the operator
+ * stack until we find an operator with a lesser priority than the
+ * one we have just extracted. */
+ /* Left paren is given the lowest priority so it will never be
+ * "applied" in this way */
while (stackptr != stack && PREC(stackptr[-1]) >= prec) {
*errcode = ARITH_APPLY(*--stackptr);
if(*errcode) return *errcode;
}
+ /* Push this operator to the stack */
*stackptr++ = op;
+ /* and remember it */
lasttok = op;
prologue: ++expr;
} /* yay */
+ /* This is only reached after all tokens have been extracted from the input
+ * stream. If there are still tokens on the operator stack, they are to be
+ * applied in order. At the end, there should be a final result on the
+ * integer stack */
while (stackptr != stack) {
*errcode = ARITH_APPLY(*--stackptr);
if(*errcode) return *errcode;
}
- if (numstackptr != numstack+1) {
+ if (numstackptr != numstack+1) { /* ... but if there isn't, it's bad */
err:
*errcode = -1;
return -1;
More information about the busybox
mailing list