[git commit] ash: expand: Do not reprocess data when expanding words

Denys Vlasenko vda.linux at googlemail.com
Mon Feb 24 18:20:51 UTC 2020


commit: https://git.busybox.net/busybox/commit/?id=823318822ccb0e0910abbd4e1f983637dfd9d3c0
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

Upstream patch:

    Date: Wed, 30 May 2018 02:06:03 +0800
    expand: Do not reprocess data when expanding words

    Currently various paths will reprocess data when performing word
    expansion.  For example, expari will skip backwards looking for
    the start of the arithmetic expansion, while evalvar will skip
    unexpanded words manually.

    This is cumbersome and error-prone.  This patch fixes this by
    making word expansions proceed in a linear fashion.  This means
    changing argstr and the various expansion functions such as expari
    and subevalvar to return the next character to be expanded.

    This is inspired by similar code from FreeBSD.  However, we take
    things one step further and completely remove the manual word
    skipping in evalvar.  This is accomplished by introducing a new
    EXP_DISCARD flag that tells argstr to only parse and not produce
    any actual expansions.

    Incidentally, argstr will now always NUL-terminate the expansion
    unless the EXP_WORD flag is set.  This is because all but one
    caller of argstr wants the result to be NUL-termianted.

    Signed-off-by: Herbert Xu <herbert at gondor.apana.org.au>

Also includes two one-line follow-up fixes:

    expand: Eat closing brace for length parameter
            if (subtype == VSLENGTH) {
    +               p++;
                    if (flag & EXP_DISCARD)
    expand: Fix double-decrement in argstr
    -               newloc = expdest - (char *)stackblock() - end;
    +               newloc = q - (char *)stackblock() - end;

and changes in code for bash substring extensions.

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 shell/ash.c | 332 ++++++++++++++++++++++++++++--------------------------------
 1 file changed, 154 insertions(+), 178 deletions(-)

diff --git a/shell/ash.c b/shell/ash.c
index 03864c667..425da6bb6 100644
--- a/shell/ash.c
+++ b/shell/ash.c
@@ -6035,6 +6035,7 @@ static int substr_atoi(const char *s)
 #define EXP_WORD        0x40    /* expand word in parameter expansion */
 #define EXP_QUOTED      0x100   /* expand word in double quotes */
 #define EXP_KEEPNUL     0x200   /* do not skip NUL characters */
+#define EXP_DISCARD     0x400   /* discard result of expansion */
 
 /*
  * rmescape() flags
@@ -6452,13 +6453,15 @@ removerecordregions(int endoff)
 }
 
 static char *
-exptilde(char *startp, char *p, int flag)
+exptilde(char *startp, int flag)
 {
 	unsigned char c;
 	char *name;
 	struct passwd *pw;
 	const char *home;
+	char *p;
 
+	p = startp;
 	name = p + 1;
 
 	while ((c = *++p) != '\0') {
@@ -6477,6 +6480,8 @@ exptilde(char *startp, char *p, int flag)
 		}
 	}
  done:
+	if (flag & EXP_DISCARD)
+		goto out;
 	*p = '\0';
 	if (*name == '\0') {
 		home = lookupvar("HOME");
@@ -6486,13 +6491,13 @@ exptilde(char *startp, char *p, int flag)
 			goto lose;
 		home = pw->pw_dir;
 	}
+	*p = c;
 	if (!home)
 		goto lose;
-	*p = c;
 	strtodest(home, flag | EXP_QUOTED);
+ out:
 	return p;
  lose:
-	*p = c;
 	return startp;
 }
 
@@ -6591,6 +6596,9 @@ expbackq(union node *cmd, int flag)
 	int startloc;
 	struct stackmark smark;
 
+	if (flag & EXP_DISCARD)
+		goto out;
+
 	INT_OFF;
 	startloc = expdest - (char *)stackblock();
 	pushstackmark(&smark, startloc);
@@ -6632,64 +6640,57 @@ expbackq(union node *cmd, int flag)
 		(int)((dest - (char *)stackblock()) - startloc),
 		(int)((dest - (char *)stackblock()) - startloc),
 		stackblock() + startloc));
+
+ out:
+	argbackq = argbackq->next;
 }
 
+/* expari needs it */
+static char *argstr(char *p, int flag);
+
 #if ENABLE_FEATURE_SH_MATH
 /*
  * Expand arithmetic expression.  Backup to start of expression,
  * evaluate, place result in (backed up) result, adjust string position.
  */
-static void
-expari(int flag)
+static char *
+expari(char *start, int flag)
 {
-	char *p, *start;
+	struct stackmark sm;
 	int begoff;
+	int endoff;
 	int len;
+	arith_t result;
+	char *p;
 
-	/* ifsfree(); */
-
-	/*
-	 * This routine is slightly over-complicated for
-	 * efficiency.  Next we scan backwards looking for the
-	 * start of arithmetic.
-	 */
-	start = stackblock();
-	p = expdest - 1;
-	*p = '\0';
-	p--;
-	while (1) {
-		int esc;
-
-		while ((unsigned char)*p != CTLARI) {
-			p--;
-#if DEBUG
-			if (p < start) {
-				ash_msg_and_raise_error("missing CTLARI (shouldn't happen)");
-			}
-#endif
-		}
-
-		esc = esclen(start, p);
-		if (!(esc % 2)) {
-			break;
-		}
+	p = stackblock();
+	begoff = expdest - p;
+	p = argstr(start, flag & EXP_DISCARD);
 
-		p -= esc + 1;
-	}
+	if (flag & EXP_DISCARD)
+		goto out;
 
-	begoff = p - start;
+	start = stackblock();
+	endoff = expdest - start;
+	start += begoff;
+	STADJUST(start - expdest, expdest);
 
 	removerecordregions(begoff);
 
-	expdest = p;
-
 	if (flag & QUOTES_ESC)
-		rmescapes(p + 1, 0, NULL);
+		rmescapes(start, 0, NULL);
+
+	pushstackmark(&sm, endoff);
+	result = ash_arith(start);
+	popstackmark(&sm);
 
-	len = cvtnum(ash_arith(p + 1), flag);
+	len = cvtnum(result, flag);
 
 	if (!(flag & EXP_QUOTED))
 		recordregion(begoff, begoff + len, 0);
+
+ out:
+	return p;
 }
 #endif
 
@@ -6701,7 +6702,7 @@ static char *evalvar(char *p, int flags);
  * characters to allow for further processing.  Otherwise treat
  * $@ like $* since no splitting will be performed.
  */
-static void
+static char *
 argstr(char *p, int flag)
 {
 	static const char spclchars[] ALIGN1 = {
@@ -6713,6 +6714,7 @@ argstr(char *p, int flag)
 		CTLVAR,
 		CTLBACKQ,
 #if ENABLE_FEATURE_SH_MATH
+		CTLARI,
 		CTLENDARI,
 #endif
 		'\0'
@@ -6723,41 +6725,45 @@ argstr(char *p, int flag)
 	size_t length;
 	int startloc;
 
-	if (!(flag & EXP_VARTILDE)) {
-		reject += 2;
-	} else if (flag & EXP_VARTILDE2) {
-		reject++;
-	}
+	reject += !!(flag & EXP_VARTILDE2);
+	reject += flag & EXP_VARTILDE ? 0 : 2;
 	inquotes = 0;
 	length = 0;
 	if (flag & EXP_TILDE) {
-		char *q;
-
 		flag &= ~EXP_TILDE;
  tilde:
-		q = p;
-		if (*q == '~')
-			p = exptilde(p, q, flag);
+		if (*p == '~')
+			p = exptilde(p, flag);
 	}
  start:
 	startloc = expdest - (char *)stackblock();
 	for (;;) {
+		int end;
 		unsigned char c;
 
 		length += strcspn(p + length, reject);
+		end = 0;
 		c = p[length];
-		if (c) {
-			if (!(c & 0x80)
-			IF_FEATURE_SH_MATH(|| c == CTLENDARI)
-			) {
-				/* c == '=' || c == ':' || c == CTLENDARI */
-				length++;
-			}
+		if (!(c & 0x80)
+		 IF_FEATURE_SH_MATH(|| c == CTLENDARI)
+		 || c == CTLENDVAR
+		) {
+			/*
+			 * c == '=' || c == ':' || c == '\0' ||
+			 * c == CTLENDARI || c == CTLENDVAR
+			 */
+			length++;
+			/* c == '\0' || c == CTLENDARI || c == CTLENDVAR */
+			end = !!((c - 1) & 0x80);
 		}
-		if (length > 0) {
+		if (length > 0 && !(flag & EXP_DISCARD)) {
 			int newloc;
-			expdest = stnputs(p, length, expdest);
-			newloc = expdest - (char *)stackblock();
+			char *q;
+
+			q = stnputs(p, length, expdest);
+			q[-1] &= end - 1;
+			expdest = q - (flag & EXP_WORD ? end : 0);
+			newloc = q - (char *)stackblock() - end;
 			if (breakall && !inquotes && newloc > startloc) {
 				recordregion(startloc, newloc, 0);
 			}
@@ -6766,14 +6772,11 @@ argstr(char *p, int flag)
 		p += length + 1;
 		length = 0;
 
+		if (end)
+			break;
+
 		switch (c) {
-		case '\0':
-			goto breakloop;
 		case '=':
-			if (flag & EXP_VARTILDE2) {
-				p--;
-				continue;
-			}
 			flag |= EXP_VARTILDE2;
 			reject++;
 			/* fall through */
@@ -6786,11 +6789,6 @@ argstr(char *p, int flag)
 				goto tilde;
 			}
 			continue;
-		}
-
-		switch (c) {
-		case CTLENDVAR: /* ??? */
-			goto breakloop;
 		case CTLQUOTEMARK:
 			/* "$@" syntax adherence hack */
 			if (!inquotes && !memcmp(p, dolatstr + 1, DOLATSTRLEN - 1)) {
@@ -6816,17 +6814,15 @@ argstr(char *p, int flag)
 			goto start;
 		case CTLBACKQ:
 			expbackq(argbackq->n, flag | inquotes);
-			argbackq = argbackq->next;
 			goto start;
 #if ENABLE_FEATURE_SH_MATH
-		case CTLENDARI:
-			p--;
-			expari(flag | inquotes);
+		case CTLARI:
+			p = expari(p, flag | inquotes);
 			goto start;
 #endif
 		}
 	}
- breakloop: ;
+	return p - 1;
 }
 
 static char *
@@ -6951,25 +6947,27 @@ varunset(const char *end, const char *var, const char *umsg, int varflags)
 	ash_msg_and_raise_error("%.*s: %s%s", (int)(end - var - 1), var, msg, tail);
 }
 
-static const char *
-subevalvar(char *p, char *str, int strloc, int subtype,
+static char *
+subevalvar(char *start, char *str, int strloc,
 		int startloc, int varflags, int flag)
 {
-	struct nodelist *saveargbackq = argbackq;
+	int subtype = varflags & VSTYPE;
 	int quotes = flag & QUOTES_ESC;
 	char *startp;
 	char *loc;
 	char *rmesc, *rmescend;
-	int amount, resetloc;
+	long amount;
+	int resetloc;
 	int argstr_flags;
 	IF_BASH_PATTERN_SUBST(int workloc;)
 	IF_BASH_PATTERN_SUBST(int slash_pos;)
 	IF_BASH_PATTERN_SUBST(char *repl;)
 	int zero;
 	char *(*scan)(char*, char*, char*, char*, int, int);
+	char *p;
 
-	//bb_error_msg("subevalvar(p:'%s',str:'%s',strloc:%d,subtype:%d,startloc:%d,varflags:%x,quotes:%d)",
-	//		p, str, strloc, subtype, startloc, varflags, quotes);
+	//bb_error_msg("subevalvar(start:'%s',str:'%s',strloc:%d,startloc:%d,varflags:%x,quotes:%d)",
+	//		start, str, strloc, startloc, varflags, quotes);
 
 #if BASH_PATTERN_SUBST
 	/* For "${v/pattern/repl}", we must find the delimiter _before_
@@ -6979,7 +6977,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 	repl = NULL;
 	if (subtype == VSREPLACE || subtype == VSREPLACEALL) {
 		/* Find '/' and replace with NUL */
-		repl = p;
+		repl = start;
 		/* The pattern can't be empty.
 		 * IOW: if the first char after "${v//" is a slash,
 		 * it does not terminate the pattern - it's the first char of the pattern:
@@ -7004,17 +7002,17 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 		}
 	}
 #endif
-	argstr_flags = EXP_TILDE;
-	if (subtype != VSASSIGN
-	 && subtype != VSQUESTION
+	argstr_flags = (flag & EXP_DISCARD) | EXP_TILDE;
+	if (!str
 #if BASH_SUBSTR
 	 && subtype != VSSUBSTR
 #endif
 	) {
 		/* EXP_CASE keeps CTLESC's */
-		argstr_flags = EXP_TILDE | EXP_CASE;
+		argstr_flags |= EXP_CASE;
 	}
-	argstr(p, argstr_flags);
+	p = argstr(start, argstr_flags);
+
 	//bb_error_msg("str0:'%s'", (char *)stackblock() + strloc);
 #if BASH_PATTERN_SUBST
 	slash_pos = -1;
@@ -7022,24 +7020,25 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 		slash_pos = expdest - ((char *)stackblock() + strloc);
 		STPUTC('/', expdest);
 		//bb_error_msg("repl+1:'%s'", repl + 1);
-		argstr(repl + 1, EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
+		p = argstr(repl + 1, (flag & EXP_DISCARD) | EXP_TILDE); /* EXP_TILDE: echo "${v/x/~}" expands ~ ! */
 		*repl = '/';
 	}
 #endif
-	STPUTC('\0', expdest);
-	argbackq = saveargbackq;
+	if (flag & EXP_DISCARD)
+		return p;
+
 	startp = (char *)stackblock() + startloc;
 	//bb_error_msg("str1:'%s'", (char *)stackblock() + strloc);
 
 	switch (subtype) {
 	case VSASSIGN:
 		setvar0(str, startp);
-		amount = startp - expdest;
-		STADJUST(amount, expdest);
-		return startp;
+
+		loc = startp;
+		goto out;
 
 	case VSQUESTION:
-		varunset(p, str, startp, varflags);
+		varunset(start, str, startp, varflags);
 		/* NOTREACHED */
 
 #if BASH_SUBSTR
@@ -7110,9 +7109,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 			*loc++ = *vstr++;
 		}
 		*loc = '\0';
-		amount = loc - expdest;
-		STADJUST(amount, expdest);
-		return loc;
+		goto out;
 	}
 #endif /* BASH_SUBSTR */
 	}
@@ -7178,7 +7175,7 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 
 		/* If there's no pattern to match, return the expansion unmolested */
 		if (str[0] == '\0')
-			return NULL;
+			goto out1;
 
 		len = 0;
 		idx = startp;
@@ -7259,9 +7256,8 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 		startp = (char *)stackblock() + startloc;
 		memmove(startp, (char *)stackblock() + workloc, len + 1);
 		//bb_error_msg("startp:'%s'", startp);
-		amount = expdest - (startp + len);
-		STADJUST(-amount, expdest);
-		return startp;
+		loc = startp + len;
+		goto out;
 	}
 #endif /* BASH_PATTERN_SUBST */
 
@@ -7282,10 +7278,17 @@ subevalvar(char *p, char *str, int strloc, int subtype,
 			loc = startp + (str - loc) - 1;
 		}
 		*loc = '\0';
-		amount = loc - expdest;
-		STADJUST(amount, expdest);
-	}
-	return loc;
+	} else
+		loc = str - 1;
+
+ out:
+	amount = loc - expdest;
+	STADJUST(amount, expdest);
+ out1:
+	/* Remove any recorded regions beyond start of variable */
+	removerecordregions(startloc);
+
+	return p;
 }
 
 /*
@@ -7310,7 +7313,14 @@ varvalue(char *name, int varflags, int flags, int quoted)
 	ssize_t len = 0;
 	int sep;
 	int subtype = varflags & VSTYPE;
-	int discard = subtype == VSPLUS || subtype == VSLENGTH;
+	int discard = (subtype == VSPLUS || subtype == VSLENGTH) | (flags & EXP_DISCARD);
+
+	if (!subtype) {
+		if (discard)
+			return -1;
+
+		raise_error_syntax("bad substitution");
+	}
 
 	flags |= EXP_KEEPNUL;
 	flags &= discard ? ~QUOTES_ESC : ~0;
@@ -7427,6 +7437,7 @@ varvalue(char *name, int varflags, int flags, int quoted)
 
 	if (discard)
 		STADJUST(-len, expdest);
+
 	return len;
 }
 
@@ -7439,18 +7450,15 @@ evalvar(char *p, int flag)
 {
 	char varflags;
 	char subtype;
-	int quoted;
 	char *var;
 	int patloc;
 	int startloc;
 	ssize_t varlen;
+	int quoted;
 
 	varflags = (unsigned char) *p++;
 	subtype = varflags & VSTYPE;
 
-	if (!subtype)
-		raise_error_syntax("bad substitution");
-
 	quoted = flag & EXP_QUOTED;
 	var = p;
 	startloc = expdest - (char *)stackblock();
@@ -7461,35 +7469,29 @@ evalvar(char *p, int flag)
 	if (varflags & VSNUL)
 		varlen--;
 
-	if (subtype == VSPLUS) {
+	switch (subtype) {
+	case VSPLUS:
 		varlen = -1 - varlen;
-		goto vsplus;
-	}
-
-	if (subtype == VSMINUS) {
- vsplus:
-		if (varlen < 0) {
-			argstr(
-				p,
-				flag | EXP_TILDE | EXP_WORD
-			);
-			goto end;
-		}
+		/* fall through */
+	case 0:
+	case VSMINUS:
+		p = argstr(p, flag | EXP_TILDE | EXP_WORD);
+		if (varlen < 0)
+			return p;
 		goto record;
-	}
 
-	if (subtype == VSASSIGN || subtype == VSQUESTION) {
+	case VSASSIGN:
+	case VSQUESTION:
 		if (varlen >= 0)
 			goto record;
 
-		subevalvar(p, var, 0, subtype, startloc, varflags,
+		p = subevalvar(p, var, 0, startloc, varflags,
 			   flag & ~QUOTES_ESC);
+
+		if (flag & EXP_DISCARD)
+			return p;
+
 		varflags &= ~VSNUL;
-		/*
-		 * Remove any recorded regions beyond
-		 * start of variable
-		 */
-		removerecordregions(startloc);
 		goto again;
 	}
 
@@ -7497,20 +7499,15 @@ evalvar(char *p, int flag)
 		varunset(p, var, 0, 0);
 
 	if (subtype == VSLENGTH) {
+		p++;
+		if (flag & EXP_DISCARD)
+			return p;
 		cvtnum(varlen > 0 ? varlen : 0, flag);
 		goto record;
 	}
 
-	if (subtype == VSNORMAL) {
- record:
-		if (quoted) {
-			quoted = *var == '@' && shellparam.nparam;
-			if (!quoted)
-				goto end;
-		}
-		recordregion(startloc, expdest - (char *)stackblock(), quoted);
-		goto end;
-	}
+	if (subtype == VSNORMAL)
+		goto record;
 
 #if DEBUG
 	switch (subtype) {
@@ -7531,46 +7528,28 @@ evalvar(char *p, int flag)
 	}
 #endif
 
-	if (varlen >= 0) {
+	flag |= varlen < 0 ? EXP_DISCARD : 0;
+	if (!(flag & EXP_DISCARD)) {
 		/*
 		 * Terminate the string and start recording the pattern
 		 * right after it
 		 */
 		STPUTC('\0', expdest);
-		patloc = expdest - (char *)stackblock();
-		if (NULL == subevalvar(p, /* varname: */ NULL, patloc, subtype,
-				startloc, varflags, flag)) {
-			int amount = expdest - (
-				(char *)stackblock() + patloc - 1
-			);
-			STADJUST(-amount, expdest);
-		}
-		/* Remove any recorded regions beyond start of variable */
-		removerecordregions(startloc);
-		goto record;
 	}
 
-	varlen = 0;
+	patloc = expdest - (char *)stackblock();
+	p = subevalvar(p, NULL, patloc, startloc, varflags, flag);
 
- end:
-	if (subtype != VSNORMAL) {      /* skip to end of alternative */
-		int nesting = 1;
-		for (;;) {
-			unsigned char c = *p++;
-			if (c == CTLESC)
-				p++;
-			else if (c == CTLBACKQ) {
-				if (varlen >= 0)
-					argbackq = argbackq->next;
-			} else if (c == CTLVAR) {
-				if ((*p++ & VSTYPE) != VSNORMAL)
-					nesting++;
-			} else if (c == CTLENDVAR) {
-				if (--nesting == 0)
-					break;
-			}
-		}
+ record:
+	if (flag & EXP_DISCARD)
+		return p;
+
+	if (quoted) {
+		quoted = *var == '@' && shellparam.nparam;
+		if (!quoted)
+			return p;
 	}
+	recordregion(startloc, expdest - (char *)stackblock(), quoted);
 	return p;
 }
 
@@ -7983,13 +7962,11 @@ expandarg(union node *arg, struct arglist *arglist, int flag)
 	STARTSTACKSTR(expdest);
 	TRACE(("expandarg: argstr('%s',flags:%x)\n", arg->narg.text, flag));
 	argstr(arg->narg.text, flag);
-	p = _STPUTC('\0', expdest);
-	expdest = p - 1;
 	if (arglist == NULL) {
 		/* here document expanded */
 		goto out;
 	}
-	p = grabstackstr(p);
+	p = grabstackstr(expdest);
 	TRACE(("expandarg: p:'%s'\n", p));
 	exparg.lastp = &exparg.list;
 	/*
@@ -8050,7 +8027,6 @@ casematch(union node *pattern, char *val)
 	argbackq = pattern->narg.backquote;
 	STARTSTACKSTR(expdest);
 	argstr(pattern->narg.text, EXP_TILDE | EXP_CASE);
-	STACKSTRNUL(expdest);
 	ifsfree();
 	result = patmatch(stackblock(), val);
 	popstackmark(&smark);


More information about the busybox-cvs mailing list