[PATCH v2 2/2] vi: allow regular expressions in ':s' commands

Ron Yorston rmy at pobox.com
Fri Jul 2 07:24:06 UTC 2021


BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands.  Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.

The implementation:

- uses basic regular expressions, to match those used in the search
  command;

- only supports substitution of back references ('\0' - '\9') in the
  replacement string.  Any other character following a backslash is
  treated as that literal character.

VI_REGEX_SEARCH isn't enabled in the default build.  In that case:

function                                             old     new   delta
colon                                               4036    4033      -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3)               Total: -3 bytes

When VI_REGEX_SEARCH is enabled:

function                                             old     new   delta
colon                                               4036    4378    +342
.rodata                                           108207  108229     +22
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 364/0)             Total: 364 bytes

v2: Rebase.  Code shrink.  Ensure empty replacement string is null terminated.

Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa at gmail.com>
Signed-off-by: Ron Yorston <rmy at pobox.com>
---
 editors/vi.c | 126 +++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 112 insertions(+), 14 deletions(-)

diff --git a/editors/vi.c b/editors/vi.c
index a19acb20b..3e91fefbc 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,59 @@ static char *expand_args(char *args)
 # endif
 #endif /* FEATURE_VI_COLON */
 
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10	// subpatterns \0 .. \9
+
+// Search the line at q for preg.  Returns the start of the match or NULL.
+// On a match *len_F is set to its length and *R to an allocated string (of
+// length *len_R) built from Rorig, which the caller should free.
+static char *regex_search(char *q, regex_t *preg, const char *Rorig,
+				size_t *len_F, size_t *len_R, char **R)
+{
+	regmatch_t regmatch[MAX_SUBPATTERN], *cur_match;
+	char *found = NULL;
+	const char *t;
+	char *r;
+
+	regmatch[0].rm_so = 0;
+	regmatch[0].rm_eo = end_line(q) - q;	// REG_STARTEND: search stops here
+	if (regexec(preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND) != 0)
+		return found;
+	found = q + regmatch[0].rm_so;
+	*len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+	*R = NULL;
+
+ fill_result:
+	// first pass calculates len_R, second fills R
+	*len_R = 0;
+	for (t = Rorig, r = *R; *t; t++) {
+		size_t len = 1;	// default is to copy one char from replace pattern
+		const char *from = t;
+		// a trailing backslash is literal: skipping it would run t past the NUL
+		if (*t == '\\' && t[1] != '\0') {
+			from = ++t;	// skip backslash
+			if (*t >= '0' && *t < '0' + MAX_SUBPATTERN) {
+				cur_match = regmatch + (*t - '0');
+				if (cur_match->rm_so >= 0) {	// else copy the digit itself
+					len = cur_match->rm_eo - cur_match->rm_so;
+					from = q + cur_match->rm_so;
+				}
+			}
+		}
+		*len_R += len;
+		if (*R) {
+			memcpy(r, from, len);
+			r += len;	// no terminator needed, xzalloc zeroed R
+		}
+	}
+	if (*R == NULL) {
+		*R = xzalloc(*len_R + 1);
+		goto fill_result;
+	}
+	return found;
+}
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -3081,6 +3134,14 @@ static void colon(char *buf)
 		int subs = 0;	// number of substitutions
 #  if ENABLE_FEATURE_VI_VERBOSE_STATUS
 		int last_line = 0, lines = 0;
+#  endif
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+		regex_t preg;
+		int cflags;
+		char *Rorig;
+#   if ENABLE_FEATURE_VI_UNDO
+		int undo = 0;
+#   endif
 #  endif
 
 		// F points to the "find" pattern
@@ -3098,7 +3159,6 @@ static void colon(char *buf)
 			*flags++ = '\0';	// terminate "replace"
 			gflag = *flags;
 		}
-		len_R = strlen(R);
 
 		if (len_F) {	// save "find" as last search pattern
 			free(last_search_pattern);
@@ -3120,33 +3180,67 @@ static void colon(char *buf)
 			b = e;
 		}
 
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+		Rorig = R;
+		cflags = 0;
+		if (ignorecase)
+			cflags = REG_ICASE;
+		memset(&preg, 0, sizeof(preg));
+		if (regcomp(&preg, F, cflags) != 0) {
+			status_line(":s bad search pattern");
+			goto regex_search_end;
+		}
+#  else
+		len_R = strlen(R);
+#  endif
+
 		for (i = b; i <= e; i++) {	// so, :20,23 s \0 find \0 replace \0
 			char *ls = q;		// orig line start
 			char *found;
  vc4:
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+			found = regex_search(q, &preg, Rorig, &len_F, &len_R, &R);
+#  else
 			found = char_search(q, F, (FORWARD << 1) | LIMITED);	// search cur line only for "find"
+#  endif
 			if (found) {
 				uintptr_t bias;
 				// we found the "find" pattern - delete it
 				// For undo support, the first item should not be chained
-				text_hole_delete(found, found + len_F - 1,
-							subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
-				// can't do this above, no undo => no third argument
-				subs++;
-#  if ENABLE_FEATURE_VI_VERBOSE_STATUS
-				if (last_line != i) {
-					last_line = i;
-					++lines;
-				}
+				// This needs to be handled differently depending on
+				// whether or not regex support is enabled.
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+#   define TEST_LEN_F len_F	// len_F may be zero
+#   define TEST_UNDO1 undo++
+#   define TEST_UNDO2 undo++
+#  else
+#   define TEST_LEN_F 1		// len_F is never zero
+#   define TEST_UNDO1 subs
+#   define TEST_UNDO2 1
 #  endif
-				// insert the "replace" pattern, if required
-				if (len_R) {
-					bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
+				if (TEST_LEN_F)	// match can be empty, no delete needed
+					text_hole_delete(found, found + len_F - 1,
+								TEST_UNDO1 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+				if (len_R) {	// insert the "replace" pattern, if required
+					bias = string_insert(found, R,
+								TEST_UNDO2 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
 					found += bias;
 					ls += bias;
 					//q += bias; - recalculated anyway
 				}
-				dot = ls;
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+				free(R);
+#  endif
+				if (TEST_LEN_F || len_R) {
+					dot = ls;
+					subs++;
+#  if ENABLE_FEATURE_VI_VERBOSE_STATUS
+					if (last_line != i) {
+						last_line = i;
+						++lines;
+					}
+#  endif
+				}
 				// check for "global"  :s/foo/bar/g
 				if (gflag == 'g') {
 					if ((found + len_R) < end_line(ls)) {
@@ -3166,6 +3260,10 @@ static void colon(char *buf)
 				status_line("%d substitutions on %d lines", subs, lines);
 #  endif
 		}
+#  if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+		regfree(&preg);
+#  endif
 # endif /* FEATURE_VI_SEARCH */
 	} else if (strncmp(cmd, "version", i) == 0) {  // show software version
 		status_line(BB_VER);
-- 
2.31.1



More information about the busybox mailing list