[PATCH v2 2/2] vi: allow regular expressions in ':s' commands
Ron Yorston
rmy at pobox.com
Fri Jul 2 07:24:06 UTC 2021
BusyBox vi has never supported the use of regular expressions in
search/replace (':s') commands. Implement this using GNU regex
when VI_REGEX_SEARCH is enabled.
The implementation:
- uses basic regular expressions, to match those used in the search
command;
- only supports substitution of back references ('\0' - '\9') in the
replacement string. Any other character following a backslash is
treated as that literal character.
VI_REGEX_SEARCH isn't enabled in the default build. In that case:
function old new delta
colon 4036 4033 -3
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3) Total: -3 bytes
When VI_REGEX_SEARCH is enabled:
function old new delta
colon 4036 4378 +342
.rodata 108207 108229 +22
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 364/0) Total: 364 bytes
v2: Rebase. Code shrink. Ensure empty replacement string is null terminated.
Signed-off-by: Andrey Dobrovolsky <andrey.dobrovolsky.odessa at gmail.com>
Signed-off-by: Ron Yorston <rmy at pobox.com>
---
editors/vi.c | 126 +++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 112 insertions(+), 14 deletions(-)
diff --git a/editors/vi.c b/editors/vi.c
index a19acb20b..3e91fefbc 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2675,6 +2675,59 @@ static char *expand_args(char *args)
# endif
#endif /* FEATURE_VI_COLON */
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define MAX_SUBPATTERN 10 // subpatterns \0 .. \9
+
+// Run preg on q..end_line(q). On a match return its start, set *len_F/*len_R and allocate *R (caller frees); else NULL.
+static char *regex_search(char *q, regex_t *preg, const char *Rorig,
+ size_t *len_F, size_t *len_R, char **R)
+{
+ regmatch_t regmatch[MAX_SUBPATTERN], *cur_match;
+ char *found = NULL;
+ const char *t;
+ char *r;
+
+ regmatch[0].rm_so = 0; // REG_STARTEND takes the search bounds from regmatch[0]
+ regmatch[0].rm_eo = end_line(q) - q;
+ if (regexec(preg, q, MAX_SUBPATTERN, regmatch, REG_STARTEND) != 0)
+ return found; // no match: len_F, len_R and R are left untouched
+
+ found = q + regmatch[0].rm_so;
+ *len_F = regmatch[0].rm_eo - regmatch[0].rm_so; // may be zero for an empty match
+ *R = NULL;
+
+ fill_result:
+ // first pass calculates len_R, second fills R
+ *len_R = 0;
+ for (t = Rorig, r = *R; *t; t++) {
+ size_t len = 1; // default is to copy one char from replace pattern
+ const char *from = t;
+ if (*t == '\\' && t[1] != '\0') { // lone trailing backslash is copied literally, never step past the NUL
+ from = ++t; // skip backslash
+ if (*t >= '0' && *t < '0' + MAX_SUBPATTERN) {
+ cur_match = regmatch + (*t - '0');
+ if (cur_match->rm_so >= 0) { // otherwise the digit itself is copied
+ len = cur_match->rm_eo - cur_match->rm_so;
+ from = q + cur_match->rm_so;
+ }
+ }
+ }
+ *len_R += len;
+ if (*R) {
+ memcpy(r, from, len);
+ r += len;
+ /* *r = '\0'; - xzalloc did it */
+ }
+ }
+ if (*R == NULL) {
+ *R = xzalloc(*len_R + 1); // +1 keeps the result NUL terminated even when len_R is zero
+ goto fill_result;
+ }
+
+ return found;
+}
+#endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
// buf must be no longer than MAX_INPUT_LEN!
static void colon(char *buf)
{
@@ -3081,6 +3134,14 @@ static void colon(char *buf)
int subs = 0; // number of substitutions
# if ENABLE_FEATURE_VI_VERBOSE_STATUS
int last_line = 0, lines = 0;
+# endif
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_t preg;
+ int cflags;
+ char *Rorig;
+# if ENABLE_FEATURE_VI_UNDO
+ int undo = 0;
+# endif
# endif
// F points to the "find" pattern
@@ -3098,7 +3159,6 @@ static void colon(char *buf)
*flags++ = '\0'; // terminate "replace"
gflag = *flags;
}
- len_R = strlen(R);
if (len_F) { // save "find" as last search pattern
free(last_search_pattern);
@@ -3120,33 +3180,67 @@ static void colon(char *buf)
b = e;
}
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ Rorig = R;
+ cflags = 0;
+ if (ignorecase)
+ cflags = REG_ICASE;
+ memset(&preg, 0, sizeof(preg));
+ if (regcomp(&preg, F, cflags) != 0) {
+ status_line(":s bad search pattern");
+ goto regex_search_end;
+ }
+# else
+ len_R = strlen(R);
+# endif
+
for (i = b; i <= e; i++) { // so, :20,23 s \0 find \0 replace \0
char *ls = q; // orig line start
char *found;
vc4:
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ found = regex_search(q, &preg, Rorig, &len_F, &len_R, &R);
+# else
found = char_search(q, F, (FORWARD << 1) | LIMITED); // search cur line only for "find"
+# endif
if (found) {
uintptr_t bias;
// we found the "find" pattern - delete it
// For undo support, the first item should not be chained
- text_hole_delete(found, found + len_F - 1,
- subs ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
- // can't do this above, no undo => no third argument
- subs++;
-# if ENABLE_FEATURE_VI_VERBOSE_STATUS
- if (last_line != i) {
- last_line = i;
- ++lines;
- }
+ // This needs to be handled differently depending on
+ // whether or not regex support is enabled.
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+# define TEST_LEN_F len_F // len_F may be zero
+# define TEST_UNDO1 undo++
+# define TEST_UNDO2 undo++
+# else
+# define TEST_LEN_F 1 // len_F is never zero
+# define TEST_UNDO1 subs
+# define TEST_UNDO2 1
# endif
- // insert the "replace" pattern, if required
- if (len_R) {
- bias = string_insert(found, R, ALLOW_UNDO_CHAIN);
+ if (TEST_LEN_F) // match can be empty, no delete needed
+ text_hole_delete(found, found + len_F - 1,
+ TEST_UNDO1 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
+ if (len_R) { // insert the "replace" pattern, if required
+ bias = string_insert(found, R,
+ TEST_UNDO2 ? ALLOW_UNDO_CHAIN: ALLOW_UNDO);
found += bias;
ls += bias;
//q += bias; - recalculated anyway
}
- dot = ls;
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ free(R);
+# endif
+ if (TEST_LEN_F || len_R) {
+ dot = ls;
+ subs++;
+# if ENABLE_FEATURE_VI_VERBOSE_STATUS
+ if (last_line != i) {
+ last_line = i;
+ ++lines;
+ }
+# endif
+ }
// check for "global" :s/foo/bar/g
if (gflag == 'g') {
if ((found + len_R) < end_line(ls)) {
@@ -3166,6 +3260,10 @@ static void colon(char *buf)
status_line("%d substitutions on %d lines", subs, lines);
# endif
}
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+ regex_search_end:
+ regfree(&preg);
+# endif
# endif /* FEATURE_VI_SEARCH */
} else if (strncmp(cmd, "version", i) == 0) { // show software version
status_line(BB_VER);
--
2.31.1
More information about the busybox mailing list