[PATCH] vi: Implementation of REGEX-compatible :s command

Andrey Dobrovolsky andrey.dobrovolsky.odessa at gmail.com
Wed Feb 17 23:27:48 UTC 2021


If busybox is compiled with ENABLE_FEATURE_VI_REGEX_SEARCH, the :s command
must be regex-compatible too; char-by-char search and replace is not applicable.

Added:
        function do_substitution()
        a couple of local variables in the colon() function

The additional code and variables are conditional on ENABLE_FEATURE_VI_REGEX_SEARCH.
---
 editors/vi.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/editors/vi.c b/editors/vi.c
index 458ca62..eb85071 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2354,6 +2354,64 @@ static void setops(const char *args, const char
*opname, int flg_no,

 #endif /* FEATURE_VI_COLON */

+
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+
+#define MAX_SUBPATTERN 10              // subpatterns \0 .. \9
+#define MAX_SUBSTITUTED_LEN (SIZE_MAX / 2)     // upper limit for the length of one substituted string
+
+/*
+** Expand replace pattern s for one regex match.
+** orig   - text the match offsets refer to (unused when result is NULL)
+** regm   - array of MAX_SUBPATTERN subpattern bounds
+** s      - replace pattern: "\N" (N = 0..9) inserts subpattern N if it
+**          matched and stays literal otherwise, "\\" inserts one backslash
+** result - NUL-terminated output buffer, or NULL to only measure (dry run)
+** Returns the expanded length without the NUL; a value greater than
+** MAX_SUBSTITUTED_LEN means the limit was hit and result is incomplete.
+*/
+static size_t do_substitution (char *orig, regmatch_t *regm, char *s, char *result)
+{
+       int step;       // number of s symbols consumed
+       size_t len, total_len = 0;
+       regmatch_t *cur_match;  // matched subpattern to copy, NULL for a literal char
+
+       if (result)
+               *result = '\0'; // an empty s must still yield a valid string
+       while (*s) {
+               cur_match = NULL;
+               len = 1;        // assuming we will copy a single char from the replace pattern
+               step = 1;
+               if (*s == '\\') {
+                       if (s[1] >= '0' && s[1] < '0' + MAX_SUBPATTERN) {
+                               if (regm[s[1] - '0'].rm_so >= 0) {
+                                       cur_match = regm + (s[1] - '0');
+                                       len = cur_match->rm_eo - cur_match->rm_so;
+                                       if (len > MAX_SUBSTITUTED_LEN)
+                                               return len;
+                                       step = 2;       // subpattern invocation consists of 2 chars
+                               }
+                       } else if (s[1] == '\\') {      // escaped backslash?
+                               step = 2;
+                       }
+               }
+               total_len += len;
+               if (total_len > MAX_SUBSTITUTED_LEN)
+                       break;
+               if (result) {
+                       // orig is only touched here, so the dry run may pass orig == NULL
+                       memcpy(result, cur_match ? orig + cur_match->rm_so : s, len);
+                       result += len;
+                       *result = '\0';
+               }
+               s += step;
+       }
+
+       return total_len;
+}
+
+// do_substitution dry run: with result == NULL nothing is written,
+// only the substituted length is returned (x - subpattern bounds, y - replace pattern)
+#define get_substituted_size(x,y) do_substitution(NULL,x,y,NULL)
+
+#endif  /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -2759,6 +2817,14 @@ static void colon(char *buf)
                int dont_chain_first_item = ALLOW_UNDO;
 #  endif

+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+               regex_t preg;
+               int reg_err;
+               int cflags;
+               regmatch_t regmatch[MAX_SUBPATTERN];
+               char *Rsubst = NULL, *Rorig;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                // F points to the "find" pattern
                // R points to the "replace" pattern
                // replace the cmd line delimiters "/" with NULs
@@ -2784,11 +2850,46 @@ static void colon(char *buf)
                if (e < 0)
                        e = b;          // maybe :.s/foo/bar/

+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+               Rorig = R;
+               cflags=REG_EXTENDED;
+               if (ignorecase)
+                       cflags = REG_EXTENDED | REG_ICASE;
+               memset(&preg, 0, sizeof(preg));
+               reg_err=regcomp(&preg,F,cflags);
+               if (reg_err) {
+                       status_line(":s bad search pattern");
+                       goto regex_search_end;
+               }
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                for (i = b; i <= e; i++) {      // so, :20,23 s \0
find \0 replace \0
                        char *ls = q;           // orig line start
                        char *found;
  vc4:
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+                       found = NULL;
+                       regmatch[0].rm_so = 0;
+                       regmatch[0].rm_eo = end_line(q) - q;
+                       if (regexec(&preg, q, MAX_SUBPATTERN,
regmatch, REG_STARTEND) == 0) {
+                               found = q + regmatch[0].rm_so;
+                               len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+                               len_R = get_substituted_size(regmatch, Rorig);
+                               if (len_R > MAX_SUBSTITUTED_LEN) {
+                                       status_line("Substituted
string length exceeded limit.");
+                                       break;
+                               }
+                               Rsubst = xmalloc(len_R + 1);
+                               do_substitution(q,regmatch,Rorig,Rsubst);
+                               R = Rsubst;
+                       }
+# else
+
                        found = char_search(q, F, (FORWARD << 1) |
LIMITED);    // search cur line only for "find"
+
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                        if (found) {
                                uintptr_t bias;
                                // we found the "find" pattern - delete it
@@ -2802,6 +2903,12 @@ static void colon(char *buf)
                                found += bias;
                                ls += bias;
                                //q += bias; - recalculated anyway
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+                               free(Rsubst);
+                               Rsubst=NULL;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                                // check for "global"  :s/foo/bar/g
                                if (gflag == 'g') {
                                        if ((found + len_R) < end_line(ls)) {
@@ -2812,6 +2919,12 @@ static void colon(char *buf)
                        }
                        q = next_line(ls);
                }
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+  regex_search_end:
+               regfree(&preg);
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 # endif /* FEATURE_VI_SEARCH */
        } else if (strncmp(cmd, "version", i) == 0) {  // show software version
                status_line(BB_VER);
--
2.9.2


More information about the busybox mailing list