vi: substitution command (:s) REGEX implementation

Andrey Dobrovolsky andrey.dobrovolsky.odessa at gmail.com
Tue Feb 9 23:07:34 UTC 2021


commit 87baae194e360e67865aeb3bc7a47da600607b04
Author: AndreyDobrovolskyOdessa <andrey.dobrovolsky.odessa at gmail.com>
Date:   Wed Feb 10 00:17:06 2021 +0200

    Implementing :s command utilizing full REGEXes power

    Additional code and variables conditioned by ENABLE_FEATURE_VI_REGEX_SEARCH.

    Added:
            function do_substitution()
            a couple of local variables in the colon() function

diff --git a/editors/vi.c b/editors/vi.c
index 458ca62..7c86f10 100644
--- a/editors/vi.c
+++ b/editors/vi.c
@@ -2354,6 +2354,61 @@ static void setops(const char *args, const char
*opname, int flg_no,

 #endif /* FEATURE_VI_COLON */

+
+#if ENABLE_FEATURE_VI_REGEX_SEARCH
+
+#define MAX_SUBPATTERN 10              // subpatterns \0 .. \9
+#define MAX_SUBSTITUTED_LEN (SIZE_MAX / 2)   // length bound checked by do_substitution() and its caller
+
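+/*
+** Expand replace pattern s for one match; returns the expanded length.
+** orig   - input string the regm offsets refer to
+** regm   - array of subpattern bounds, selected in s by \0 .. \9
+** s      - replace pattern
+** result - buffer for the substitution result, NULL = only count
+*/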
+static size_t do_substitution(char *orig, regmatch_t *regm, char *s, char *result)
+{
+       size_t total_len = 0;   // bytes produced so far, kept below MAX_SUBSTITUTED_LEN
+       // "\0".."\9" in s insert a subpattern of orig, other chars are copied as is
+       while (*s) {
+               char *from = s;   // memcpy source: one literal char by default
+               size_t len = 1;   // bytes appended by this step
+               int step = 1;     // chars of s consumed by this step
+               if (*s == '\\' && s[1] >= '0' && s[1] < '0' + MAX_SUBPATTERN) {
+                       regmatch_t *cur_match = regm + (s[1] - '0');
+                       // rm_so < 0: subpattern unused, "\N" stays literal
+                       if (cur_match->rm_so >= 0) {
+                               len = cur_match->rm_eo - cur_match->rm_so;
+                               // orig is NULL in a dry run, do no arithmetic on it
+                               from = result ? orig + cur_match->rm_so : NULL;
+                               step = 2;   // "\N" takes two chars of s
+                       }
+               }
+               // test before adding, so the running total cannot wrap around
+               if (len >= MAX_SUBSTITUTED_LEN - total_len) {
+                       total_len = SIZE_MAX;   // above the limit for any caller check
+                       break;
+               }
+               total_len += len;
+               if (result) {
+                       memcpy(result, from, len);
+                       result += len;
+               }
+               s += step;
+       }
+       if (result)
+               *result = '\0';   // terminate even if s was empty
+       return total_len;
+}
+
+// do_substitution dry run: orig and result are NULL, so nothing is copied and only the length is returned
+
+#define get_substituted_size(x,y) do_substitution(NULL,x,y,NULL)
+
+#endif  /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
+
 // buf must be no longer than MAX_INPUT_LEN!
 static void colon(char *buf)
 {
@@ -2759,6 +2814,14 @@ static void colon(char *buf)
                int dont_chain_first_item = ALLOW_UNDO;
 #  endif

+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+               regex_t preg;
+               int reg_err;
+               int cflags;
+               regmatch_t regmatch[MAX_SUBPATTERN];
+               char *Rsubst = NULL, *Rorig;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                // F points to the "find" pattern
                // R points to the "replace" pattern
                // replace the cmd line delimiters "/" with NULs
@@ -2784,11 +2847,46 @@ static void colon(char *buf)
                if (e < 0)
                        e = b;          // maybe :.s/foo/bar/
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+               Rorig = R;
+               cflags=REG_EXTENDED;
+               if (ignorecase)
+                       cflags = REG_EXTENDED | REG_ICASE;
+               memset(&preg, 0, sizeof(preg));
+               reg_err=regcomp(&preg,F,cflags);
+               if (reg_err) {
+                       status_line(":s bad search pattern");
+                       goto regex_search_end;
+               }
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                for (i = b; i <= e; i++) {      // so, :20,23 s \0
find \0 replace \0
                        char *ls = q;           // orig line start
                        char *found;
  vc4:
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+                       found = NULL;
+                       regmatch[0].rm_so = 0;
+                       regmatch[0].rm_eo = end_line(q) - q;
+
if(regexec(&preg,q,MAX_SUBPATTERN,regmatch,REG_STARTEND)==0){
+                               found = q + regmatch[0].rm_so;
+                               len_F = regmatch[0].rm_eo - regmatch[0].rm_so;
+                               len_R = get_substituted_size(regmatch,Rorig);
+                               if(len_R > MAX_SUBSTITUTED_LEN){
+                                       status_line("Substituted
string length exceeded limit.");
+                                       break;
+                               }
+                               Rsubst = xmalloc(len_R + 1);
+                               do_substitution(q,regmatch,Rorig,Rsubst);
+                               R = Rsubst;
+                       }
+# else
+
                        found = char_search(q, F, (FORWARD << 1) |
LIMITED);    // search cur line only for "find"
+
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                        if (found) {
                                uintptr_t bias;
                                // we found the "find" pattern - delete it
@@ -2802,6 +2900,12 @@ static void colon(char *buf)
                                found += bias;
                                ls += bias;
                                //q += bias; - recalculated anyway
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+                               free(Rsubst);
+                               Rsubst=NULL;
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
                                // check for "global"  :s/foo/bar/g
                                if (gflag == 'g') {
                                        if ((found + len_R) < end_line(ls)) {
@@ -2812,6 +2916,12 @@ static void colon(char *buf)
                        }
                        q = next_line(ls);
                }
+
+# if ENABLE_FEATURE_VI_REGEX_SEARCH
+  regex_search_end:
+               regfree(&preg);
+# endif /* ENABLE_FEATURE_VI_REGEX_SEARCH */
+
 # endif /* FEATURE_VI_SEARCH */
        } else if (strncmp(cmd, "version", i) == 0) {  // show software version
                status_line(BB_VER);

Best regards!
-------------- next part --------------
A non-text attachment was scrubbed...
Name: vi_regex_subst
Type: application/octet-stream
Size: 4620 bytes
Desc: not available
URL: <http://lists.busybox.net/pipermail/busybox/attachments/20210210/e25563b4/attachment.obj>


More information about the busybox mailing list