[git commit] libbb: smaller and faster decode_base64()

Denys Vlasenko vda.linux at googlemail.com
Fri Nov 27 19:45:15 UTC 2020


commit: https://git.busybox.net/busybox/commit/?id=170b8628fabff2d81606cac052a35f8cf91cc7b2
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
decode_base64                                        195     180     -15

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 libbb/uuencode.c   | 100 +++++++++++++++++++----------------------------------
 networking/httpd.c |  37 --------------------
 2 files changed, 36 insertions(+), 101 deletions(-)

diff --git a/libbb/uuencode.c b/libbb/uuencode.c
index b4ee20c20..2e9edb219 100644
--- a/libbb/uuencode.c
+++ b/libbb/uuencode.c
@@ -82,7 +82,7 @@ void FAST_FUNC bb_uuencode(char *p, const void *src, int length, const char *tbl
 }
 
 /*
- * Decode base64 encoded string. Stops on '\0'.
+ * Decode base64 encoded string. Stops on NUL after terminating "=" or "==".
  *
  * Returns: pointer to the undecoded part of source.
  * If points to '\0', then the source was fully decoded.
@@ -91,76 +91,48 @@ void FAST_FUNC bb_uuencode(char *p, const void *src, int length, const char *tbl
 const char* FAST_FUNC decode_base64(char **pp_dst, const char *src)
 {
 	char *dst = *pp_dst;
-	const char *src_tail;
-
-	while (1) {
-		unsigned char six_bit[4];
-		int count = 0;
+	unsigned ch = 0;
+	int i = 0;
 
-		/* Fetch up to four 6-bit values */
-		src_tail = src;
-		while (count < 4) {
-			char *table_ptr;
-			int ch;
+	while (*src) {
+		int t = (unsigned char)*src++;
 
-			/* Get next _valid_ character.
-			 * bb_uuenc_tbl_base64[] contains this string:
-			 *  0         1         2         3         4         5         6
-			 *  01234567890123456789012345678901234567890123456789012345678901234
-			 * "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
-			 */
-			do {
-				ch = *src;
-				if (ch == '\0') {
-					if (count == 0) {
-						/* Example:
-						 * If we decode "QUJD <NUL>", we want
-						 * to return ptr to NUL, not to ' ',
-						 * because we did fully decode
-						 * the string (to "ABC").
-						 */
-						src_tail = src;
-					}
-					goto ret;
-				}
-				src++;
-				table_ptr = strchr(bb_uuenc_tbl_base64, ch);
+		/* "if" forest is faster than strchr(bb_uuenc_tbl_base64, t) */
+		if (t >= '0' && t <= '9')
+			t = t - '0' + 52;
+		else if (t >= 'A' && t <= 'Z')
+			t = t - 'A';
+		else if (t >= 'a' && t <= 'z')
+			t = t - 'a' + 26;
+		else if (t == '+')
+			t = 62;
+		else if (t == '/')
+			t = 63;
+		else if (t == '=' && (i == 3 || (i == 2 && *src == '=')))
+			/* the above disallows "==AA", "A===", "AA=A" etc */
+			t = 0x1000000;
+		else
 //TODO: add BASE64_FLAG_foo to die on bad char?
-			} while (!table_ptr);
-
-			/* Convert encoded character to decimal */
-			ch = table_ptr - bb_uuenc_tbl_base64;
+			continue;
 
-			/* ch is 64 if char was '=', otherwise 0..63 */
-			if (ch == 64)
+		ch = (ch << 6) | t;
+		if (++i == 4) {
+			*dst++ = (char) (ch >> 16);
+			*dst++ = (char) (ch >> 8);
+			*dst++ = (char) ch;
+			i = 0;
+			if (ch & 0x1000000) { /* was last input char '='? */
+				dst--;
+				if (ch & (0x1000000 << 6)) /* was it "=="? */
+					dst--;
 				break;
-			six_bit[count] = ch;
-			count++;
+			}
+			ch = 0;
 		}
-
-		/* Transform 6-bit values to 8-bit ones.
-		 * count can be < 4 when we decode the tail:
-		 * "eQ==" -> "y", not "y NUL NUL".
-		 * Note that (count > 1) is always true,
-		 * "x===" encoding is not valid:
-		 * even a single zero byte encodes as "AA==".
-		 * However, with current logic we come here with count == 1
-		 * when we decode "==" tail.
-		 */
-		if (count > 1)
-			*dst++ = six_bit[0] << 2 | six_bit[1] >> 4;
-		if (count > 2)
-			*dst++ = six_bit[1] << 4 | six_bit[2] >> 2;
-		if (count > 3)
-			*dst++ = six_bit[2] << 6 | six_bit[3];
-		/* Note that if we decode "AA==" and ate first '=',
-		 * we just decoded one char (count == 2) and now we'll
-		 * do the loop once more to decode second '='.
-		 */
-	} /* while (1) */
- ret:
+	}
 	*pp_dst = dst;
-	return src_tail;
+	/* i should be zero here if full 4-char block was decoded */
+	return src - i; /* -i rejects truncations: e.g. "MQ" and "MQ=" (correct encoding is "MQ==" -> "1") */
 }
 
 #if ENABLE_BASE32
diff --git a/networking/httpd.c b/networking/httpd.c
index 961f8cab4..4ffd89c48 100644
--- a/networking/httpd.c
+++ b/networking/httpd.c
@@ -1017,46 +1017,9 @@ static char *encodeString(const char *string)
  */
 static void decodeBase64(char *Data)
 {
-# if ENABLE_BASE64 || ENABLE_UUDECODE
-	/* Call decode_base64() from uuencode.c */
 	char *eptr = Data;
 	decode_base64(&eptr, Data);
 	*eptr = '\0';
-# else
-	const unsigned char *in = (const unsigned char *)Data;
-	/* The decoded size will be at most 3/4 the size of the encoded */
-	unsigned ch = 0;
-	int i = 0;
-
-	while (*in) {
-		int t = *in++;
-
-		if (t >= '0' && t <= '9')
-			t = t - '0' + 52;
-		else if (t >= 'A' && t <= 'Z')
-			t = t - 'A';
-		else if (t >= 'a' && t <= 'z')
-			t = t - 'a' + 26;
-		else if (t == '+')
-			t = 62;
-		else if (t == '/')
-			t = 63;
-		else if (t == '=')
-			t = 0;
-		else
-			continue;
-
-		ch = (ch << 6) | t;
-		i++;
-		if (i == 4) {
-			*Data++ = (char) (ch >> 16);
-			*Data++ = (char) (ch >> 8);
-			*Data++ = (char) ch;
-			i = 0;
-		}
-	}
-	*Data = '\0';
-# endif
 }
 #endif
 


More information about the busybox-cvs mailing list