[git commit] libbb/yescrypt: de-unroll salsa20()

Denys Vlasenko vda.linux at googlemail.com
Sun Jul 20 06:42:51 UTC 2025


commit: https://git.busybox.net/busybox/commit/?id=77a49a61b2957f532c4a736fd3ddf1154aecc176
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
salsa20                                              760     296    -464

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 libbb/yescrypt/alg-yescrypt-kdf.c | 96 ++++++++++++++++++++++++++++++---------
 libbb/yescrypt/alg-yescrypt.h     |  3 +-
 2 files changed, 76 insertions(+), 23 deletions(-)

diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
index 01503c6e0..a9a1bd591 100644
--- a/libbb/yescrypt/alg-yescrypt-kdf.c
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -129,30 +129,82 @@ static void salsa20(salsa20_blk_t *restrict B,
 	do {
 #define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
 		/* Operate on columns */
-		x[ 4] ^= R(x[ 0]+x[12], 7);  x[ 8] ^= R(x[ 4]+x[ 0], 9);
-		x[12] ^= R(x[ 8]+x[ 4],13);  x[ 0] ^= R(x[12]+x[ 8],18);
-
-		x[ 9] ^= R(x[ 5]+x[ 1], 7);  x[13] ^= R(x[ 9]+x[ 5], 9);
-		x[ 1] ^= R(x[13]+x[ 9],13);  x[ 5] ^= R(x[ 1]+x[13],18);
-
-		x[14] ^= R(x[10]+x[ 6], 7);  x[ 2] ^= R(x[14]+x[10], 9);
-		x[ 6] ^= R(x[ 2]+x[14],13);  x[10] ^= R(x[ 6]+x[ 2],18);
-
-		x[ 3] ^= R(x[15]+x[11], 7);  x[ 7] ^= R(x[ 3]+x[15], 9);
-		x[11] ^= R(x[ 7]+x[ 3],13);  x[15] ^= R(x[11]+x[ 7],18);
-
+#if KDF_UNROLL_SALSA20
+		x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
+		x[ 8] ^= R(x[ 4]+x[ 0], 9);
+		x[12] ^= R(x[ 8]+x[ 4],13);
+		x[ 0] ^= R(x[12]+x[ 8],18);
+
+		x[ 9] ^= R(x[ 5]+x[ 1], 7);
+		x[13] ^= R(x[ 9]+x[ 5], 9);
+		x[ 1] ^= R(x[13]+x[ 9],13);
+		x[ 5] ^= R(x[ 1]+x[13],18);
+
+		x[14] ^= R(x[10]+x[ 6], 7);
+		x[ 2] ^= R(x[14]+x[10], 9);
+		x[ 6] ^= R(x[ 2]+x[14],13);
+		x[10] ^= R(x[ 6]+x[ 2],18);
+
+		x[ 3] ^= R(x[15]+x[11], 7);
+		x[ 7] ^= R(x[ 3]+x[15], 9);
+		x[11] ^= R(x[ 7]+x[ 3],13);
+		x[15] ^= R(x[11]+x[ 7],18);
+#else
+		{
+			unsigned j, k, l;
+			j = 4; k = 0; l = 12;
+			for (;;) {
+				uint32_t t;
+				x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
+				x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
+				x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
+				x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
+				if (j == 15) break;
+				l = j + 1; k = j + 5; j = (j+9) & 0xf;
+			}
+		}
+#endif
 		/* Operate on rows */
-		x[ 1] ^= R(x[ 0]+x[ 3], 7);  x[ 2] ^= R(x[ 1]+x[ 0], 9);
-		x[ 3] ^= R(x[ 2]+x[ 1],13);  x[ 0] ^= R(x[ 3]+x[ 2],18);
-
-		x[ 6] ^= R(x[ 5]+x[ 4], 7);  x[ 7] ^= R(x[ 6]+x[ 5], 9);
-		x[ 4] ^= R(x[ 7]+x[ 6],13);  x[ 5] ^= R(x[ 4]+x[ 7],18);
-
-		x[11] ^= R(x[10]+x[ 9], 7);  x[ 8] ^= R(x[11]+x[10], 9);
-		x[ 9] ^= R(x[ 8]+x[11],13);  x[10] ^= R(x[ 9]+x[ 8],18);
+#if KDF_UNROLL_SALSA20
+// i=0 n=0
+		x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+		x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+		x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+		x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=4 n=1                                          ^^^j^^^       ^^^k^^^       ^^^l^^^
+		x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+		x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+		x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+		x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=8 n=2
+		x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+		x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+		x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+		x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=12 n=3
+		x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+		x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+		x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+		x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+#else
+		{
+			unsigned j, k, l;
+			uint32_t *xrow;
+			j = 1; k = 0; l = 3;
+			xrow = &x[0];
+			for (;;) {
+				uint32_t t;
+				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
+				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
+				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
+				xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
+				if (j == 3) break;
+				l = j; k = j + 1; j = (j+2) & 3;
+				xrow += 4;
+			}
+		}
+#endif
 
-		x[12] ^= R(x[15]+x[14], 7);  x[13] ^= R(x[12]+x[15], 9);
-		x[14] ^= R(x[13]+x[12],13);  x[15] ^= R(x[14]+x[13],18);
 #undef R
 	} while (--doublerounds);
 #undef x
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h
index 5051efbb4..b69843f5d 100644
--- a/libbb/yescrypt/alg-yescrypt.h
+++ b/libbb/yescrypt/alg-yescrypt.h
@@ -44,7 +44,6 @@
 # define TEST_DECODE64  0
 #endif
 
-
 // Only accept one-char parameters in salt, and only first three?
 // Almost any reasonable yescrypt hashes in /etc/shadow should
 // only ever use "jXY" parameters which set N and r.
@@ -115,6 +114,8 @@
 // -4864 bytes if 0:
 #define KDF_UNROLL_PWXFORM 0
 // if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes
+// -464 bytes:
+#define KDF_UNROLL_SALSA20 0
 
 /**
  * Type and possible values for the flags argument of yescrypt_kdf(),


More information about the busybox-cvs mailing list