[git commit] libbb/yescrypt: de-unroll salsa20()
Denys Vlasenko
vda.linux at googlemail.com
Sun Jul 20 06:42:51 UTC 2025
commit: https://git.busybox.net/busybox/commit/?id=77a49a61b2957f532c4a736fd3ddf1154aecc176
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master
function old new delta
salsa20 760 296 -464
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
libbb/yescrypt/alg-yescrypt-kdf.c | 96 ++++++++++++++++++++++++++++++---------
libbb/yescrypt/alg-yescrypt.h | 3 +-
2 files changed, 76 insertions(+), 23 deletions(-)
diff --git a/libbb/yescrypt/alg-yescrypt-kdf.c b/libbb/yescrypt/alg-yescrypt-kdf.c
index 01503c6e0..a9a1bd591 100644
--- a/libbb/yescrypt/alg-yescrypt-kdf.c
+++ b/libbb/yescrypt/alg-yescrypt-kdf.c
@@ -129,30 +129,82 @@ static void salsa20(salsa20_blk_t *restrict B,
do {
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
/* Operate on columns */
- x[ 4] ^= R(x[ 0]+x[12], 7); x[ 8] ^= R(x[ 4]+x[ 0], 9);
- x[12] ^= R(x[ 8]+x[ 4],13); x[ 0] ^= R(x[12]+x[ 8],18);
-
- x[ 9] ^= R(x[ 5]+x[ 1], 7); x[13] ^= R(x[ 9]+x[ 5], 9);
- x[ 1] ^= R(x[13]+x[ 9],13); x[ 5] ^= R(x[ 1]+x[13],18);
-
- x[14] ^= R(x[10]+x[ 6], 7); x[ 2] ^= R(x[14]+x[10], 9);
- x[ 6] ^= R(x[ 2]+x[14],13); x[10] ^= R(x[ 6]+x[ 2],18);
-
- x[ 3] ^= R(x[15]+x[11], 7); x[ 7] ^= R(x[ 3]+x[15], 9);
- x[11] ^= R(x[ 7]+x[ 3],13); x[15] ^= R(x[11]+x[ 7],18);
-
+#if KDF_UNROLL_SALSA20
+ x[ 4] ^= R(x[ 0]+x[12], 7); // x[j] ^= R(x[k]+x[l], CONST)
+ x[ 8] ^= R(x[ 4]+x[ 0], 9);
+ x[12] ^= R(x[ 8]+x[ 4],13);
+ x[ 0] ^= R(x[12]+x[ 8],18);
+
+ x[ 9] ^= R(x[ 5]+x[ 1], 7);
+ x[13] ^= R(x[ 9]+x[ 5], 9);
+ x[ 1] ^= R(x[13]+x[ 9],13);
+ x[ 5] ^= R(x[ 1]+x[13],18);
+
+ x[14] ^= R(x[10]+x[ 6], 7);
+ x[ 2] ^= R(x[14]+x[10], 9);
+ x[ 6] ^= R(x[ 2]+x[14],13);
+ x[10] ^= R(x[ 6]+x[ 2],18);
+
+ x[ 3] ^= R(x[15]+x[11], 7);
+ x[ 7] ^= R(x[ 3]+x[15], 9);
+ x[11] ^= R(x[ 7]+x[ 3],13);
+ x[15] ^= R(x[11]+x[ 7],18);
+#else
+ {
+ unsigned j, k, l;
+ j = 4; k = 0; l = 12;
+ for (;;) {
+ uint32_t t;
+ x[j] ^= ({ t = x[k] + x[l]; R(t, 7); }); l = k; k = j; j = (j+4) & 0xf;
+ x[j] ^= ({ t = x[k] + x[l]; R(t, 9); }); l = k; k = j; j = (j+4) & 0xf;
+ x[j] ^= ({ t = x[k] + x[l]; R(t,13); }); l = k; k = j; j = (j+4) & 0xf;
+ x[j] ^= ({ t = x[k] + x[l]; R(t,18); });
+ if (j == 15) break;
+ l = j + 1; k = j + 5; j = (j+9) & 0xf;
+ }
+ }
+#endif
/* Operate on rows */
- x[ 1] ^= R(x[ 0]+x[ 3], 7); x[ 2] ^= R(x[ 1]+x[ 0], 9);
- x[ 3] ^= R(x[ 2]+x[ 1],13); x[ 0] ^= R(x[ 3]+x[ 2],18);
-
- x[ 6] ^= R(x[ 5]+x[ 4], 7); x[ 7] ^= R(x[ 6]+x[ 5], 9);
- x[ 4] ^= R(x[ 7]+x[ 6],13); x[ 5] ^= R(x[ 4]+x[ 7],18);
-
- x[11] ^= R(x[10]+x[ 9], 7); x[ 8] ^= R(x[11]+x[10], 9);
- x[ 9] ^= R(x[ 8]+x[11],13); x[10] ^= R(x[ 9]+x[ 8],18);
+#if KDF_UNROLL_SALSA20
+// i=0 n=0
+ x[ 1] ^= R(x[ 0]+x[ 3], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+ x[ 2] ^= R(x[ 1]+x[ 0], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+ x[ 3] ^= R(x[ 2]+x[ 1],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+ x[ 0] ^= R(x[ 3]+x[ 2],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=4 n=1 ^^^j^^^ ^^^k^^^ ^^^l^^^
+ x[ 6] ^= R(x[ 5]+x[ 4], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+ x[ 7] ^= R(x[ 6]+x[ 5], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+ x[ 4] ^= R(x[ 7]+x[ 6],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+ x[ 5] ^= R(x[ 4]+x[ 7],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=8 n=2
+ x[11] ^= R(x[10]+x[ 9], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+ x[ 8] ^= R(x[11]+x[10], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+ x[ 9] ^= R(x[ 8]+x[11],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+ x[10] ^= R(x[ 9]+x[ 8],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+// i=12 n=3
+ x[12] ^= R(x[15]+x[14], 7); // [i + (n+1)&3] [i + (n+0)&3] [i + (n+3)&3]
+ x[13] ^= R(x[12]+x[15], 9); // [i + (n+2)&3] [i + (n+1)&3] [i + (n+0)&3]
+ x[14] ^= R(x[13]+x[12],13); // [i + (n+3)&3] [i + (n+2)&3] [i + (n+1)&3]
+ x[15] ^= R(x[14]+x[13],18); // [i + (n+0)&3] [i + (n+3)&3] [i + (n+2)&3]
+#else
+ {
+ unsigned j, k, l;
+ uint32_t *xrow;
+ j = 1; k = 0; l = 3;
+ xrow = &x[0];
+ for (;;) {
+ uint32_t t;
+ xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 7); }); l = k; k = j; j = (j+1) & 3;
+ xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t, 9); }); l = k; k = j; j = (j+1) & 3;
+ xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,13); }); l = k; k = j; j = (j+1) & 3;
+ xrow[j] ^= ({ t = xrow[k] + xrow[l]; R(t,18); });
+ if (j == 3) break;
+ l = j; k = j + 1; j = (j+2) & 3;
+ xrow += 4;
+ }
+ }
+#endif
- x[12] ^= R(x[15]+x[14], 7); x[13] ^= R(x[12]+x[15], 9);
- x[14] ^= R(x[13]+x[12],13); x[15] ^= R(x[14]+x[13],18);
#undef R
} while (--doublerounds);
#undef x
diff --git a/libbb/yescrypt/alg-yescrypt.h b/libbb/yescrypt/alg-yescrypt.h
index 5051efbb4..b69843f5d 100644
--- a/libbb/yescrypt/alg-yescrypt.h
+++ b/libbb/yescrypt/alg-yescrypt.h
@@ -44,7 +44,6 @@
# define TEST_DECODE64 0
#endif
-
// Only accept one-char parameters in salt, and only first three?
// Almost any reasonable yescrypt hashes in /etc/shadow should
// only ever use "jXY" parameters which set N and r.
@@ -115,6 +114,8 @@
// -4864 bytes if 0:
#define KDF_UNROLL_PWXFORM 0
// if both this ^^^^^^^^^^ and PWXFORM_ROUND set to 0: -7666 bytes
+// -464 bytes:
+#define KDF_UNROLL_SALSA20 0
/**
* Type and possible values for the flags argument of yescrypt_kdf(),
More information about the busybox-cvs
mailing list