[git commit] tls: document PSTM_64BIT + PSTM_X86_64 optimizations better

Denys Vlasenko vda.linux at googlemail.com
Sun Feb 8 07:30:03 UTC 2026


commit: https://git.busybox.net/busybox/commit/?id=8289b346265ef63a809c68b608cd7689bbf0a342
branch: https://git.busybox.net/busybox/log/?h=master

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 networking/tls.h                        | 18 +++++++++---------
 networking/tls_pstm_montgomery_reduce.c |  4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/networking/tls.h b/networking/tls.h
index 9751d30ff..167f8baf1 100644
--- a/networking/tls.h
+++ b/networking/tls.h
@@ -10,7 +10,6 @@
  */
 #include "libbb.h"
 
-
 /* Config tweaks */
 #define HAVE_NATIVE_INT64
 #undef  USE_1024_KEY_SPEED_OPTIMIZATIONS
@@ -31,16 +30,18 @@
 # define PSTM_32BIT
 # define PSTM_X86
 #endif
-//#if defined(__GNUC__) && defined(__x86_64__)
-//  /* PSTM_X86_64 works correctly, but +782 bytes. */
-//  /* Looks like most of the growth is because of PSTM_64BIT. */
+#if defined(__GNUC__) && defined(__x86_64__)
+  /* PSTM_64BIT + PSTM_X86_64 works correctly, but increases code size:
+   * +928 bytes with PSTM_64BIT but without PSTM_X86_64
+   * +1003 bytes with both, with INNERMUL8 (loop unrolling in pstm_montgomery_reduce())
+   * +664 bytes with both, without INNERMUL8
+   */
 //# define PSTM_64BIT
 //# define PSTM_X86_64
-//#endif
+#endif
 //#if SOME_COND #define PSTM_MIPS, #define PSTM_32BIT
 //#if SOME_COND #define PSTM_ARM,  #define PSTM_32BIT
 
-
 #define PS_SUCCESS              0
 #define PS_FAILURE              -1
 #define PS_ARG_FAIL             -6      /* Failure due to bad function param */
@@ -51,14 +52,14 @@
 #define PS_TRUE         1
 #define PS_FALSE        0
 
+#undef ENDIAN_BIG
+#undef ENDIAN_LITTLE
 #if BB_BIG_ENDIAN
 # define ENDIAN_BIG     1
-# undef  ENDIAN_LITTLE
 //#????  ENDIAN_32BITWORD
 // controls only STORE32L, which we don't use
 #else
 # define ENDIAN_LITTLE  1
-# undef  ENDIAN_BIG
 #endif
 
 typedef uint64_t uint64;
@@ -98,7 +99,6 @@ void tls_get_random(void *buf, unsigned len) FAST_FUNC;
 #undef  min
 #define min(x, y) ((x) < (y) ? (x) : (y))
 
-
 #include "tls_pstm.h"
 #include "tls_aes.h"
 #include "tls_aesgcm.h"
diff --git a/networking/tls_pstm_montgomery_reduce.c b/networking/tls_pstm_montgomery_reduce.c
index 4181a0590..e63e590db 100644
--- a/networking/tls_pstm_montgomery_reduce.c
+++ b/networking/tls_pstm_montgomery_reduce.c
@@ -135,7 +135,7 @@ asm(                                                       \
 	:"0"(_c[LO]), "1"(cy), "r"(mu), "r"(*tmpm++)           \
 	: "%rax", "%rdx", "cc")
 
-#define INNERMUL8				\
+#define INNERMUL8_disabled_for_bbox				\
 asm(							\
 	"movq 0(%5),%%rax    \n\t"  \
 	"movq 0(%2),%%r10    \n\t"  \
@@ -398,7 +398,7 @@ int32 FAST_FUNC pstm_montgomery_reduce(psPool_t *pool, pstm_int *a, pstm_int *m,
 		_c   = c + x;
 		tmpm = m->dp;
 		y = 0;
-#ifdef PSTM_X86_64
+#ifdef INNERMUL8 //bbox: PSTM_X86_64
 		for (; y < (pa & ~7); y += 8) {
 			INNERMUL8;
 			_c   += 8;


More information about the busybox-cvs mailing list