[git commit] tls: P256: x86-64 optimized sp_256_sub_8_p256_mod

Denys Vlasenko vda.linux at googlemail.com
Wed Oct 6 17:59:39 UTC 2021


commit: https://git.busybox.net/busybox/commit/?id=87e3f2e9f8a1c99b223b316fbefb5ae49c2a8fe2
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
sp_256_sub_8_p256_mod                                  -      53     +53
sp_256_mont_reduce_8                                 223     217      -6
sp_256_mont_dbl_8                                     38      32      -6
sp_256_ecc_mulmod_8                                 1535    1529      -6
sp_256_proj_point_dbl_8                              469     454     -15
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/4 up/down: 53/-33)             Total: 20 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 networking/tls_sp_c32.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/networking/tls_sp_c32.c b/networking/tls_sp_c32.c
index 14a7c7066..1391cb405 100644
--- a/networking/tls_sp_c32.c
+++ b/networking/tls_sp_c32.c
@@ -346,8 +346,8 @@ static int sp_256_sub_8(sp_digit* r, const sp_digit* a, const sp_digit* b)
 #endif
 }
 
-#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 /* Sub p256_mod from a into r. (r = a - p256_mod). */
+#if ALLOW_ASM && defined(__GNUC__) && defined(__i386__)
 static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 {
 	sp_digit reg;
@@ -390,6 +390,36 @@ static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
 		: "memory"
 	);
 }
+#elif ALLOW_ASM && defined(__GNUC__) && defined(__x86_64__)
+static void sp_256_sub_8_p256_mod(sp_digit* r, const sp_digit* a)
+{
+	uint64_t reg;	// scratch register (%3) holding the 64-bit word currently being processed
+	uint64_t ooff;	// register (%2) preloaded with the constant 00000000ffffffff
+//a and r are accessed as four 64-bit words (offsets 0,8,16,24); p256_mod[3..0] = ffffffff00000001 0000000000000000 00000000ffffffff ffffffffffffffff
+	asm volatile (
+"\n		movq	(%0), %3"	// load word 0 of a
+"\n		addq	$1, %3"		// adding 1 is the same as subtracting ffffffffffffffff
+"\n		movq	%3, (%1)"	// store word 0 of r (movq does not modify flags, carry from addq is kept)
+"\n		cmc"			// only carry bit needs inverting: carry of the add is the inverse of the subtraction's borrow
+"\n"
+"\n		movq	1*8(%0), %3"	// load word 1 of a
+"\n		sbbq	%2, %3"		// %2 holds 00000000ffffffff; subtract it together with the borrow
+"\n		movq	%3, 1*8(%1)"	// store word 1 of r
+"\n"
+"\n		movq	2*8(%0), %3"	// load word 2 of a
+"\n		sbbq	$0, %3"		// p256_mod word 2 is zero: only the borrow is subtracted
+"\n		movq	%3, 2*8(%1)"	// store word 2 of r
+"\n"
+"\n		movq	3*8(%0), %3"	// load word 3 of a
+"\n		sbbq	$0, %3"		// subtract the borrow coming from word 2
+"\n		addq	%2, %3"		// adding 00000000ffffffff (in %2) is the same as subtracting ffffffff00000001
+"\n		movq	%3, 3*8(%1)"	// store word 3 of r; the final carry/borrow is not returned (function is void)
+"\n"
+		: "=r" (a), "=r" (r), "=r" (ooff), "=r" (reg)	// operands %0..%3
+		: "0" (a), "1" (r), "2" (0x00000000ffffffff)	// inputs tied to the registers of outputs %0, %1, %2
+		: "memory"	// a[] is read and r[] is written through the pointers, not via memory operands
+	);
+}
 #else
 # define sp_256_sub_8_p256_mod(r, a) sp_256_sub_8((r), (a), p256_mod)
 #endif


More information about the busybox-cvs mailing list