[git commit] factor: detect squares

Denys Vlasenko vda.linux at googlemail.com
Tue Dec 22 19:24:30 UTC 2020


commit: https://git.busybox.net/busybox/commit/?id=6452c300366cf0e322227ad39991ec2fdfd6beed
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

If we have a square, the speedup can be extremely large
(in best case example below, it's ~40000 times faster):

$ time ./busybox_old factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real	0m4.246s
$ time ./busybox factor 18446743988964486098
18446743988964486098: 2 3037000493 3037000493
real	0m0.000s

function                                             old     new   delta
isqrt_odd                                              -      57     +57
print_w                                                -      36     +36
factorize                                            218     236     +18
print_h                                                -       7      +7
factorize_numstr                                      65      72      +7
------------------------------------------------------------------------------
(add/remove: 3/0 grow/shrink: 2/0 up/down: 125/0)             Total: 125 bytes

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 coreutils/factor.c     | 42 +++++++++++++++++++++++++++++++++++++++---
 testsuite/factor.tests | 23 +++++++++++++++++++++++
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/coreutils/factor.c b/coreutils/factor.c
index 27f26fe3f..729bd36a0 100644
--- a/coreutils/factor.c
+++ b/coreutils/factor.c
@@ -85,7 +85,8 @@ static const uint64_t packed_wheel[] = {
 #undef R
 #define WHEEL_START 5
 #define WHEEL_SIZE (5 + 24 * 20)
-#define wheel_tab ((uint8_t*)&bb_common_bufsiz1)
+#define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
+#define wheel_tab    (((uint8_t*)&bb_common_bufsiz1) + 1)
 /*
  * Why, you ask?
  * plain byte array:
@@ -119,9 +120,39 @@ static void unpack_wheel(void)
 	}
 }
 
+/* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
+static NOINLINE void print_w(wide_t n)
+{
+	unsigned rep = square_count;
+	do
+		printf(" %llu", n);
+	while (--rep != 0);
+}
+static NOINLINE void print_h(half_t n)
+{
+	print_w(n);
+}
+
+static void factorize(wide_t N);
+
 static half_t isqrt_odd(wide_t N)
 {
 	half_t s = isqrt(N);
+	/* s^2 is <= N, (s+1)^2 > N */
+
+	/* If s^2 in fact is EQUAL to N, it's very lucky.
+	 * Examples:
+	 * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
+	 * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
+	 */
+	if ((wide_t)s * s == N) {
+		/* factorize sqrt(N), printing each factor twice */
+		square_count *= 2;
+		factorize(s);
+		/* Let caller know we recursed */
+		return 0;
+	}
+
 	/* Subtract 1 from even s, odd s won't change: */
 	/* (doesnt work for zero, but we know that s != 0 here) */
 	s = (s - 1) | 1;
@@ -152,6 +183,8 @@ static NOINLINE void factorize(wide_t N)
 	 * factor 18446744073709551557 (0xffffffffffffffc5).
 	 */
 	max_factor = isqrt_odd(N);
+	if (!max_factor)
+		return; /* square was detected and recursively factored */
 	factor = 2;
 	w = 0;
 	for (;;) {
@@ -162,8 +195,10 @@ static NOINLINE void factorize(wide_t N)
 		 */
 		while ((N % factor) == 0) { /* not likely */
 			N = N / factor;
-			printf(" %"HALF_FMT"u", factor);
+			print_h(factor);
 			max_factor = isqrt_odd(N);
+			if (!max_factor)
+				return; /* square was detected */
 		}
 		if (factor >= max_factor)
 			break;
@@ -178,7 +213,7 @@ static NOINLINE void factorize(wide_t N)
 	}
  end:
 	if (N > 1)
-		printf(" %llu", N);
+		print_w(N);
 	bb_putchar('\n');
 }
 
@@ -193,6 +228,7 @@ static void factorize_numstr(const char *numstr)
 	if (errno)
 		bb_show_usage();
 	printf("%llu:", N);
+	square_count = 1;
 	factorize(N);
 }
 
diff --git a/testsuite/factor.tests b/testsuite/factor.tests
index 2cf4a54ce..e404e29c1 100755
--- a/testsuite/factor.tests
+++ b/testsuite/factor.tests
@@ -45,4 +45,27 @@ testing "factor \$((2*3*5*7*11*13*17*19*23*29*31*37*41*43*47))" \
 	"614889782588491410: 2 3 5 7 11 13 17 19 23 29 31 37 41 43 47\n" \
 	"" ""
 
+# Test that square-detection code is not buggy
+testing "factor 2 * 3037000493 * 3037000493" \
+	"factor 18446743988964486098" \
+	"18446743988964486098: 2 3037000493 3037000493\n" \
+	"" ""
+testing "factor 3 * 2479700513 * 2479700513" \
+	"factor 18446743902517389507" \
+	"18446743902517389507: 3 2479700513 2479700513\n" \
+	"" ""
+# including square-of-square cases:
+testing "factor 3 * 37831 * 37831 * 37831 * 37831" \
+	"factor 6144867742934288163" \
+	"6144867742934288163: 3 37831 37831 37831 37831\n" \
+	"" ""
+testing "factor 3 * 13^16" \
+	"factor 1996249827549539523" \
+	"1996249827549539523: 3 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
+	"" ""
+testing "factor 13^16" \
+	"factor 665416609183179841" \
+	"665416609183179841: 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13\n" \
+	"" ""
+
 exit $FAILCOUNT


More information about the busybox-cvs mailing list