[git commit] factor: much faster, and very slightly larger isqrt()
Denys Vlasenko
vda.linux at googlemail.com
Tue Apr 11 05:02:42 UTC 2017
commit: https://git.busybox.net/busybox/commit/?id=10673c44f11045a0c99b19f32930097e9b3ae148
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master
function old new delta
isqrt_odd 70 88 +18
Signedoffby: Denys Vlasenko <vda.linux at googlemail.com>

coreutils/factor.c  44 ++++++++++
1 file changed, 10 insertions(+), 34 deletions()
diff git a/coreutils/factor.c b/coreutils/factor.c
index 11097c1..f910fdb 100644
 a/coreutils/factor.c
+++ b/coreutils/factor.c
@@ 48,41 +48,17 @@ typedef unsigned long half_t;
static inline half_t isqrt(wide_t N)
{
half_t x;
+ unsigned shift;
 // Never called with N < 1
 //if (N == 0)
 // return 0;

 x = HALF_MAX;
 /* First approximation of x+1 > sqrt(N)  allones, half as many bits:
 * 1xxxxx > 111 (six bits to three)
 * 01xxxx > 111
 * 001xxx > 011
 * 0001xx > 011 and so on.
 */
 // It is actually not performancecritical at all.
 // Can simply start Newton loop with very conservative x=0xffffffff.
 //wide_t mask_2bits;
 //mask_2bits = TOPMOST_WIDE_BIT  (TOPMOST_WIDE_BIT >> 1);
 //while (!(N & mask_2bits)) {
 // x >>= 1;
 // mask_2bits >>= 2;
 //}
 dbg("x:%"HALF_FMT"x", x);

 for (;;) {
 half_t y = (x + N/x) / 2;
 dbg("y:%x y^2:%llx", y, (wide_t)y * y);
 /*
 * "real" y may be one bit wider: 0x100000000 and get truncated to 0.
 * In this case, "real" y is > x. The first check below is for this case:
 */
 if (y == 0  y >= x) {
 dbg("isqrt(%llx)=%"HALF_FMT"x", N, x);
 return x;
 }
 x = y;
 }
+ shift = WIDE_BITS  2;
+ x = 0;
+ do {
+ x = (x << 1) + 1;
+ if ((wide_t)x * x > (N >> shift))
+ x; /* whoops, that +1 was too much */
+ shift = 2;
+ } while ((int)shift >= 0);
+ return x;
}
static NOINLINE half_t isqrt_odd(wide_t N)
More information about the busyboxcvs
mailing list