[git commit] libbb/bitops.c: add inlining comment

Denys Vlasenko vda.linux at googlemail.com
Wed Jul 9 06:21:47 UTC 2025


commit: https://git.busybox.net/busybox/commit/?id=11d4c08d7541408e4fbb7daaaf63aba1d07685ea
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 libbb/bitops.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/libbb/bitops.c b/libbb/bitops.c
index 5f239676c..467e1a2d9 100644
--- a/libbb/bitops.c
+++ b/libbb/bitops.c
@@ -58,6 +58,26 @@ void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src)
 # endif
 #endif
 }
+// The above can be inlined in libbb.h, in a way that leaves the compiler
+// free to use better addressing modes than (%reg), and
+// to keep the result in a register
+// (instead of storing it to memory after each XOR):
+//#if defined(__SSE__)
+//#include <xmmintrin.h>
+//^^^ or just: typedef float __m128_u __attribute__((__vector_size__(16),__may_alias__,__aligned__(1)));
+//static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src)
+//{
+//	__m128_u xmm0, xmm1;
+//	asm volatile(
+//"\n		xorps	%1,%0"
+//		: "=x" (xmm0), "=x" (xmm1)
+//		: "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src)
+//	);
+//	*(__m128_u*)dst = xmm0; // this store may be optimized out!
+//}
+//#endif
+// but I don't trust gcc optimizer enough to not generate some monstrosity.
+// See GMULT() function in TLS code as an example.
 
 void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2)
 {


More information about the busybox-cvs mailing list