[git commit prelink 1/1] libm_sh: add optimised assembly implementation of lroundf and lrintf
Christian Bruel
christian.bruel at st.com
Mon Jan 3 13:45:09 UTC 2011
commit: http://git.uclibc.org/uClibc/commit/?id=6ac247452e646c2187f2f559143c8c087b0542e0
branch: http://git.uclibc.org/uClibc/commit/?id=refs/heads/prelink
* libc/sysdeps/linux/sh/sysdep.h: Add LOCAL macro
* libm/sh/sh4/Makefile.arch: Include asm source in the build
* libm/sh/sh4/s_lrintf.S [NEW]: optimised asm lrintf
* libm/sh/sh4/s_lroundf.S [NEW]: optimised asm lroundf
Signed-off-by: Christian Bruel <christian.bruel at st.com>
Signed-off-by: Carmelo Amoroso <carmelo.amoroso at st.com>
---
libc/sysdeps/linux/sh/sysdep.h | 1 +
libm/sh/sh4/Makefile.arch | 8 ++++--
libm/sh/sh4/s_lrintf.S | 52 ++++++++++++++++++++++++++++++++++++++++
libm/sh/sh4/s_lroundf.S | 39 ++++++++++++++++++++++++++++++
4 files changed, 97 insertions(+), 3 deletions(-)
create mode 100644 libm/sh/sh4/s_lrintf.S
create mode 100644 libm/sh/sh4/s_lroundf.S
diff --git a/libc/sysdeps/linux/sh/sysdep.h b/libc/sysdeps/linux/sh/sysdep.h
index 2ef0a33..8b3c682 100644
--- a/libc/sysdeps/linux/sh/sysdep.h
+++ b/libc/sysdeps/linux/sh/sysdep.h
@@ -26,6 +26,7 @@
/* Syntactic details of assembler. */
+#define LOCAL(X) .L_##X /* Expands to the assembler label .L_<X>; the .L prefix keeps the label non-exported. */
#define ALIGNARG(log2) log2
/* For ELF we need the `.type' directive to make shared libs work right. */
#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,@##typearg;
diff --git a/libm/sh/sh4/Makefile.arch b/libm/sh/sh4/Makefile.arch
index 122d84d..e38e99c 100644
--- a/libm/sh/sh4/Makefile.arch
+++ b/libm/sh/sh4/Makefile.arch
@@ -7,11 +7,13 @@
#
ifeq ($(UCLIBC_HAS_FENV),y)
-libm_ARCH_SRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
-libm_ARCH_OBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SRC))
+libm_ARCH_CSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.c)
+libm_ARCH_COBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.c,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_CSRC))
+libm_ARCH_SSRC:=$(wildcard $(libm_SUBARCH_DIR)/*.S)
+libm_ARCH_SOBJ:=$(patsubst $(libm_SUBARCH_DIR)/%.S,$(libm_SUBARCH_OUT)/%.o,$(libm_ARCH_SSRC))
endif
-libm_ARCH_OBJS:=$(libm_ARCH_OBJ)
+libm_ARCH_OBJS:=$(libm_ARCH_COBJ) $(libm_ARCH_SOBJ)
ifeq ($(DOPIC),y)
libm-a-y+=$(libm_ARCH_OBJS:.o=.os)
diff --git a/libm/sh/sh4/s_lrintf.S b/libm/sh/sh4/s_lrintf.S
new file mode 100644
index 0000000..d8cec32
--- /dev/null
+++ b/libm/sh/sh4/s_lrintf.S
@@ -0,0 +1,52 @@
+/* Round argument to nearest integer value. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented in a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel at st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lrintf) /* Argument is read from fr5; the integer result is left in r0. */
+ mov #0,r0 /* r0 = 0, the temporary FPSCR value */
+ sts fpscr,r3 /* r3 = caller's FPSCR, restored in the rts delay slot */
+ lds r0,fpscr /* FPSCR = 0 for the computation; NOTE(review): presumably single precision, round-to-nearest -- confirm against the SH-4 manual */
+ flds fr5,fpul /* fpul = raw bits of the argument */
+ mov.l LOCAL(mask),r1 /* r1 = 0x80000000 */
+ sts fpul,r2 /* r2 = raw bits of the argument */
+ and r2,r1 /* r1 = sign bit of the argument, other bits cleared */
+ mov.l LOCAL(midway),r2 /* r2 = bit pattern of 0.5f */
+ or r1,r2 /* r2 = 0.5f carrying the argument's sign */
+ lds r2,fpul
+ fsts fpul,fr2 /* fr2 = +/-0.5 */
+ fadd fr2,fr5 /* fr5 = x +/- 0.5 (overwrites the argument); NOTE(review): the sum is rounded to float, so it may be inexact for large |x| -- confirm acceptable */
+ ftrc fr5,fpul /* fpul = fr5 truncated to an integer */
+ sts fpul,r0 /* r0 = candidate result */
+ float fpul,fr2 /* fr2 = candidate converted back to float */
+ fcmp/eq fr5,fr2 /* T = 1 when x +/- 0.5 has no fractional part, i.e. x was a halfway case */
+ bf/s 0f /* not a halfway case: the candidate is final */
+ mov #1,r2 /* (delay slot, always executed) r2 = 1 */
+ tst r1,r1 /* T = 1 when the argument's sign bit is clear */
+ and r0,r2 /* r2 = low bit of the candidate */
+ movt r1 /* r1 = 1 if the sign bit was clear, else 0 */
+ shal r1 /* r1 = 2 or 0 */
+ tst r2,r2 /* T = 1 when the candidate is even */
+ add #-1,r1 /* r1 = +1 for a clear sign bit, -1 otherwise */
+ bt 0f /* even candidate: keep it */
+ sub r1,r0 /* odd candidate: step one toward zero to land on the even neighbour */
+0:
+ rts
+ lds r3,fpscr /* (delay slot) restore the caller's FPSCR */
+
+ .align 2
+LOCAL(mask):
+ .long 0x80000000 /* sign bit of a 32-bit float */
+LOCAL(midway):
+ .long 1056964608 /* 0x3f000000, the bit pattern of 0.5f */
+
+END(lrintf)
diff --git a/libm/sh/sh4/s_lroundf.S b/libm/sh/sh4/s_lroundf.S
new file mode 100644
index 0000000..fda3a4b
--- /dev/null
+++ b/libm/sh/sh4/s_lroundf.S
@@ -0,0 +1,39 @@
+/* Round argument to nearest integer value, halfway cases away from zero. SH4 version.
+ * According to ISO/IEC 9899:1999. This version doesn't handle range error.
+ * If arg is not finite or if the result cannot be represented in a long,
+ * return an unspecified value. No exception raised.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd.
+ *
+ * Author: Christian Bruel <christian.bruel at st.com>
+ *
+ * Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+ */
+
+#include <sysdep.h>
+
+ENTRY(lroundf) /* Argument is read from fr5; the integer result is left in r0. */
+ mov #0,r0 /* r0 = 0, the temporary FPSCR value */
+ sts fpscr,r3 /* r3 = caller's FPSCR, restored in the rts delay slot */
+ lds r0,fpscr /* FPSCR = 0 for the computation; NOTE(review): presumably single precision, round-to-nearest -- confirm against the SH-4 manual */
+ flds fr5,fpul /* fpul = raw bits of the argument */
+ mov.l LOCAL(mask),r1 /* r1 = 0x80000000 */
+ sts fpul,r2 /* r2 = raw bits of the argument */
+ and r2,r1 /* r1 = sign bit of the argument, other bits cleared */
+ mov.l LOCAL(midway),r2 /* r2 = bit pattern of 0.5f */
+ or r1,r2 /* r2 = 0.5f carrying the argument's sign */
+ lds r2,fpul
+ fsts fpul,fr2 /* fr2 = +/-0.5 */
+ fadd fr2,fr5 /* fr5 = x +/- 0.5 (overwrites the argument); NOTE(review): the sum is rounded to float before truncation, so odd integers >= 2^23 or values just below 0.5 may come out one too high -- confirm */
+ ftrc fr5,fpul /* fpul = fr5 truncated to an integer: nearest integer, halfway cases away from zero */
+ sts fpul,r0 /* r0 = result */
+ rts
+ lds r3,fpscr /* (delay slot) restore the caller's FPSCR */
+
+ .align 2
+LOCAL(mask):
+ .long 0x80000000 /* sign bit of a 32-bit float */
+LOCAL(midway):
+ .long 1056964608 /* 0x3f000000, the bit pattern of 0.5f */
+
+END(lroundf)
--
1.7.2.2
More information about the uClibc-cvs
mailing list