[PATCH 3/6] ldso/: tls support for dynamic linker

Austin Foxley austinf at cetoncorp.com
Sun Sep 20 19:29:21 UTC 2009


Signed-off-by: Austin Foxley <austinf at cetoncorp.com>
---
 ldso/include/dl-hash.h        |   61 ++-
 ldso/include/ldso.h           |    6 +
 ldso/include/ldsodefs.h       |  147 ++++++
 ldso/ldso/Makefile.in         |   11 +
 ldso/ldso/arm/aeabi_read_tp.S |   64 +++
 ldso/ldso/arm/dl-debug.h      |    4 +-
 ldso/ldso/arm/dl-sysdep.h     |   21 +-
 ldso/ldso/arm/elfinterp.c     |   52 ++-
 ldso/ldso/arm/resolve.S       |    4 +
 ldso/ldso/arm/thumb_atomics.S |   79 +++
 ldso/ldso/dl-elf.c            |   67 +++
 ldso/ldso/dl-hash.c           |   87 ++--
 ldso/ldso/dl-startup.c        |   14 +
 ldso/ldso/dl-tls.c            | 1045 +++++++++++++++++++++++++++++++++++++++++
 ldso/ldso/i386/elfinterp.c    |    4 +-
 ldso/ldso/ldso.c              |  158 ++++++-
 ldso/ldso/mips/elfinterp.c    |   71 +++-
 ldso/ldso/sh/dl-debug.h       |    2 +
 ldso/ldso/sh/dl-sysdep.h      |    9 +
 ldso/ldso/sh/elfinterp.c      |   39 ++-
 ldso/ldso/sparc/dl-sysdep.h   |    4 +-
 ldso/ldso/sparc/elfinterp.c   |   75 ++--
 ldso/libdl/libdl.c            |  325 +++++++++++++-
 23 files changed, 2204 insertions(+), 145 deletions(-)
 create mode 100644 ldso/include/ldsodefs.h
 create mode 100644 ldso/ldso/arm/aeabi_read_tp.S
 create mode 100644 ldso/ldso/arm/thumb_atomics.S
 create mode 100644 ldso/ldso/dl-tls.c

diff --git a/ldso/include/dl-hash.h b/ldso/include/dl-hash.h
index e7ca4ab..1b28a34 100644
--- a/ldso/include/dl-hash.h
+++ b/ldso/include/dl-hash.h
@@ -34,7 +34,32 @@ struct elf_resolve {
   struct elf_resolve * next;
   struct elf_resolve * prev;
   /* Nothing after this address is used by gdb. */
-  ElfW(Addr) mapaddr;    /* Address at which ELF segments (either main app and DSO) are mapped into */
+
+#if USE_TLS
+  /* Thread-local storage related info.  */
+
+  /* Start of the initialization image.  */
+  void *l_tls_initimage;
+  /* Size of the initialization image.  */
+  size_t l_tls_initimage_size;
+  /* Size of the TLS block.  */
+  size_t l_tls_blocksize;
+  /* Alignment requirement of the TLS block.  */
+  size_t l_tls_align;
+  /* Offset of the first byte modulo the TLS alignment.  */
+  size_t l_tls_firstbyte_offset;
+# ifndef NO_TLS_OFFSET
+#  define NO_TLS_OFFSET	0
+# endif
+  /* For objects present at startup time: offset in the static TLS block.  */
+  ptrdiff_t l_tls_offset;
+  /* Index of the module in the dtv array.  */
+  size_t l_tls_modid;
+  /* Nonzero if _dl_init_static_tls should be called for this module */
+  unsigned int l_need_tls_init:1;
+#endif
+
+  ElfW(Addr) mapaddr;
   enum {elf_lib, elf_executable,program_interpreter, loaded_file} libtype;
   struct dyn_elf * symbol_scope;
   unsigned short usage_count;
@@ -106,26 +131,31 @@ struct elf_resolve {
 
 extern struct dyn_elf     * _dl_symbol_tables;
 extern struct elf_resolve * _dl_loaded_modules;
-extern struct dyn_elf     * _dl_handles;
+extern struct dyn_elf 	  * _dl_handles;
 
 extern struct elf_resolve * _dl_add_elf_hash_table(const char * libname,
 	DL_LOADADDR_TYPE loadaddr, unsigned long * dynamic_info,
 	unsigned long dynamic_addr, unsigned long dynamic_size);
 
-extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt,
-			      struct elf_resolve *mytpnt, int type_class
-#ifdef __FDPIC__
-			      , struct elf_resolve **tpntp
+#if USE_TLS || defined __FDPIC__
+#define _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT
+#define _DL_LOOKUP_HASH_EXTRA_TPNT 	,struct elf_resolve **tpntp
+#else
+#undef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT
+#define _DL_LOOKUP_HASH_EXTRA_TPNT
 #endif
-			      );
 
+extern char * _dl_lookup_hash(const char * name, struct dyn_elf * rpnt, 
+			    struct elf_resolve *mytpnt, int type_class _DL_LOOKUP_HASH_EXTRA_TPNT);
+				
 static __always_inline char *_dl_find_hash(const char *name, struct dyn_elf *rpnt,
-					   struct elf_resolve *mytpnt, int type_class)
+					struct elf_resolve *mytpnt, int type_class,
+					struct elf_resolve **tpntp)
 {
-#ifdef __FDPIC__
-	return _dl_lookup_hash(name, rpnt, mytpnt, type_class, NULL);
+#ifdef _DL_LOOKUP_HASH_NEEDS_EXTRA_TPNT
+        return _dl_lookup_hash(name, rpnt, mytpnt, type_class, tpntp);
 #else
-	return _dl_lookup_hash(name, rpnt, mytpnt, type_class);
+        return _dl_lookup_hash(name, rpnt, mytpnt, type_class);
 #endif
 }
 
@@ -148,8 +178,11 @@ static __inline__ int _dl_symbol(char * name)
 #define LD_ERROR_NOTDYN 5
 #define LD_ERROR_MMAP_FAILED 6
 #define LD_ERROR_NODYNAMIC 7
-#define LD_WRONG_RELOCS 8
-#define LD_BAD_HANDLE 9
-#define LD_NO_SYMBOL 10
+#define LD_ERROR_TLS_FAILED 8
+#define LD_WRONG_RELOCS 9
+#define LD_BAD_HANDLE 10
+#define LD_NO_SYMBOL 11
+
+
 
 #endif /* _LD_HASH_H_ */
diff --git a/ldso/include/ldso.h b/ldso/include/ldso.h
index dc4d92d..1dd35fe 100644
--- a/ldso/include/ldso.h
+++ b/ldso/include/ldso.h
@@ -38,6 +38,10 @@
 #include <dl-string.h>
 /* Now the ldso specific headers */
 #include <dl-elf.h>
+#ifdef __UCLIBC_HAS_TLS__
+/* Defines USE_TLS */
+#include <tls.h>
+#endif
 #include <dl-hash.h>
 
 /* common align masks, if not specified by sysdep headers */
@@ -113,6 +117,8 @@ extern int   _dl_debug_file;
 #endif
 
 extern void *_dl_malloc(size_t size);
+extern void * _dl_calloc(size_t __nmemb, size_t __size);
+extern void * _dl_realloc(void * __ptr, size_t __size);
 extern void _dl_free(void *);
 extern char *_dl_getenv(const char *symbol, char **envp);
 extern void _dl_unsetenv(const char *symbol, char **envp);
diff --git a/ldso/include/ldsodefs.h b/ldso/include/ldsodefs.h
new file mode 100644
index 0000000..d9fe5bb
--- /dev/null
+++ b/ldso/include/ldsodefs.h
@@ -0,0 +1,147 @@
+#ifndef _LDSODEFS_H
+#define _LDSODEFS_H     1
+
+#include <bits/kernel-features.h>
+
+#include <features.h>
+#include <tls.h>
+
+#ifdef __mips__
+/* The MIPS ABI specifies that the dynamic section has to be read-only.  */
+
+#define DL_RO_DYN_SECTION 1
+
+/* TODO: Import in 64-bit relocations from glibc. */
+#endif
+
+#ifndef SHARED
+# define EXTERN extern
+#else
+# ifdef IS_IN_rtld
+#  define EXTERN
+# else
+#  define EXTERN extern
+# endif
+#endif
+
+/* Non-shared code has no support for multiple namespaces.  */
+#ifdef SHARED
+# define DL_NNS 16
+#else
+# define DL_NNS 1
+#endif
+
+#define GL(x) _##x
+#define GLRO(x) _##x
+
+/* Variable pointing to the end of the stack (or close to it).  This value
+   must be constant over the runtime of the application.  Some programs
+   might use the variable which results in copy relocations on some
+   platforms.  But this does not matter, ld.so can always use the local
+   copy.  */
+extern void *__libc_stack_end;
+
+/* Determine next available module ID.  */
+extern size_t _dl_next_tls_modid (void) internal_function attribute_hidden;
+
+/* Calculate offset of the TLS blocks in the static TLS block.  */
+extern void _dl_determine_tlsoffset (void) internal_function attribute_hidden;
+
+/* Set up the data structures for TLS, when they were not set up at startup.
+   Returns nonzero on malloc failure.
+   This is called from _dl_map_object_from_fd or by libpthread.  */
+extern int _dl_tls_setup (void) internal_function;
+rtld_hidden_proto (_dl_tls_setup)
+
+/* Allocate memory for static TLS block (unless MEM is nonzero) and dtv.  */
+extern void *_dl_allocate_tls (void *mem) internal_function;
+
+/* Get size and alignment requirements of the static TLS block.  */
+extern void _dl_get_tls_static_info (size_t *sizep, size_t *alignp)
+     internal_function;
+
+extern void _dl_allocate_static_tls (struct link_map *map)
+     internal_function attribute_hidden;
+
+/* Taken from glibc/elf/dl-reloc.c */
+#define CHECK_STATIC_TLS(sym_map)											\
+	do {																	\
+		if (__builtin_expect ((sym_map)->l_tls_offset == NO_TLS_OFFSET, 0))	\
+			_dl_allocate_static_tls (sym_map);								\
+	} while (0)
+
+/* These are internal entry points to the two halves of _dl_allocate_tls,
+   only used within rtld.c itself at startup time.  */
+extern void *_dl_allocate_tls_storage (void)
+     internal_function attribute_hidden;
+extern void *_dl_allocate_tls_init (void *) internal_function;
+
+/* Deallocate memory allocated with _dl_allocate_tls.  */
+extern void _dl_deallocate_tls (void *tcb, bool dealloc_tcb) internal_function;
+
+extern void _dl_nothread_init_static_tls (struct link_map *) attribute_hidden;
+
+/* Highest dtv index currently needed.  */
+EXTERN size_t _dl_tls_max_dtv_idx;
+/* Flag signalling whether there are gaps in the module ID allocation.  */
+EXTERN bool _dl_tls_dtv_gaps;
+/* Information about the dtv slots.  */
+EXTERN struct dtv_slotinfo_list
+{
+  size_t len;
+  struct dtv_slotinfo_list *next;
+  struct dtv_slotinfo
+  {
+    size_t gen;
+    bool is_static;
+    struct link_map *map;
+  } slotinfo[0];
+} *_dl_tls_dtv_slotinfo_list;
+/* Number of modules in the static TLS block.  */
+EXTERN size_t _dl_tls_static_nelem;
+/* Size of the static TLS block.  */
+EXTERN size_t _dl_tls_static_size;
+/* Size actually allocated in the static TLS block.  */
+EXTERN size_t _dl_tls_static_used;
+/* Alignment requirement of the static TLS block.  */
+EXTERN size_t _dl_tls_static_align;
+/* Function pointer for catching TLS errors.  */
+EXTERN void **(*_dl_error_catch_tsd) (void) __attribute__ ((const));
+
+/* Number of additional entries in the slotinfo array of each slotinfo
+   list element.  A large number makes it almost certain that we never
+   have to iterate beyond the first element in the slotinfo list.  */
+# define TLS_SLOTINFO_SURPLUS (62)
+
+/* Number of additional slots in the dtv allocated.  */
+# define DTV_SURPLUS	(14)
+
+  /* Initial dtv of the main thread, not allocated with normal malloc.  */
+  EXTERN void *_dl_initial_dtv;
+  /* Generation counter for the dtv.  */
+  EXTERN size_t _dl_tls_generation;
+
+  EXTERN void (*_dl_init_static_tls) (struct link_map *);
+
+/* We have the auxiliary vector.  */
+#define HAVE_AUX_VECTOR
+
+/* We can assume that the kernel always provides the AT_UID, AT_EUID,
+   AT_GID, and AT_EGID values in the auxiliary vector from 2.4.0 or so on.  */
+#if __ASSUME_AT_XID
+# define HAVE_AUX_XID
+#endif
+
+/* We can assume that the kernel always provides the AT_SECURE value
+   in the auxiliary vector from 2.5.74 or so on.  */
+#if __ASSUME_AT_SECURE
+# define HAVE_AUX_SECURE
+#endif
+
+/* Starting with one of the 2.4.0 pre-releases the Linux kernel passes
+   up the page size information.  */
+#if __ASSUME_AT_PAGESIZE
+# define HAVE_AUX_PAGESIZE
+#endif
+
+#endif
diff --git a/ldso/ldso/Makefile.in b/ldso/ldso/Makefile.in
index a74c36e..350cc81 100644
--- a/ldso/ldso/Makefile.in
+++ b/ldso/ldso/Makefile.in
@@ -15,6 +15,17 @@ CFLAGS-ldso += -fno-omit-frame-pointer
 CFLAGS-ldso += -I$(top_srcdir)ldso/ldso/$(TARGET_ARCH) -I$(top_srcdir)ldso/include -I$(top_srcdir)ldso/ldso
 CFLAGS-ldso += -DUCLIBC_RUNTIME_PREFIX=\"$(RUNTIME_PREFIX)\" -DUCLIBC_LDSO=\"$(UCLIBC_LDSO)\"
 
+ifeq ($(DODEBUG),y)
+# Not really much point in including debugging info, since gdb
+# can't really debug ldso, since gdb requires help from ldso to
+# debug things....
+# On arm, gcc-4.3.x onwards -Os emits calls to libgcc, which calls _div0,
+# which tries to call raise(). And raise comes from libc so a catch 22.
+# Using -O2 instead. We could have used -fno-early-inlining with -Os too.
+
+CFLAGS-ldso += -O2 -g
+endif
+
 CFLAGS-ldso/ldso/$(TARGET_ARCH)/ := $(CFLAGS-ldso)
 
 CFLAGS-ldso.c := -DLDSO_ELFINTERP=\"$(TARGET_ARCH)/elfinterp.c\" $(CFLAGS-ldso)
diff --git a/ldso/ldso/arm/aeabi_read_tp.S b/ldso/ldso/arm/aeabi_read_tp.S
new file mode 100644
index 0000000..f81bae6
--- /dev/null
+++ b/ldso/ldso/arm/aeabi_read_tp.S
@@ -0,0 +1,64 @@
+/* Copyright (C) 2005 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file. (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so. The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <features.h>
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+
+#include <sysdep.h>
+#include <tls.h>
+
+/* GCC will emit calls to this routine under -mtp=soft.  Linux has an
+   equivalent helper function (which clobbers fewer registers than
+   a normal function call) in a high page of memory; tail call to the
+   helper.
+
+   This function is exported from libc for use by user code.  libpthread, librt,
+   and the dynamic linker get their own private copies, for
+   performance (and in the case of ld.so, out of necessity); those are
+   all hidden.  */
+
+#ifndef NOT_IN_libc
+	.global __aeabi_read_tp
+#else
+        .hidden __aeabi_read_tp
+#endif
+ENTRY (__aeabi_read_tp)
+	mov	r0, #0xffff0fff
+	sub	pc, r0, #31
+END (__aeabi_read_tp)
+
+#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */
+
diff --git a/ldso/ldso/arm/dl-debug.h b/ldso/ldso/arm/dl-debug.h
index d510320..1bca6ff 100644
--- a/ldso/ldso/arm/dl-debug.h
+++ b/ldso/ldso/arm/dl-debug.h
@@ -33,12 +33,14 @@ static const char *_dl_reltypes_tab[] =
   [4]	"R_ARM_PC13",	    "R_ARM_ABS16",	"R_ARM_ABS12",		"R_ARM_THM_ABS5",
   [8]	"R_ARM_ABS8",		"R_ARM_SBREL32","R_ARM_THM_PC22",	"R_ARM_THM_PC8",
   [12]	"R_ARM_AMP_VCALL9",	"R_ARM_SWI24",	"R_ARM_THM_SWI8",	"R_ARM_XPC25",
-  [16]	"R_ARM_THM_XPC22",
+  [16]	"R_ARM_THM_XPC22",  "R_ARM_TLS_DTPMOD32",   "R_ARM_TLS_DTPOFF32",   "R_ARM_TLS_TPOFF32",
   [20]	"R_ARM_COPY",		"R_ARM_GLOB_DAT","R_ARM_JUMP_SLOT",	"R_ARM_RELATIVE",
   [24]	"R_ARM_GOTOFF",		"R_ARM_GOTPC",	 "R_ARM_GOT32",		"R_ARM_PLT32",
   [32]	"R_ARM_ALU_PCREL_7_0","R_ARM_ALU_PCREL_15_8","R_ARM_ALU_PCREL_23_15","R_ARM_LDR_SBREL_11_0",
   [36]	"R_ARM_ALU_SBREL_19_12","R_ARM_ALU_SBREL_27_20",
   [100]	"R_ARM_GNU_VTENTRY","R_ARM_GNU_VTINHERIT","R_ARM_THM_PC11","R_ARM_THM_PC9",
+  [104] "R_ARM_TLS_GD32","R_ARM_TLS_LDM32","R_ARM_TLS_LDO32","R_ARM_TLS_IE32",
+  [108] "R_ARM_TLS_LE32","R_ARM_TLS_LDO12","R_ARM_TLS_LE12","R_ARM_TLS_IE12GP",
   [249] "R_ARM_RXPC25", "R_ARM_RSBREL32", "R_ARM_THM_RPC22", "R_ARM_RREL32",
   [253] "R_ARM_RABS22", "R_ARM_RPC24", "R_ARM_RBASE",
 };
diff --git a/ldso/ldso/arm/dl-sysdep.h b/ldso/ldso/arm/dl-sysdep.h
index 75c58b0..5a2912a 100644
--- a/ldso/ldso/arm/dl-sysdep.h
+++ b/ldso/ldso/arm/dl-sysdep.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2000-2004 by Erik Andersen <andersen at codepoet.org>
  */
 
+#ifndef _ARCH_DL_SYSDEP
+#define _ARCH_DL_SYSDEP
+
 /* Define this if the system uses RELOCA.  */
 #undef ELF_USES_RELOCA
 #include <elf.h>
@@ -55,12 +58,21 @@ static __always_inline unsigned long arm_modulus(unsigned long m, unsigned long
 struct elf_resolve;
 unsigned long _dl_linux_resolver(struct elf_resolve * tpnt, int reloc_entry);
 
-/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry, so
-   PLT entries should not be allowed to define the value.
+/* 4096 bytes alignment */
+#define PAGE_ALIGN 0xfffff000
+#define ADDR_ALIGN 0xfff
+#define OFFS_ALIGN 0x7ffff000
+
+/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or
+   TLS variable, so undefined references should not be allowed to
+   define the value.
+
    ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
    of the main executable's symbols, as for a COPY reloc.  */
-#define elf_machine_type_class(type) \
-  ((((type) == R_ARM_JUMP_SLOT) * ELF_RTYPE_CLASS_PLT)	\
+#define elf_machine_type_class(type)									\
+  ((((type) == R_ARM_JUMP_SLOT || (type) == R_ARM_TLS_DTPMOD32			\
+     || (type) == R_ARM_TLS_DTPOFF32 || (type) == R_ARM_TLS_TPOFF32)	\
+    * ELF_RTYPE_CLASS_PLT)												\
    | (((type) == R_ARM_COPY) * ELF_RTYPE_CLASS_COPY))
 
 /* Return the link-time address of _DYNAMIC.  Conveniently, this is the
@@ -136,6 +148,7 @@ elf_machine_relative (Elf32_Addr load_off, const Elf32_Addr rel_addr,
 		*reloc_addr += load_off;
 	} while (--relative_count);
 }
+#endif /* !_ARCH_DL_SYSDEP */
 
 #ifdef __ARM_EABI__
 #define DL_MALLOC_ALIGN 8	/* EABI needs 8 byte alignment for STRD LDRD */
diff --git a/ldso/ldso/arm/elfinterp.c b/ldso/ldso/arm/elfinterp.c
index 197975e..1469df0 100644
--- a/ldso/ldso/arm/elfinterp.c
+++ b/ldso/ldso/arm/elfinterp.c
@@ -50,7 +50,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 	Elf32_Sym *symtab;
 	ELF_RELOC *rel_addr;
 	int symtab_index;
-	char *new_addr;
+	unsigned long new_addr;
 	char **got_addr;
 	unsigned long instr_addr;
 
@@ -70,7 +70,7 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 
 	/* Get the address of the GOT entry */
 	new_addr = _dl_find_hash(symname, tpnt->symbol_scope,
-				 tpnt, ELF_RTYPE_CLASS_PLT);
+				 tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 	if (unlikely(!new_addr)) {
 		_dl_dprintf(2, "%s: can't resolve symbol '%s'\n",
 			_dl_progname, symname);
@@ -89,13 +89,13 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 		}
 	}
 	if (!_dl_debug_nofixups) {
-		*got_addr = new_addr;
+		*got_addr = (char*)new_addr;
 	}
 #else
-	*got_addr = new_addr;
+	*got_addr = (char*)new_addr;
 #endif
 
-	return (unsigned long) new_addr;
+	return new_addr;
 }
 
 static int
@@ -188,28 +188,40 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 	int symtab_index;
 	unsigned long *reloc_addr;
 	unsigned long symbol_addr;
+	const Elf32_Sym *def = 0;
+	struct elf_resolve *def_mod = 0;
 	int goof = 0;
 
-	reloc_addr = (unsigned long *) (tpnt->loadaddr + (unsigned long) rpnt->r_offset);
+	reloc_addr = (unsigned long *) (tpnt->loadaddr
+                                    + (unsigned long) rpnt->r_offset);
+
 	reloc_type = ELF32_R_TYPE(rpnt->r_info);
 	symtab_index = ELF32_R_SYM(rpnt->r_info);
 	symbol_addr = 0;
 
 	if (symtab_index) {
-
-		symbol_addr = (unsigned long) _dl_find_hash(strtab + symtab[symtab_index].st_name,
-				scope, tpnt, elf_machine_type_class(reloc_type));
+		symbol_addr = _dl_find_hash(strtab + symtab[symtab_index].st_name,
+                                scope, tpnt,
+                                elf_machine_type_class(reloc_type),
+                                &def_mod);
 
 		/*
 		 * We want to allow undefined references to weak symbols - this might
 		 * have been intentional.  We should not be linking local symbols
 		 * here, so all bases should be covered.
 		 */
-		if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) {
-			_dl_dprintf (2, "%s: can't resolve symbol '%s'\n",
-				     _dl_progname, strtab + symtab[symtab_index].st_name);
-			_dl_exit (1);
+		if (!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS)
+			&& (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) {
+			/* This may be non-fatal if called from dlopen.  */
+			return 1;
+
 		}
+	} else {
+        /* Relocs against STN_UNDEF are usually treated as using a
+           symbol value of zero, and using the module containing the
+           reloc itself.  */
+		symbol_addr = symtab[symtab_index].st_value;
+		def_mod = tpnt;
 	}
 
 #if defined (__SUPPORT_LD_DEBUG__)
@@ -265,6 +277,20 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 				_dl_memcpy((void *) reloc_addr,
 					   (void *) symbol_addr, symtab[symtab_index].st_size);
 				break;
+#if USE_TLS
+			case R_ARM_TLS_DTPMOD32:
+				*reloc_addr = def_mod->l_tls_modid;
+				break;
+
+			case R_ARM_TLS_DTPOFF32:
+				*reloc_addr += symbol_addr;
+				break;
+
+			case R_ARM_TLS_TPOFF32:
+				CHECK_STATIC_TLS ((struct link_map *) def_mod);
+				*reloc_addr += (symbol_addr + def_mod->l_tls_offset);
+				break;
+#endif
 			default:
 				return -1; /*call _dl_exit(1) */
 		}
diff --git a/ldso/ldso/arm/resolve.S b/ldso/ldso/arm/resolve.S
index b422c33..08889d0 100644
--- a/ldso/ldso/arm/resolve.S
+++ b/ldso/ldso/arm/resolve.S
@@ -95,6 +95,10 @@
 
 #include <features.h>
 
+#define sl r10
+#define fp r11
+#define ip r12
+
  .text
  .align 4      @ 16 byte boundary and there are 32 bytes below (arm case)
  #if !defined(__thumb__) || defined(__thumb2__)
diff --git a/ldso/ldso/arm/thumb_atomics.S b/ldso/ldso/arm/thumb_atomics.S
new file mode 100644
index 0000000..f6ae3db
--- /dev/null
+++ b/ldso/ldso/arm/thumb_atomics.S
@@ -0,0 +1,79 @@
+/* Copyright (C) 2006 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   In addition to the permissions in the GNU Lesser General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file with other
+   programs, and to distribute those programs without any restriction
+   coming from the use of this file. (The GNU Lesser General Public
+   License restrictions do apply in other respects; for example, they
+   cover modification of the file, and distribution when not linked
+   into another program.)
+
+   Note that people who make modified versions of this file are not
+   obligated to grant this special exception for their modified
+   versions; it is their choice whether to do so. The GNU Lesser
+   General Public License gives permission to release a modified
+   version without this exception; this exception also makes it
+   possible to release a modified version which carries forward this
+   exception.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <features.h>
+
+#ifdef __UCLIBC_HAS_THREADS_NATIVE__
+
+#include <sysdep.h>
+
+#if defined __thumb__
+
+/* Out-of-line atomic operations that we can't do in Thumb mode.
+   This ends up in various libraries where it is needed (and
+   a few .a archives where it isn't).  */
+
+	.hidden __thumb_swpb
+ENTRY (__thumb_swpb)
+	swpb	r0, r0, [r1]
+	bx	lr
+END (__thumb_swpb)
+
+	.hidden __thumb_swp
+ENTRY (__thumb_swp)
+	swp	r0, r0, [r1]
+	bx	lr
+END (__thumb_swp)
+
+	.hidden __thumb_cmpxchg
+ENTRY (__thumb_cmpxchg)
+	stmdb	sp!, {r4, lr}
+	mov	r4, r0
+0:	ldr	r3, [r2]
+	cmp	r3, r4
+	bne	1f
+	mov	r0, r4
+	mov	r3, #0xffff0fff
+	mov	lr, pc
+	add	pc, r3, #(0xffff0fc0 - 0xffff0fff)
+	bcc	0b
+	mov	r3, r4
+1:	mov	r0, r3
+	ldmia	sp!, {r4, pc}
+END (__thumb_cmpxchg)
+
+#endif /* __thumb__ */
+#endif /* __UCLIBC_HAS_THREADS_NATIVE__ */
+
diff --git a/ldso/ldso/dl-elf.c b/ldso/ldso/dl-elf.c
index 8970849..75e8f71 100644
--- a/ldso/ldso/dl-elf.c
+++ b/ldso/ldso/dl-elf.c
@@ -329,6 +329,9 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure,
 	ElfW(Dyn) *dpnt;
 	struct elf_resolve *tpnt;
 	ElfW(Phdr) *ppnt;
+#if USE_TLS
+	ElfW(Phdr) *tlsppnt = NULL;
+#endif
 	char *status, *header;
 	unsigned long dynamic_info[DYNAMIC_SIZE];
 	unsigned long *lpnt;
@@ -433,6 +436,29 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure,
 				maxvma = ppnt->p_vaddr + ppnt->p_memsz;
 			}
 		}
+		if (ppnt->p_type == PT_TLS)
+		{
+#if USE_TLS
+			if (ppnt->p_memsz == 0)
+				/* Nothing to do for an empty segment.  */
+				continue;
+			else
+				/* Save for after 'tpnt' is actually allocated. */
+				tlsppnt = ppnt;
+#else
+			/*
+			 * Yup, the user was an idiot and tried to sneak in a library with
+			 * TLS in it and we don't support it. Let's fall on our own sword
+			 * and scream at the luser while we die.
+			 */
+			_dl_dprintf(2, "%s: '%s' library contains unsupported TLS\n",
+				_dl_progname, libname);
+			_dl_internal_error_number = LD_ERROR_TLS_FAILED;
+			_dl_close(infile);
+			_dl_munmap(header, _dl_pagesize);
+			return NULL;
+#endif
+		}
 		ppnt++;
 	}
 
@@ -708,6 +734,37 @@ struct elf_resolve *_dl_load_elf_shared_library(int secure,
 	tpnt->ppnt = (ElfW(Phdr) *) DL_RELOC_ADDR(tpnt->loadaddr, epnt->e_phoff);
 	tpnt->n_phent = epnt->e_phnum;
 
+#if USE_TLS
+	if (tlsppnt)
+	{
+		_dl_debug_early("Found TLS header for %s\n", libname);
+#if NO_TLS_OFFSET != 0
+		tpnt->l_tls_offset = NO_TLS_OFFSET;
+#endif
+		tpnt->l_tls_blocksize = tlsppnt->p_memsz;
+		tpnt->l_tls_align = tlsppnt->p_align;
+		if (tlsppnt->p_align == 0)
+			tpnt->l_tls_firstbyte_offset = 0;
+		else
+			tpnt->l_tls_firstbyte_offset = tlsppnt->p_vaddr &
+				(tlsppnt->p_align - 1);
+		tpnt->l_tls_initimage_size = tlsppnt->p_filesz;
+		tpnt->l_tls_initimage = (void *) tlsppnt->p_vaddr;
+
+		/* Assign the next available module ID.  */
+		tpnt->l_tls_modid = _dl_next_tls_modid ();
+
+		/* We know the load address, so add it to the offset. */
+		if (tpnt->l_tls_initimage != NULL)
+		{
+			unsigned int tmp = (unsigned int) tpnt->l_tls_initimage;
+			tpnt->l_tls_initimage = (char *) tlsppnt->p_vaddr + tpnt->loadaddr;
+			_dl_debug_early("Relocated TLS initial image from %x to %x (size = %x)\n", tmp, tpnt->l_tls_initimage, tpnt->l_tls_initimage_size);
+			tmp = 0;
+		}
+	}
+#endif
+
 	/*
 	 * Add this object into the symbol chain
 	 */
@@ -816,6 +873,16 @@ int _dl_fixup(struct dyn_elf *rpnt, int now_flag)
 		}
 		tpnt->init_flag |= JMP_RELOCS_DONE;
 	}
+
+#if 0
+/* _dl_add_to_slotinfo is called by init_tls() for initial DSO 
+   or by dlopen() for dynamically loaded DSO. */
+#if USE_TLS
+	/* Add object to slot information data if necessary. */
+	if (tpnt->l_tls_blocksize != 0 && tls_init_tp_called)
+		_dl_add_to_slotinfo ((struct link_map *) tpnt);
+#endif
+#endif
 	return goof;
 }
 
diff --git a/ldso/ldso/dl-hash.c b/ldso/ldso/dl-hash.c
index 4809c43..3103d9f 100644
--- a/ldso/ldso/dl-hash.c
+++ b/ldso/ldso/dl-hash.c
@@ -157,18 +157,29 @@ struct elf_resolve *_dl_add_elf_hash_table(const char *libname,
 static __attribute_noinline__ const ElfW(Sym) *
 check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int type_class)
 {
-	if (type_class & (sym->st_shndx == SHN_UNDEF))
-		/* undefined symbol itself */
-		return NULL;
 
-#ifdef __mips__
-    if (sym->st_shndx == SHN_UNDEF && !(sym->st_other & STO_MIPS_PLT))
-        return NULL;
-#endif
-
-	if (sym->st_value == 0)
-		/* No value */
-		return NULL;
+#if USE_TLS
+		if((sym->st_value == 0 && (ELF_ST_TYPE(sym->st_info) != STT_TLS))
+			      || (type_class & (sym->st_shndx == SHN_UNDEF)))
+			/* No value or undefined symbol itself */				  
+			return NULL;
+			
+		if(ELF_ST_TYPE(sym->st_info) > STT_FUNC 
+			&& ELF_ST_TYPE(sym->st_info) != STT_COMMON
+			&& ELF_ST_TYPE(sym->st_info) != STT_TLS)
+			/* Ignore all but STT_NOTYPE, STT_OBJECT, STT_FUNC and STT_COMMON
+			 * entries (and STT_TLS if TLS is supported) since these
+			 * are no code/data definitions.
+			 */
+			return NULL;
+#else			  
+		if (type_class & (sym->st_shndx == SHN_UNDEF))
+			/* undefined symbol itself */
+			return NULL;
+
+		if (sym->st_value == 0)
+			/* No value */
+			return NULL;
 
 	if (ELF_ST_TYPE(sym->st_info) > STT_FUNC
 		&& ELF_ST_TYPE(sym->st_info) != STT_COMMON)
@@ -177,7 +188,7 @@ check_match (const ElfW(Sym) *sym, char *strtab, const char* undef_name, int typ
 		 * code/data definitions
 		 */
 		return NULL;
-
+#endif
 	if (_dl_strcmp(strtab + sym->st_name, undef_name) != 0)
 		return NULL;
 
@@ -257,12 +268,11 @@ _dl_lookup_sysv_hash(struct elf_resolve *tpnt, ElfW(Sym) *symtab, unsigned long
  * This function resolves externals, and this is either called when we process
  * relocations or when we call an entry in the PLT table for the first time.
  */
-char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt,
-		      struct elf_resolve *mytpnt, int type_class
-#ifdef __FDPIC__
-		      , struct elf_resolve **tpntp
-#endif
-		      )
+char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt, struct elf_resolve *mytpnt, int type_class
+#if USE_TLS
+,struct elf_resolve **tls_tpnt
+#endif			    
+)
 {
 	struct elf_resolve *tpnt = NULL;
 	ElfW(Sym) *symtab;
@@ -270,8 +280,7 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt,
 	unsigned long elf_hash_number = 0xffffffff;
 	const ElfW(Sym) *sym = NULL;
 
-	const ElfW(Sym) *weak_sym = 0;
-	struct elf_resolve *weak_tpnt = 0;
+	char *weak_result = NULL;
 
 #ifdef __LDSO_GNU_HASH_SUPPORT__
 	unsigned long gnu_hash_number = _dl_gnu_hash((const unsigned char *)name);
@@ -329,37 +338,29 @@ char *_dl_lookup_hash(const char *name, struct dyn_elf *rpnt,
 
 	if (sym) {
 		/* At this point we have found the requested symbol, do binding */
+#if USE_TLS	
+		if(ELF_ST_TYPE(sym->st_info) == STT_TLS) {
+			_dl_assert((tls_tpnt != NULL));
+			*tls_tpnt = tpnt;
+
+			return (char*)sym->st_value;
+		}
+#endif
+
 		switch (ELF_ST_BIND(sym->st_info)) {
 			case STB_WEAK:
 #if 0
-/* Perhaps we should support old style weak symbol handling
- * per what glibc does when you export LD_DYNAMIC_WEAK */
-				if (!weak_sym) {
-					weak_tpnt = tpnt;
-					weak_sym = sym;
-				}
+	/* Perhaps we should support old style weak symbol handling
+	* per what glibc does when you export LD_DYNAMIC_WEAK */
+				if (!weak_result)
+					weak_result = (char *)tpnt->loadaddr + sym->st_value;
 				break;
 #endif
 			case STB_GLOBAL:
-#ifdef __FDPIC__
-				if (tpntp)
-					*tpntp = tpnt;
-#endif
-				return (char *) DL_FIND_HASH_VALUE (tpnt, type_class, sym);
+				return (char*)tpnt->loadaddr + sym->st_value;
 			default:	/* Local symbols not handled here */
 				break;
 		}
 	}
-	if (weak_sym) {
-#ifdef __FDPIC__
-		if (tpntp)
-			*tpntp = weak_tpnt;
-#endif
-		return (char *) DL_FIND_HASH_VALUE (weak_tpnt, type_class, weak_sym);
-	}
-#ifdef __FDPIC__
-	if (tpntp)
-		*tpntp = NULL;
-#endif
-	return NULL;
+	return weak_result;
 }
diff --git a/ldso/ldso/dl-startup.c b/ldso/ldso/dl-startup.c
index de9c8bc..6f07b96 100644
--- a/ldso/ldso/dl-startup.c
+++ b/ldso/ldso/dl-startup.c
@@ -209,6 +209,20 @@ DL_START(unsigned long args)
 	_dl_parse_dynamic_info(dpnt, tpnt->dynamic_info, NULL, load_addr);
 #endif
 
+	/*
+	 * BIG ASSUMPTION: We assume that the dynamic loader does not
+	 *                 have any TLS data itself. If this ever occurs
+	 *                 more work than what is done below for the
+	 *                 loader will have to happen.
+	 */
+#if USE_TLS
+	/* This was done by _dl_memset above. */
+	/* tpnt->l_tls_modid = 0; */
+# if NO_TLS_OFFSET != 0
+	tpnt->l_tls_offset = NO_TLS_OFFSET;
+# endif 
+#endif 
+
 	SEND_EARLY_STDERR_DEBUG("Done scanning DYNAMIC section\n");
 
 #if defined(PERFORM_BOOTSTRAP_GOT)
diff --git a/ldso/ldso/dl-tls.c b/ldso/ldso/dl-tls.c
new file mode 100644
index 0000000..4ad9c73
--- /dev/null
+++ b/ldso/ldso/dl-tls.c
@@ -0,0 +1,1045 @@
+/* vi: set sw=4 ts=4: */
+/*
+ * Thread-local storage handling in the ELF dynamic linker.
+ *
+ * Copyright (C) 2005 by Steven J. Hill <sjhill at realitydiluted.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the above contributors may not be
+ *    used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <tls.h>
+#include <dl-tls.h>
+#include <ldsodefs.h>
+
+void *(*_dl_calloc_function) (size_t __nmemb, size_t __size) = NULL;
+void *(*_dl_realloc_function) (void *__ptr, size_t __size) = NULL;
+void *(*_dl_memalign_function) (size_t __boundary, size_t __size) = NULL;
+
+void (*_dl_free_function) (void *__ptr);
+void *_dl_memalign (size_t __boundary, size_t __size);
+struct link_map *_dl_update_slotinfo (unsigned long int req_modid);
+
+/* Round up N to the nearest multiple of P, where P is a power of 2
+   --- without using libgcc division routines.  */
+#define roundup_pow2(n, p) (((n) + (p) - 1) & ~((p) - 1))
+
+void *
+_dl_calloc (size_t __nmemb, size_t __size)
+{
+	void *result;
+	size_t size = (__size * __nmemb);
+
+	if (_dl_calloc_function)
+		return (*_dl_calloc_function) (__nmemb, __size);
+
+	if ((result = _dl_malloc(size)) != NULL) {
+		_dl_memset(result, 0, size);
+	}
+
+	return result;
+}
+
+void *
+_dl_realloc (void * __ptr, size_t __size)
+{
+	if (_dl_realloc_function)
+		return (*_dl_realloc_function) (__ptr, __size);
+
+	_dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n");
+	return NULL;
+}
+
+void
+_dl_free (void *__ptr)
+{
+	if (_dl_free_function)
+		(*_dl_free_function) (__ptr);
+
+#if 0
+	_dl_debug_early("NOT IMPLEMENTED PROPERLY!!!\n");
+#endif
+}
+
+
+/* The __tls_get_addr function has two basic forms which differ in the
+   arguments.  The IA-64 form takes two parameters, the module ID and
+   offset.  The form used, among others, on IA-32 takes a reference to
+   a special structure which contain the same information.  The second
+   form seems to be more often used (in the moment) so we default to
+   it.  Users of the IA-64 form have to provide adequate definitions
+   of the following macros.  */
+#ifndef GET_ADDR_ARGS
+# define GET_ADDR_ARGS		tls_index *ti
+#endif
+#ifndef GET_ADDR_MODULE
+# define GET_ADDR_MODULE	ti->ti_module
+#endif
+#ifndef GET_ADDR_OFFSET
+# define GET_ADDR_OFFSET	ti->ti_offset
+#endif
+
+/*
+ * Amount of excess space to allocate in the static TLS area
+ * to allow dynamic loading of modules defining IE-model TLS data.
+ */
+#define TLS_STATIC_SURPLUS	64 + DL_NNS * 100
+
+/* Value used for dtv entries for which the allocation is delayed. */
+#define TLS_DTV_UNALLOCATED	((void *) -1l)
+
+/*
+ * We are trying to perform a static TLS relocation in MAP, but it was
+ * dynamically loaded.  This can only work if there is enough surplus in
+ * the static TLS area already allocated for each running thread.  If this
+ * object's TLS segment is too big to fit, we fail.  If it fits,
+ * we set MAP->l_tls_offset and return.
+ * This function intentionally does not return any value but signals error
+ * directly, as static TLS should be rare and code handling it should
+ * not be inlined as much as possible.
+ */
+void
+internal_function __attribute_noinline__
+_dl_allocate_static_tls (struct link_map *map)
+{
+	/* If the alignment requirements are too high fail.  */
+	if (map->l_tls_align > _dl_tls_static_align)
+	{
+fail:
+		_dl_dprintf(2, "cannot allocate memory in static TLS block");
+		_dl_exit(30);
+	}
+
+# ifdef TLS_TCB_AT_TP
+	size_t freebytes;
+	size_t n;
+	size_t blsize;
+
+	freebytes = _dl_tls_static_size - _dl_tls_static_used - TLS_TCB_SIZE;
+
+	blsize = map->l_tls_blocksize + map->l_tls_firstbyte_offset;
+	if (freebytes < blsize)
+		goto fail;
+
+	n = (freebytes - blsize) & ~(map->l_tls_align - 1);
+
+	size_t offset = _dl_tls_static_used + (freebytes - n
+		- map->l_tls_firstbyte_offset);
+
+	map->l_tls_offset = _dl_tls_static_used = offset;
+# elif TLS_DTV_AT_TP
+	size_t used;
+	size_t check;
+
+	size_t offset = roundup_pow2 (_dl_tls_static_used, map->l_tls_align);
+	used = offset + map->l_tls_blocksize;
+	check = used;
+
+	/* dl_tls_static_used includes the TCB at the beginning. */
+	if (check > _dl_tls_static_size)
+		goto fail;
+
+	map->l_tls_offset = offset;
+	_dl_tls_static_used = used;
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+	/*
+	 * If the object is not yet relocated we cannot initialize the
+	 * static TLS region.  Delay it.
+	 */
+	if (((struct elf_resolve *) map)->init_flag & RELOCS_DONE)
+    {
+#ifdef SHARED
+		/*
+		 * Update the slot information data for at least the generation of
+		 * the DSO we are allocating data for.
+		 */
+		if (__builtin_expect (THREAD_DTV()[0].counter != _dl_tls_generation, 0))
+			(void) _dl_update_slotinfo (map->l_tls_modid);
+#endif
+		_dl_init_static_tls (map);
+	}
+	else
+		map->l_need_tls_init = 1;
+}
+
+#ifdef SHARED
+/* Initialize static TLS area and DTV for current (only) thread.
+   libpthread implementations should provide their own hook
+   to handle all threads.  */
+void
+internal_function __attribute_noinline__
+_dl_nothread_init_static_tls (struct link_map *map)
+{
+# ifdef TLS_TCB_AT_TP
+	void *dest = (char *) THREAD_SELF - map->l_tls_offset;
+# elif TLS_DTV_AT_TP
+	void *dest = (char *) THREAD_SELF + map->l_tls_offset + TLS_PRE_TCB_SIZE;
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+	/* Fill in the DTV slot so that a later LD/GD access will find it.  */
+	dtv_t *dtv = THREAD_DTV ();
+	if (!(map->l_tls_modid <= dtv[-1].counter)) {
+		_dl_dprintf(2, "map->l_tls_modid <= dtv[-1].counter FAILED!\n");
+		_dl_exit(30);
+	}
+	dtv[map->l_tls_modid].pointer.val = dest;
+	dtv[map->l_tls_modid].pointer.is_static = true;
+
+	/* Initialize the memory.  */
+	_dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size);
+	_dl_memset((dest + map->l_tls_initimage_size), '\0',
+		map->l_tls_blocksize - map->l_tls_initimage_size);
+}
+#endif
+
+/* Taken from glibc/sysdeps/generic/dl-tls.c */
+static void
+oom (void)
+{
+	_dl_debug_early("cannot allocate thread-local memory: ABORT\n");
+	_dl_exit(30);
+}
+
+size_t
+internal_function
+_dl_next_tls_modid (void)
+{
+  size_t result;
+
+  if (__builtin_expect (_dl_tls_dtv_gaps, false))
+    {
+      size_t disp = 0;
+      struct dtv_slotinfo_list *runp = _dl_tls_dtv_slotinfo_list;
+
+      /* Note that this branch will never be executed during program
+	 start since there are no gaps at that time.  Therefore it
+	 does not matter that the dl_tls_dtv_slotinfo is not allocated
+	 yet when the function is called for the first times.
+
+	 NB: the offset +1 is due to the fact that DTV[0] is used
+	 for something else.  */
+      result = _dl_tls_static_nelem + 1;
+      if (result <= _dl_tls_max_dtv_idx)
+	do
+	  {
+	    while (result - disp < runp->len)
+	      {
+		if (runp->slotinfo[result - disp].map == NULL)
+		  break;
+
+		++result;
+		_dl_assert (result <= _dl_tls_max_dtv_idx + 1);
+	      }
+
+	    if (result - disp < runp->len)
+	      break;
+
+	    disp += runp->len;
+	  }
+	while ((runp = runp->next) != NULL);
+
+      if (result > _dl_tls_max_dtv_idx)
+	{
+	  /* The new index must indeed be exactly one higher than the
+	     previous high.  */
+	  _dl_assert (result == _dl_tls_max_dtv_idx + 1);
+	  /* There is no gap anymore.  */
+	  _dl_tls_dtv_gaps = false;
+
+	  goto nogaps;
+	}
+    }
+  else
+    {
+      /* No gaps, allocate a new entry.  */
+    nogaps:
+
+      result = ++_dl_tls_max_dtv_idx;
+    }
+
+  return result;
+}
+
+void
+internal_function
+_dl_determine_tlsoffset (void)
+{
+  size_t max_align = TLS_TCB_ALIGN;
+  size_t freetop = 0;
+  size_t freebottom = 0;
+
+  /* The first element of the dtv slot info list is allocated.  */
+  _dl_assert (_dl_tls_dtv_slotinfo_list != NULL);
+  /* There is at this point only one element in the
+     dl_tls_dtv_slotinfo_list list.  */
+  _dl_assert (_dl_tls_dtv_slotinfo_list->next == NULL);
+
+  struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo;
+
+  /* Determining the offset of the various parts of the static TLS
+     block has several dependencies.  In addition we have to work
+     around bugs in some toolchains.
+
+     Each TLS block from the objects available at link time has a size
+     and an alignment requirement.  The GNU ld computes the alignment
+     requirements for the data at the positions *in the file*, though.
+     I.e, it is not simply possible to allocate a block with the size
+     of the TLS program header entry.  The data is layed out assuming
+     that the first byte of the TLS block fulfills
+
+       p_vaddr mod p_align == &TLS_BLOCK mod p_align
+
+     This means we have to add artificial padding at the beginning of
+     the TLS block.  These bytes are never used for the TLS data in
+     this module but the first byte allocated must be aligned
+     according to mod p_align == 0 so that the first byte of the TLS
+     block is aligned according to p_vaddr mod p_align.  This is ugly
+     and the linker can help by computing the offsets in the TLS block
+     assuming the first byte of the TLS block is aligned according to
+     p_align.
+
+     The extra space which might be allocated before the first byte of
+     the TLS block need not go unused.  The code below tries to use
+     that memory for the next TLS block.  This can work if the total
+     memory requirement for the next TLS block is smaller than the
+     gap.  */
+
+# ifdef TLS_TCB_AT_TP
+  /* We simply start with zero.  */
+  size_t offset = 0;
+
+  for (size_t cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
+    {
+      _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len);
+
+      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
+			  & (slotinfo[cnt].map->l_tls_align - 1));
+      size_t off;
+      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
+
+      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
+	{
+	  off = roundup_pow2 (freetop + slotinfo[cnt].map->l_tls_blocksize
+                          - firstbyte, slotinfo[cnt].map->l_tls_align)
+		+ firstbyte;
+	  if (off <= freebottom)
+	    {
+	      freetop = off;
+
+	      /* XXX For some architectures we perhaps should store the
+		 negative offset.  */
+	      slotinfo[cnt].map->l_tls_offset = off;
+	      continue;
+	    }
+	}
+
+      off = roundup_pow2 (offset + slotinfo[cnt].map->l_tls_blocksize
+                          - firstbyte, slotinfo[cnt].map->l_tls_align)
+            + firstbyte;
+      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
+		+ (freebottom - freetop))
+	{
+	  freetop = offset;
+	  freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
+	}
+      offset = off;
+
+      /* XXX For some architectures we perhaps should store the
+	 negative offset.  */
+      slotinfo[cnt].map->l_tls_offset = off;
+    }
+
+  _dl_tls_static_used = offset;
+  _dl_tls_static_size = (roundup_pow2 (offset + TLS_STATIC_SURPLUS, max_align)
+			    + TLS_TCB_SIZE);
+# elif TLS_DTV_AT_TP
+  /* The TLS blocks start right after the TCB.  */
+  size_t offset = TLS_TCB_SIZE;
+  size_t cnt;
+
+  for (cnt = 1; slotinfo[cnt].map != NULL; ++cnt)
+    {
+      _dl_assert (cnt < _dl_tls_dtv_slotinfo_list->len);
+
+      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
+			  & (slotinfo[cnt].map->l_tls_align - 1));
+      size_t off;
+      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);
+
+      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
+	{
+	  off = roundup_pow2 (freebottom, slotinfo[cnt].map->l_tls_align);
+	  if (off - freebottom < firstbyte)
+	    off += slotinfo[cnt].map->l_tls_align;
+	  if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
+	    {
+	      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
+	      freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
+			    - firstbyte);
+	      continue;
+	    }
+	}
+
+      off = roundup_pow2 (offset, slotinfo[cnt].map->l_tls_align);
+      if (off - offset < firstbyte)
+	off += slotinfo[cnt].map->l_tls_align;
+
+      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
+      if (off - firstbyte - offset > freetop - freebottom)
+	{
+	  freebottom = offset;
+	  freetop = off - firstbyte;
+	}
+
+      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
+    }
+
+  _dl_tls_static_used = offset;
+  _dl_tls_static_size = roundup_pow2 (offset + TLS_STATIC_SURPLUS,
+                                      TLS_TCB_ALIGN);
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+  /* The alignment requirement for the static TLS block.  */
+  _dl_tls_static_align = max_align;
+}
+
+/* This is called only when the data structure setup was skipped at startup,
+   when there was no need for it then.  Now we have dynamically loaded
+   something needing TLS, or libpthread needs it.  */
+rtld_hidden_proto(_dl_tls_setup)
+int
+internal_function
+_dl_tls_setup (void)
+{
+  _dl_assert (_dl_tls_dtv_slotinfo_list == NULL);
+  _dl_assert (_dl_tls_max_dtv_idx == 0);
+
+  const size_t nelem = 2 + TLS_SLOTINFO_SURPLUS;
+
+  _dl_tls_dtv_slotinfo_list
+    = _dl_calloc (1, (sizeof (struct dtv_slotinfo_list)
+		  + nelem * sizeof (struct dtv_slotinfo)));
+  if (_dl_tls_dtv_slotinfo_list == NULL)
+    return -1;
+
+  _dl_tls_dtv_slotinfo_list->len = nelem;
+
+  /* Number of elements in the static TLS block.  It can't be zero
+     because of various assumptions.  The one element is null.  */
+  _dl_tls_static_nelem = _dl_tls_max_dtv_idx = 1;
+
+  /* This initializes more variables for us.  */
+  _dl_determine_tlsoffset ();
+
+  return 0;
+}
+rtld_hidden_def (_dl_tls_setup)
+
+static void *
+internal_function
+allocate_dtv (void *result)
+{
+  dtv_t *dtv;
+  size_t dtv_length;
+
+  /* We allocate a few more elements in the dtv than are needed for the
+     initial set of modules.  This should avoid in most cases expansions
+     of the dtv.  */
+  dtv_length = _dl_tls_max_dtv_idx + DTV_SURPLUS;
+  dtv = _dl_calloc (dtv_length + 2, sizeof (dtv_t));
+  if (dtv != NULL)
+    {
+      /* This is the initial length of the dtv.  */
+      dtv[0].counter = dtv_length;
+
+      /* The rest of the dtv (including the generation counter) is
+	 initialized with zero to indicate nothing there.  */
+
+      /* Add the dtv to the thread data structures.  */
+      INSTALL_DTV (result, dtv);
+    }
+  else
+    result = NULL;
+
+  return result;
+}
+
+/* Get size and alignment requirements of the static TLS block.  */
+void
+internal_function
+_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
+{
+  *sizep = _dl_tls_static_size;
+  *alignp = _dl_tls_static_align;
+}
+
+void *
+internal_function
+_dl_allocate_tls_storage (void)
+{
+  void *result;
+  size_t size = _dl_tls_static_size;
+
+# if TLS_DTV_AT_TP
+  /* Memory layout is:
+     [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
+			  ^ This should be returned.  */
+  size += (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1)
+	  & ~(_dl_tls_static_align - 1);
+# endif
+
+  /* Allocate a correctly aligned chunk of memory.  */
+  result = _dl_memalign (_dl_tls_static_align, size);
+  if (__builtin_expect (result != NULL, 1))
+    {
+      /* Allocate the DTV.  */
+      void *allocated = result;
+
+# ifdef TLS_TCB_AT_TP
+      /* The TCB follows the TLS blocks.  */
+      result = (char *) result + size - TLS_TCB_SIZE;
+
+      /* Clear the TCB data structure.  We can't ask the caller (i.e.
+	 libpthread) to do it, because we will initialize the DTV et al.  */
+      _dl_memset (result, '\0', TLS_TCB_SIZE);
+# elif TLS_DTV_AT_TP
+      result = (char *) result + size - _dl_tls_static_size;
+
+      /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before it.
+	 We can't ask the caller (i.e. libpthread) to do it, because we will
+	 initialize the DTV et al.  */
+      _dl_memset ((char *) result - TLS_PRE_TCB_SIZE, '\0',
+	      TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
+# endif
+
+      result = allocate_dtv (result);
+      if (result == NULL)
+	_dl_free (allocated);
+    }
+
+  return result;
+}
+
+void *
+internal_function
+_dl_allocate_tls_init (void *result)
+{
+  if (result == NULL)
+    /* The memory allocation failed.  */
+    return NULL;
+
+  dtv_t *dtv = GET_DTV (result);
+  struct dtv_slotinfo_list *listp;
+  size_t total = 0;
+  size_t maxgen = 0;
+
+  /* We have to prepare the dtv for all currently loaded modules using
+     TLS.  For those which are dynamically loaded we add the values
+     indicating deferred allocation.  */
+  listp = _dl_tls_dtv_slotinfo_list;
+  while (1)
+    {
+      size_t cnt;
+
+      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+	{
+	  struct link_map *map;
+	  void *dest;
+
+	  /* Check for the total number of used slots.  */
+	  if (total + cnt > _dl_tls_max_dtv_idx)
+	    break;
+
+	  map = listp->slotinfo[cnt].map;
+	  if (map == NULL)
+	    /* Unused entry.  */
+	    continue;
+
+	  /* Keep track of the maximum generation number.  This might
+	     not be the generation counter.  */
+	  maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);
+
+	  if (map->l_tls_offset == NO_TLS_OFFSET)
+	    {
+	      /* For dynamically loaded modules we simply store
+		 the value indicating deferred allocation.  */
+	      dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
+	      dtv[map->l_tls_modid].pointer.is_static = false;
+	      continue;
+	    }
+
+	  _dl_assert (map->l_tls_modid == cnt);
+	  _dl_assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
+# ifdef TLS_TCB_AT_TP
+	  _dl_assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
+	  dest = (char *) result - map->l_tls_offset;
+# elif TLS_DTV_AT_TP
+	  dest = (char *) result + map->l_tls_offset;
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+
+	  /* Copy the initialization image and clear the BSS part.  */
+	  dtv[map->l_tls_modid].pointer.val = dest;
+	  dtv[map->l_tls_modid].pointer.is_static = true;
+	  _dl_memcpy(dest, map->l_tls_initimage, map->l_tls_initimage_size);
+	  _dl_memset((dest + map->l_tls_initimage_size), '\0',
+		  map->l_tls_blocksize - map->l_tls_initimage_size);
+
+	}
+
+      total += cnt;
+      if (total >= _dl_tls_max_dtv_idx)
+	break;
+
+      listp = listp->next;
+      _dl_assert (listp != NULL);
+    }
+
+  /* The DTV version is up-to-date now.  */
+  dtv[0].counter = maxgen;
+
+  return result;
+}
+
+void *
+internal_function
+_dl_allocate_tls (void *mem)
+{
+  return _dl_allocate_tls_init (mem == NULL
+				? _dl_allocate_tls_storage ()
+				: allocate_dtv (mem));
+}
+
+void
+internal_function
+_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
+{
+  dtv_t *dtv = GET_DTV (tcb);
+  size_t cnt;
+
+  /* We need to free the memory allocated for non-static TLS.  */
+  for (cnt = 0; cnt < dtv[-1].counter; ++cnt)
+    if (! dtv[1 + cnt].pointer.is_static
+	&& dtv[1 + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+      _dl_free (dtv[1 + cnt].pointer.val);
+
+  /* The array starts with dtv[-1].  */
+  if (dtv != _dl_initial_dtv)
+    _dl_free (dtv - 1);
+
+  if (dealloc_tcb)
+    {
+# ifdef TLS_TCB_AT_TP
+      /* The TCB follows the TLS blocks.  Back up to free the whole block.  */
+      tcb -= _dl_tls_static_size - TLS_TCB_SIZE;
+# elif TLS_DTV_AT_TP
+      /* Back up the TLS_PRE_TCB_SIZE bytes.  */
+      tcb -= (TLS_PRE_TCB_SIZE + _dl_tls_static_align - 1)
+	     & ~(_dl_tls_static_align - 1);
+# endif
+      _dl_free (tcb);
+    }
+}
+
+static void *
+allocate_and_init (struct link_map *map)
+{
+	void *newp;
+
+	newp = _dl_memalign (map->l_tls_align, map->l_tls_blocksize);
+	if (newp == NULL)
+	{
+		_dl_dprintf(2, "%s:%d: Out of memory!!!\n", __FUNCTION__, __LINE__);
+		_dl_exit(1);
+	}
+
+	/* Initialize the memory.  */
+	_dl_memcpy (newp, map->l_tls_initimage, map->l_tls_initimage_size);
+	_dl_memset ((newp + map->l_tls_initimage_size), '\0',
+		map->l_tls_blocksize - map->l_tls_initimage_size);
+
+	return newp;
+}
+
+struct link_map *
+_dl_update_slotinfo (unsigned long int req_modid)
+{
+  struct link_map *the_map = NULL;
+  dtv_t *dtv = THREAD_DTV ();
+
+  /* The global dl_tls_dtv_slotinfo array contains for each module
+     index the generation counter current when the entry was created.
+     This array never shrinks so that all module indices which were
+     valid at some time can be used to access it.  Before the first
+     use of a new module index in this function the array was extended
+     appropriately.  Access also does not have to be guarded against
+     modifications of the array.  It is assumed that pointer-size
+     values can be read atomically even in SMP environments.  It is
+     possible that other threads at the same time dynamically load
+     code and therefore add to the slotinfo list.  This is a problem
+     since we must not pick up any information about incomplete work.
+     The solution to this is to ignore all dtv slots which were
+     created after the one we are currently interested.  We know that
+     dynamic loading for this module is completed and this is the last
+     load operation we know finished.  */
+  unsigned long int idx = req_modid;
+  struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list;
+
+  _dl_debug_early ("Updating slotinfo for module %d\n", req_modid);
+
+  while (idx >= listp->len)
+    {
+      idx -= listp->len;
+      listp = listp->next;
+    }
+
+  if (dtv[0].counter < listp->slotinfo[idx].gen)
+    {
+      /* The generation counter for the slot is higher than what the
+	 current dtv implements.  We have to update the whole dtv but
+	 only those entries with a generation counter <= the one for
+	 the entry we need.  */
+      size_t new_gen = listp->slotinfo[idx].gen;
+      size_t total = 0;
+
+      /* We have to look through the entire dtv slotinfo list.  */
+      listp =  _dl_tls_dtv_slotinfo_list;
+      do
+	{
+	  size_t cnt;
+
+	  for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
+	    {
+	      size_t gen = listp->slotinfo[cnt].gen;
+
+	      if (gen > new_gen)
+		/* This is a slot for a generation younger than the
+		   one we are handling now.  It might be incompletely
+		   set up so ignore it.  */
+		continue;
+
+	      /* If the entry is older than the current dtv layout we
+		 know we don't have to handle it.  */
+	      if (gen <= dtv[0].counter)
+		continue;
+
+	      /* If there is no map this means the entry is empty.  */
+	      struct link_map *map = listp->slotinfo[cnt].map;
+	      if (map == NULL)
+		{
+		  /* If this modid was used at some point the memory
+		     might still be allocated.  */
+		  if (! dtv[total + cnt].pointer.is_static
+		      && dtv[total + cnt].pointer.val != TLS_DTV_UNALLOCATED)
+		    {
+		      _dl_free (dtv[total + cnt].pointer.val);
+		      dtv[total + cnt].pointer.val = TLS_DTV_UNALLOCATED;
+		    }
+
+		  continue;
+		}
+
+	      /* Check whether the current dtv array is large enough.  */
+	      size_t modid = map->l_tls_modid;
+	      _dl_assert (total + cnt == modid);
+	      if (dtv[-1].counter < modid)
+		{
+		  /* Reallocate the dtv.  */
+		  dtv_t *newp;
+		  size_t newsize = _dl_tls_max_dtv_idx + DTV_SURPLUS;
+		  size_t oldsize = dtv[-1].counter;
+
+		  _dl_assert (map->l_tls_modid <= newsize);
+
+		  if (dtv == _dl_initial_dtv)
+		    {
+		      /* This is the initial dtv that was allocated
+			 during rtld startup using the dl-minimal.c
+			 malloc instead of the real malloc.  We can't
+			 free it, we have to abandon the old storage.  */
+
+		      newp = _dl_malloc ((2 + newsize) * sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+		      _dl_memcpy (newp, &dtv[-1], oldsize * sizeof (dtv_t));
+		    }
+		  else
+		    {
+		      newp = _dl_realloc (&dtv[-1],
+				      (2 + newsize) * sizeof (dtv_t));
+		      if (newp == NULL)
+			oom ();
+		    }
+
+		  newp[0].counter = newsize;
+
+		  /* Clear the newly allocated part.  */
+		  _dl_memset (newp + 2 + oldsize, '\0',
+			  (newsize - oldsize) * sizeof (dtv_t));
+
+		  /* Point dtv to the generation counter.  */
+		  dtv = &newp[1];
+
+		  /* Install this new dtv in the thread data
+		     structures.  */
+		  INSTALL_NEW_DTV (dtv);
+		}
+
+	      /* If there is currently memory allocate for this
+		 dtv entry free it.  */
+	      /* XXX Ideally we will at some point create a memory
+		 pool.  */
+	      if (! dtv[modid].pointer.is_static
+		  && dtv[modid].pointer.val != TLS_DTV_UNALLOCATED)
+		/* Note that free is called for NULL is well.  We
+		   deallocate even if it is this dtv entry we are
+		   supposed to load.  The reason is that we call
+		   memalign and not malloc.  */
+		_dl_free (dtv[modid].pointer.val);
+
+	      /* This module is loaded dynamically- We defer memory
+		 allocation.  */
+	      dtv[modid].pointer.is_static = false;
+	      dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
+
+	      if (modid == req_modid)
+		the_map = map;
+	    }
+
+	  total += listp->len;
+	}
+      while ((listp = listp->next) != NULL);
+
+      /* This will be the new maximum generation counter.  */
+      dtv[0].counter = new_gen;
+    }
+
+  return the_map;
+}
+
+
+/* The generic dynamic and local dynamic model cannot be used in
+   statically linked applications.  */
+void *
+__tls_get_addr (GET_ADDR_ARGS)
+{
+  dtv_t *dtv = THREAD_DTV ();
+  struct link_map *the_map = NULL;
+  void *p;
+
+  if (__builtin_expect (dtv[0].counter != _dl_tls_generation, 0))
+    the_map = _dl_update_slotinfo (GET_ADDR_MODULE);
+
+  p = dtv[GET_ADDR_MODULE].pointer.val;
+
+  if (__builtin_expect (p == TLS_DTV_UNALLOCATED, 0))
+    {
+      /* The allocation was deferred.  Do it now.  */
+      if (the_map == NULL)
+	{
+	  /* Find the link map for this module.  */
+	  size_t idx = GET_ADDR_MODULE;
+	  struct dtv_slotinfo_list *listp = _dl_tls_dtv_slotinfo_list;
+
+	  while (idx >= listp->len)
+	    {
+	      idx -= listp->len;
+	      listp = listp->next;
+	    }
+
+	  the_map = listp->slotinfo[idx].map;
+	}
+
+      p = dtv[GET_ADDR_MODULE].pointer.val = allocate_and_init (the_map);
+      dtv[GET_ADDR_MODULE].pointer.is_static = false;
+    }
+
+  return (char *) p + GET_ADDR_OFFSET;
+}
+
+void
+_dl_add_to_slotinfo (struct link_map  *l)
+{
+  /* Now that we know the object is loaded successfully add
+     modules containing TLS data to the dtv info table.  We
+     might have to increase its size.  */
+  struct dtv_slotinfo_list *listp;
+  struct dtv_slotinfo_list *prevp;
+  size_t idx = l->l_tls_modid;
+
+  _dl_debug_early("Adding to slotinfo for %s\n", l->l_name);
+
+  /* Find the place in the dtv slotinfo list.  */
+  listp = _dl_tls_dtv_slotinfo_list;
+  prevp = NULL;		/* Needed to shut up gcc.  */
+  do
+    {
+      /* Does it fit in the array of this list element?  */
+      if (idx < listp->len)
+	break;
+      idx -= listp->len;
+      prevp = listp;
+      listp = listp->next;
+    }
+  while (listp != NULL);
+
+  if (listp == NULL)
+    {
+      /* When we come here it means we have to add a new element
+	 to the slotinfo list.  And the new module must be in
+	 the first slot.  */
+      _dl_assert (idx == 0);
+
+      listp = prevp->next = (struct dtv_slotinfo_list *)
+	_dl_malloc (sizeof (struct dtv_slotinfo_list)
+		+ TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+      if (listp == NULL)
+	{
+	  /* We ran out of memory.  We will simply fail this
+	     call but don't undo anything we did so far.  The
+	     application will crash or be terminated anyway very
+	     soon.  */
+
+	  /* We have to do this since some entries in the dtv
+	     slotinfo array might already point to this
+	     generation.  */
+	  ++_dl_tls_generation;
+
+	  _dl_dprintf (_dl_debug_file,
+			"cannot create TLS data structures: ABORT\n");
+	  _dl_exit (127);
+	}
+
+      listp->len = TLS_SLOTINFO_SURPLUS;
+      listp->next = NULL;
+      _dl_memset (listp->slotinfo, '\0',
+	      TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
+    }
+
+  /* Add the information into the slotinfo data structure.  */
+  listp->slotinfo[idx].map = l;
+  listp->slotinfo[idx].gen = _dl_tls_generation + 1;
+  /* ??? ideally this would be done once per call to dlopen.  However there's
+     no easy way to indicate whether a library used TLS, so do it here
+	 instead. */
+  /* Bump the TLS generation number.  */
+  _dl_tls_generation++;
+}
+
+/* Taken from glibc/elf/rtld.c */
+static bool tls_init_tp_called;
+
+/* _dl_error_catch_tsd points to this for the single-threaded case.
+   It's reset by the thread library for multithreaded programs.  */
+void ** __attribute__ ((const))
+_dl_initial_error_catch_tsd (void)
+{
+	static void *data;
+	return &data;
+}
+
+#ifdef SHARED
+void *init_tls (void);
+rtld_hidden_proto(init_tls)
+void *
+internal_function
+init_tls (void)
+{
+	/* Number of elements in the static TLS block.  */
+	_dl_tls_static_nelem = _dl_tls_max_dtv_idx;
+
+	/* Do not do this twice.  The audit interface might have required
+	   the DTV interfaces to be set up early.  */
+	if (_dl_initial_dtv != NULL)
+		return NULL;
+
+	/* Allocate the array which contains the information about the
+	   dtv slots.  We allocate a few entries more than needed to
+	   avoid the need for reallocation.  */
+	size_t nelem = _dl_tls_max_dtv_idx + 1 + TLS_SLOTINFO_SURPLUS;
+
+	/* Allocate.  */
+	_dl_assert (_dl_tls_dtv_slotinfo_list == NULL);
+	_dl_tls_dtv_slotinfo_list = (struct dtv_slotinfo_list *)
+		_dl_calloc (sizeof (struct dtv_slotinfo_list)
+			+ nelem * sizeof (struct dtv_slotinfo), 1);
+	/* No need to check the return value.  If memory allocation failed
+	   the program would have been terminated.  */
+
+	struct dtv_slotinfo *slotinfo = _dl_tls_dtv_slotinfo_list->slotinfo;
+	_dl_tls_dtv_slotinfo_list->len = nelem;
+	_dl_tls_dtv_slotinfo_list->next = NULL;
+
+	/* Fill in the information from the loaded modules.  No namespace
+	   but the base one can be filled at this time.  */
+	int i = 0;
+	struct link_map *l;
+	for (l =  (struct link_map *) _dl_loaded_modules; l != NULL; l = l->l_next)
+		if (l->l_tls_blocksize != 0)
+		{
+			/* This is a module with TLS data.  Store the map reference.
+			   The generation counter is zero.  */
+
+			/* Skip slot[0]: it will never be used */
+			slotinfo[++i].map = l;
+		}
+	_dl_assert (i == _dl_tls_max_dtv_idx);
+
+	/* Compute the TLS offsets for the various blocks.  */
+	_dl_determine_tlsoffset ();
+
+	/* Construct the static TLS block and the dtv for the initial
+	   thread.  For some platforms this will include allocating memory
+	   for the thread descriptor.  The memory for the TLS block will
+	   never be freed.  It should be allocated accordingly.  The dtv
+	   array can be changed if dynamic loading requires it.  */
+	void *tcbp = _dl_allocate_tls_storage ();
+	if (tcbp == NULL) {
+		_dl_debug_early("\ncannot allocate TLS data structures for initial thread");
+		_dl_exit(30);
+	}
+
+	/* Store for detection of the special case by __tls_get_addr
+	   so it knows not to pass this dtv to the normal realloc.  */
+	_dl_initial_dtv = GET_DTV (tcbp);
+
+	/* And finally install it for the main thread.  If ld.so itself uses
+	   TLS we know the thread pointer was initialized earlier.  */
+	const char *lossage = TLS_INIT_TP (tcbp, USE___THREAD);
+	if(__builtin_expect (lossage != NULL, 0)) {
+		_dl_debug_early("cannot set up thread-local storage: %s\n", lossage);
+		_dl_exit(30);
+	}
+	tls_init_tp_called = true;
+
+	return tcbp;
+}
+rtld_hidden_def (init_tls)
+#endif
+
diff --git a/ldso/ldso/i386/elfinterp.c b/ldso/ldso/i386/elfinterp.c
index 94f7405..9a0885f 100644
--- a/ldso/ldso/i386/elfinterp.c
+++ b/ldso/ldso/i386/elfinterp.c
@@ -71,7 +71,7 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 	got_addr = (char **)instr_addr;
 
 	/* Get the address of the GOT entry. */
-	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 	if (unlikely(!new_addr)) {
 		_dl_dprintf(2, "%s: can't resolve symbol '%s' in lib '%s'.\n", _dl_progname, symname, tpnt->libname);
 		_dl_exit(1);
@@ -176,7 +176,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 
 	if (symtab_index) {
 		symbol_addr = (unsigned long)_dl_find_hash(symname, scope, tpnt,
-							   elf_machine_type_class(reloc_type));
+							   elf_machine_type_class(reloc_type), NULL);
 
 		/*
 		 * We want to allow undefined references to weak symbols - this
diff --git a/ldso/ldso/ldso.c b/ldso/ldso/ldso.c
index 786775a..4852042 100644
--- a/ldso/ldso/ldso.c
+++ b/ldso/ldso/ldso.c
@@ -38,6 +38,10 @@
 
 #define ALLOW_ZERO_PLTGOT
 
+#if USE_TLS
+#include "dl-tls.c"
+#endif
+
 /* Pull in the value of _dl_progname */
 #include LDSO_ELFINTERP
 
@@ -96,13 +100,15 @@ extern void _start(void);
 
 #ifdef __UCLIBC_HAS_SSP__
 # include <dl-osinfo.h>
-static uintptr_t stack_chk_guard;
+uintptr_t stack_chk_guard;
 # ifndef THREAD_SET_STACK_GUARD
 /* Only exported for architectures that don't store the stack guard canary
  * in local thread area.  */
 uintptr_t __stack_chk_guard attribute_relro;
-# endif
-# ifdef __UCLIBC_HAS_SSP_COMPAT__
+#  ifdef __UCLIBC_HAS_SSP_COMPAT__
+strong_alias(__stack_chk_guard,__guard)
+#  endif
+# elif __UCLIBC_HAS_SSP_COMPAT__
 uintptr_t __guard attribute_relro;
 # endif
 #endif
@@ -213,11 +219,31 @@ static void *_dl_zalloc(size_t size)
 	return p;
 }
 
-void _dl_free (void *p)
+#if USE_TLS
+void * _dl_memalign (size_t __boundary, size_t __size)
 {
-	if (_dl_free_function)
-		(*_dl_free_function) (p);
+	void *result;
+	int i = 0;
+	size_t delta;
+	size_t rounded = 0;
+
+	if (_dl_memalign_function)
+		return (*_dl_memalign_function) (__boundary, __size);
+
+	while (rounded < __boundary) {
+		rounded = (1 << i++);
+	}
+
+	delta = (((size_t) _dl_malloc_addr + __size) & (rounded - 1));
+
+	if ((result = _dl_malloc(rounded - delta)) == NULL)
+		return result;
+
+	result = _dl_malloc(__size);
+
+	return result;
 }
+#endif
 
 static void __attribute__ ((destructor)) __attribute_used__ _dl_fini(void)
 {
@@ -262,6 +288,10 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 	ElfW(Addr) relro_addr = 0;
 	size_t relro_size = 0;
 	struct stat st;
+#if USE_TLS
+	void *tcbp = NULL;
+#endif
+	
 
 	/* Wahoo!!! We managed to make a function call!  Get malloc
 	 * setup so we can use _dl_dprintf() to print debug noise
@@ -336,18 +366,22 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 		unlazy = RTLD_NOW;
 	}
 
-	/* sjhill: your TLS init should go before this */
+#if USE_TLS
+	_dl_error_catch_tsd = &_dl_initial_error_catch_tsd;
+	_dl_init_static_tls = &_dl_nothread_init_static_tls;
+#endif
+
 #ifdef __UCLIBC_HAS_SSP__
 	/* Set up the stack checker's canary.  */
 	stack_chk_guard = _dl_setup_stack_chk_guard ();
 # ifdef THREAD_SET_STACK_GUARD
 	THREAD_SET_STACK_GUARD (stack_chk_guard);
+#  ifdef __UCLIBC_HAS_SSP_COMPAT__
+	__guard = stack_chk_guard;
+#  endif
 # else
 	__stack_chk_guard = stack_chk_guard;
 # endif
-# ifdef __UCLIBC_HAS_SSP_COMPAT__
-	__guard = stack_chk_guard;
-# endif
 #endif
 
 	/* At this point we are now free to examine the user application,
@@ -461,10 +495,53 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 
 			_dl_debug_early("Lib Loader: (%x) %s\n", (unsigned) DL_LOADADDR_BASE(tpnt->loadaddr), tpnt->libname);
 		}
+
+		/* Discover any TLS sections if the target supports them. */
+		if (ppnt->p_type == PT_TLS) {
+#if USE_TLS
+			if (ppnt->p_memsz > 0) {
+				app_tpnt->l_tls_blocksize = ppnt->p_memsz;
+				app_tpnt->l_tls_align = ppnt->p_align;
+				if (ppnt->p_align == 0)
+					app_tpnt->l_tls_firstbyte_offset = 0;
+				else
+					app_tpnt->l_tls_firstbyte_offset =
+						(ppnt->p_vaddr & (ppnt->p_align - 1));
+				app_tpnt->l_tls_initimage_size = ppnt->p_filesz;
+				app_tpnt->l_tls_initimage = (void *) ppnt->p_vaddr;
+
+				/* This image gets the ID one.  */
+				_dl_tls_max_dtv_idx = app_tpnt->l_tls_modid = 1;
+
+			}
+			_dl_debug_early("Found TLS header for application program\n");
+			break;
+#else
+			_dl_dprintf(_dl_debug_file, "Program uses unsupported TLS data!\n");
+			_dl_exit(1);
+#endif
+		}
 	}
 	app_tpnt->relro_addr = relro_addr;
 	app_tpnt->relro_size = relro_size;
 
+#if USE_TLS
+	/*
+	 * Adjust the address of the TLS initialization image in
+	 * case the executable is actually an ET_DYN object.
+	 */
+	if (app_tpnt->l_tls_initimage != NULL)
+	{
+#ifdef __SUPPORT_LD_DEBUG_EARLY__
+		unsigned int tmp = (unsigned int) app_tpnt->l_tls_initimage;
+#endif
+		app_tpnt->l_tls_initimage =
+			(char *) app_tpnt->l_tls_initimage + app_tpnt->loadaddr;
+		_dl_debug_early("Relocated TLS initial image from %x to %x (size = %x)\n", tmp, app_tpnt->l_tls_initimage, app_tpnt->l_tls_initimage_size);
+
+	}
+#endif
+
 #ifdef __SUPPORT_LD_DEBUG__
 	_dl_debug = _dl_getenv("LD_DEBUG", envp);
 	if (_dl_debug) {
@@ -603,6 +680,7 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 
 #ifdef __LDSO_PRELOAD_FILE_SUPPORT__
 	do {
+		struct stat st;
 		char *preload;
 		int fd;
 		char c, *cp, *cp2;
@@ -850,6 +928,22 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 	}
 #endif
 
+#if USE_TLS
+	/* We do not initialize any of the TLS functionality unless any of the
+	 * initial modules uses TLS.  This makes dynamic loading of modules with
+	 * TLS impossible, but to support it requires either eagerly doing setup
+	 * now or lazily doing it later.  Doing it now makes us incompatible with
+	 * an old kernel that can't perform TLS_INIT_TP, even if no TLS is ever
+	 * used.  Trying to do it lazily is too hairy to try when there could be
+	 * multiple threads (from a non-TLS-using libpthread).  */
+	bool was_tls_init_tp_called = tls_init_tp_called;
+	if (tcbp == NULL)
+	{
+		_dl_debug_early("Calling init_tls()!\n");
+		tcbp = init_tls ();
+	}
+#endif
+
 	_dl_debug_early("Beginning relocation fixups\n");
 
 #ifdef __mips__
@@ -875,6 +969,30 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 			_dl_protect_relro (tpnt);
 	}
 
+#if USE_TLS
+	if (!was_tls_init_tp_called && _dl_tls_max_dtv_idx > 0)
+		++_dl_tls_generation;
+
+	_dl_debug_early("Calling _dl_allocate_tls_init()!\n");
+
+	/* Now that we have completed relocation, the initializer data
+	   for the TLS blocks has its final values and we can copy them
+	   into the main thread's TLS area, which we allocated above.  */
+	_dl_allocate_tls_init (tcbp);
+
+	/* And finally install it for the main thread.  If ld.so itself uses
+	   TLS we know the thread pointer was initialized earlier.  */
+	if (! tls_init_tp_called)
+	{
+		const char *lossage = (char *) TLS_INIT_TP (tcbp, USE___THREAD);
+		if (__builtin_expect (lossage != NULL, 0))
+		{
+			_dl_debug_early("cannot set up thread-local storage: %s\n", lossage);
+			_dl_exit(30);
+		}
+	}
+#endif /* USE_TLS */
+
 	/* OK, at this point things are pretty much ready to run.  Now we need
 	 * to touch up a few items that are required, and then we can let the
 	 * user application have at it.  Note that the dynamic linker itself
@@ -882,7 +1000,7 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 	 * ld.so.1, so we have to look up each symbol individually.
 	 */
 
-	_dl_envp = (unsigned long *) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "__environ", _dl_symbol_tables, NULL, 0);
+	_dl_envp = (unsigned long *) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "__environ", _dl_symbol_tables, NULL, 0, NULL);
 	if (_dl_envp)
 		*_dl_envp = (unsigned long) envp;
 
@@ -938,7 +1056,23 @@ void _dl_get_ready_to_run(struct elf_resolve *tpnt, DL_LOADADDR_TYPE load_addr,
 
 	/* Find the real malloc function and make ldso functions use that from now on */
 	_dl_malloc_function = (void* (*)(size_t)) (intptr_t) _dl_find_hash(__C_SYMBOL_PREFIX__ "malloc",
-			_dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT);
+			_dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL);
+
+#if USE_TLS
+	/* Find the real functions and make ldso functions use them from now on */
+	_dl_calloc_function = (void* (*)(size_t, size_t)) (intptr_t)
+		_dl_find_hash(__C_SYMBOL_PREFIX__ "calloc", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL);
+					
+	_dl_realloc_function = (void* (*)(void *, size_t)) (intptr_t)
+		_dl_find_hash(__C_SYMBOL_PREFIX__ "realloc", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL);
+										
+	_dl_free_function = (void (*)(void *)) (intptr_t)
+		_dl_find_hash(__C_SYMBOL_PREFIX__ "free", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL);
+					
+	_dl_memalign_function = (void* (*)(size_t, size_t)) (intptr_t)
+		_dl_find_hash(__C_SYMBOL_PREFIX__ "memalign", _dl_symbol_tables, NULL, ELF_RTYPE_CLASS_PLT, NULL);
+			
+#endif
 
 	/* Notify the debugger that all objects are now mapped in.  */
 	_dl_debug_addr->r_state = RT_CONSISTENT;
diff --git a/ldso/ldso/mips/elfinterp.c b/ldso/ldso/mips/elfinterp.c
index 8d9b7c4..97a86a1 100644
--- a/ldso/ldso/mips/elfinterp.c
+++ b/ldso/ldso/mips/elfinterp.c
@@ -56,7 +56,7 @@ unsigned long __dl_runtime_resolve(unsigned long sym_index,
 	symname = strtab + sym->st_name;
 
 	new_addr = (unsigned long) _dl_find_hash(symname,
-			tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+			tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 	if (unlikely(!new_addr)) {
 		_dl_dprintf (2, "%s: can't resolve symbol '%s'\n",
 				_dl_progname, symname);
@@ -111,7 +111,7 @@ __dl_runtime_pltresolve(struct elf_resolve *tpnt, int reloc_entry)
 	got_addr = (char **)instr_addr;
 
 	/* Get the address of the GOT entry. */
-	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 	if (unlikely(!new_addr)) {
 		_dl_dprintf(2, "%s: can't resolve symbol '%s' in lib '%s'.\n", _dl_progname, symname, tpnt->libname);
 		_dl_exit(1);
@@ -188,13 +188,66 @@ int _dl_parse_relocation_information(struct dyn_elf *xpnt,
 			symbol_addr = (unsigned long)_dl_find_hash(symname,
 								   tpnt->symbol_scope,
 								   tpnt,
-								   elf_machine_type_class(reloc_type));
+								   elf_machine_type_class(reloc_type), NULL);
 			if (unlikely(!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK))
 				return 1;
 		}
 
 		switch (reloc_type) {
-#if _MIPS_SIM == _MIPS_SIM_ABI64
+#if USE_TLS
+# if _MIPS_SIM == _MIPS_SIM_ABI64
+		case R_MIPS_TLS_DTPMOD64:
+		case R_MIPS_TLS_DTPREL64:
+		case R_MIPS_TLS_TPREL64:
+# else
+		case R_MIPS_TLS_DTPMOD32:
+		case R_MIPS_TLS_DTPREL32:
+		case R_MIPS_TLS_TPREL32:
+# endif
+			{
+				ElfW(Sym) *sym_tls = &symtab[symtab_index];
+				struct elf_resolve *tpnt_tls = tpnt;
+
+				if (ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_LOCAL) {
+					_dl_find_hash((strtab + symtab[symtab_index].st_name),
+							_dl_symbol_tables, tpnt_tls, 1, &sym_tls);
+				}
+
+				switch (reloc_type)
+	  			{
+					case R_MIPS_TLS_DTPMOD64:
+					case R_MIPS_TLS_DTPMOD32:
+						if (tpnt_tls)
+							*(ElfW(Word) *)reloc_addr = tpnt_tls->l_tls_modid;
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_DTPMOD : %s, %d, %d\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+						break;
+
+					case R_MIPS_TLS_DTPREL64:
+					case R_MIPS_TLS_DTPREL32:
+						*(ElfW(Word) *)reloc_addr +=
+							TLS_DTPREL_VALUE (sym_tls);
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_DTPREL : %s, %x, %x\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+						break;
+
+					case R_MIPS_TLS_TPREL32:
+					case R_MIPS_TLS_TPREL64:
+						CHECK_STATIC_TLS((struct link_map *)tpnt_tls);
+						*(ElfW(Word) *)reloc_addr +=
+							TLS_TPREL_VALUE (tpnt_tls, sym_tls);
+#if defined (__SUPPORT_LD_DEBUG__)
+_dl_dprintf(2, "TLS_TPREL  : %s, %x, %x\n", (strtab + symtab[symtab_index].st_name), old_val, *((unsigned int *)reloc_addr));
+#endif
+						break;
+				}
+
+				break;
+			}
+#endif /* USE_TLS */
+#if _MIPS_SIM == _MIPS_SIM_ABI64
 		case (R_MIPS_64 << 8) | R_MIPS_REL32:
 #else	/* O32 || N32 */
 		case R_MIPS_REL32:
@@ -241,9 +294,9 @@ int _dl_parse_relocation_information(struct dyn_elf *xpnt,
 					_dl_dprintf(2, "symbol '%s': ", strtab + symtab[symtab_index].st_name);
 
 #if defined (__SUPPORT_LD_DEBUG__)
-				_dl_dprintf(2, "can't handle reloc type %s\n ", _dl_reltypes(reloc_type));
+				_dl_dprintf(2, "can't handle reloc type '%s' in lib '%s'\n", _dl_reltypes(reloc_type), tpnt->libname);
 #else
-				_dl_dprintf(2, "can't handle reloc type %x\n", reloc_type);
+				_dl_dprintf(2, "can't handle reloc type %x in lib '%s'\n", reloc_type, tpnt->libname);
 #endif
 				_dl_exit(1);
 			}
@@ -292,12 +345,12 @@ void _dl_perform_mips_global_got_relocations(struct elf_resolve *tpnt, int lazy)
 				}
 				else {
 					*got_entry = (unsigned long) _dl_find_hash(strtab +
-						sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+						sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 				}
 			}
 			else if (sym->st_shndx == SHN_COMMON) {
 				*got_entry = (unsigned long) _dl_find_hash(strtab +
-					sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+					sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 			}
 			else if (ELF_ST_TYPE(sym->st_info) == STT_FUNC &&
 				*got_entry != sym->st_value && tmp_lazy) {
@@ -309,7 +362,7 @@ void _dl_perform_mips_global_got_relocations(struct elf_resolve *tpnt, int lazy)
 			}
 			else {
 				*got_entry = (unsigned long) _dl_find_hash(strtab +
-					sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+					sym->st_name, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 			}
 
 			got_entry++;
diff --git a/ldso/ldso/sh/dl-debug.h b/ldso/ldso/sh/dl-debug.h
index e862da1..e2e74f8 100644
--- a/ldso/ldso/sh/dl-debug.h
+++ b/ldso/ldso/sh/dl-debug.h
@@ -36,6 +36,8 @@ static const char *_dl_reltypes_tab[] =
  [25]	"R_SH_SWITCH16","R_SH_SWITCH32","R_SH_USES",
  [28]	"R_SH_COUNT",	"R_SH_ALIGN",	"R_SH_CODE",	"R_SH_DATA",
  [32]	"R_SH_LABEL",	"R_SH_SWITCH8",	"R_SH_GNU_VTINHERIT","R_SH_GNU_VTENTRY",
+[144]	"R_SH_TLS_GD_32","R_SH_TLS_LD_32", "R_SH_TLS_LDO_32", "R_SH_TLS_IE_32", 
+[148]	"R_SH_TLS_LE_32","R_SH_TLS_DTPMOD32", "R_SH_TLS_DTPOFF32", "R_SH_TLS_TPOFF32",
 [160]	"R_SH_GOT32",	"R_SH_PLT32",	"R_SH_COPY",	"R_SH_GLOB_DAT",
 [164]	"R_SH_JMP_SLOT","R_SH_RELATIVE","R_SH_GOTOFF",	"R_SH_GOTPC",
 };
diff --git a/ldso/ldso/sh/dl-sysdep.h b/ldso/ldso/sh/dl-sysdep.h
index d4fc784..7937ceb 100644
--- a/ldso/ldso/sh/dl-sysdep.h
+++ b/ldso/ldso/sh/dl-sysdep.h
@@ -6,6 +6,7 @@
 /* Define this if the system uses RELOCA.  */
 #define ELF_USES_RELOCA
 #include <elf.h>
+#include <tls.h>
 /*
  * Initialization sequence for a GOT.
  */
@@ -88,9 +89,17 @@ _dl_urem(unsigned int n, unsigned int base)
    define the value.
    ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
    of the main executable's symbols, as for a COPY reloc.  */
+#if defined USE_TLS
+# define elf_machine_type_class(type) \
+  ((((type) == R_SH_JMP_SLOT || (type) == R_SH_TLS_DTPMOD32		      \
+     || (type) == R_SH_TLS_DTPOFF32 || (type) == R_SH_TLS_TPOFF32)	      \
+    * ELF_RTYPE_CLASS_PLT)						      \
+   | (((type) == R_SH_COPY) * ELF_RTYPE_CLASS_COPY))
+#else
 #define elf_machine_type_class(type) \
   ((((type) == R_SH_JMP_SLOT) * ELF_RTYPE_CLASS_PLT)	\
    | (((type) == R_SH_COPY) * ELF_RTYPE_CLASS_COPY))
+#endif   
 
 /* Return the link-time address of _DYNAMIC.  Conveniently, this is the
    first element of the GOT.  This must be inlined in a function which
diff --git a/ldso/ldso/sh/elfinterp.c b/ldso/ldso/sh/elfinterp.c
index 964b2ea..5f2db41 100644
--- a/ldso/ldso/sh/elfinterp.c
+++ b/ldso/ldso/sh/elfinterp.c
@@ -69,7 +69,8 @@ unsigned long _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 	got_addr = (char **) instr_addr;
 
 	/* Get the address of the GOT entry */
-	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
+	
 	if (unlikely(!new_addr)) {
 		_dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname);
 		_dl_exit(1);
@@ -159,6 +160,9 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 	unsigned long old_val;
 #endif
 
+struct elf_resolve *tls_tpnt = NULL;
+
+
 	reloc_addr = (unsigned long *)(intptr_t) (tpnt->loadaddr + (unsigned long) rpnt->r_offset);
 	reloc_type = ELF32_R_TYPE(rpnt->r_info);
 	symtab_index = ELF32_R_SYM(rpnt->r_info);
@@ -167,21 +171,18 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 
 	if (symtab_index) {
 		symbol_addr = (unsigned long) _dl_find_hash(symname, scope, tpnt,
-							    elf_machine_type_class(reloc_type));
-
+							    elf_machine_type_class(reloc_type), &tls_tpnt);
 		/*
 		 * We want to allow undefined references to weak symbols - this might
 		 * have been intentional.  We should not be linking local symbols
 		 * here, so all bases should be covered.
 		 */
-		if (!symbol_addr && ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK) {
+
+		if (!symbol_addr && (ELF_ST_TYPE(symtab[symtab_index].st_info) != STT_TLS) &&(ELF32_ST_BIND(symtab[symtab_index].st_info) != STB_WEAK)) {
 			_dl_dprintf(2, "%s: can't resolve symbol '%s'\n",
 			            _dl_progname, strtab + symtab[symtab_index].st_name);
 
-			/*
-			 * The caller should handle the error: undefined reference to weak symbols
-			 * are not fatal.
-			 */
+			/* Let the caller handle the error: it may be non-fatal if called from dlopen */
 			return 1;
 		}
 	}
@@ -189,6 +190,14 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 #if defined (__SUPPORT_LD_DEBUG__)
 	old_val = *reloc_addr;
 #endif
+
+#if USE_TLS
+	/* In case of a TLS reloc, a NULL tls_tpnt means we have an 'anonymous' symbol.
+	   This is the case of a static TLS variable, so the lookup module is the
+	   one referencing the TLS variable. */
+	if(!tls_tpnt)
+		tls_tpnt = tpnt;
+#endif
 	switch (reloc_type) {
 		case R_SH_NONE:
 			break;
@@ -215,6 +224,20 @@ _dl_do_reloc (struct elf_resolve *tpnt,struct dyn_elf *scope,
 		case R_SH_RELATIVE:
 			*reloc_addr = (unsigned long) tpnt->loadaddr + rpnt->r_addend;
 			break;
+#if USE_TLS
+		case R_SH_TLS_DTPMOD32:
+			*reloc_addr = tls_tpnt->l_tls_modid;
+			break;
+
+		case R_SH_TLS_DTPOFF32:
+			*reloc_addr = symbol_addr;
+			break;
+			
+		case R_SH_TLS_TPOFF32:
+			CHECK_STATIC_TLS ((struct link_map *) tls_tpnt);
+			*reloc_addr = tls_tpnt->l_tls_offset + symbol_addr + rpnt->r_addend;			
+			break;
+#endif									
 		default:
 
 			return -1;
diff --git a/ldso/ldso/sparc/dl-sysdep.h b/ldso/ldso/sparc/dl-sysdep.h
index fc42de8..d35a391 100644
--- a/ldso/ldso/sparc/dl-sysdep.h
+++ b/ldso/ldso/sparc/dl-sysdep.h
@@ -97,7 +97,9 @@ sparc_mod(unsigned long m, unsigned long p)
    ELF_RTYPE_CLASS_NOCOPY iff TYPE should not be allowed to resolve to one
    of the main executable's symbols, as for a COPY reloc.  */
 #define elf_machine_type_class(type) \
-  ((((type) == R_SPARC_JMP_SLOT) * ELF_RTYPE_CLASS_PLT)			      \
+  ((((type) == R_SPARC_JMP_SLOT || (type) == R_SPARC_TLS_DTPMOD32 \
+     || (type) == R_SPARC_TLS_DTPOFF32 || (type) == R_SPARC_TLS_TPOFF32) \
+    * ELF_RTYPE_CLASS_PLT)			      \
    | (((type) == R_SPARC_COPY) * ELF_RTYPE_CLASS_COPY))
 
 /* The SPARC overlaps DT_RELA and DT_PLTREL.  */
diff --git a/ldso/ldso/sparc/elfinterp.c b/ldso/ldso/sparc/elfinterp.c
index ce3991f..5f3617b 100644
--- a/ldso/ldso/sparc/elfinterp.c
+++ b/ldso/ldso/sparc/elfinterp.c
@@ -80,7 +80,7 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 	got_addr = (char **)instr_addr;
 
 	/* Get the address of the GOT entry */
-	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT);
+	new_addr = _dl_find_hash(symname, tpnt->symbol_scope, tpnt, ELF_RTYPE_CLASS_PLT, NULL);
 	if (unlikely(!new_addr)) {
 		_dl_dprintf(2, "%s: Can't resolve symbol '%s'\n", _dl_progname, symname);
 		_dl_exit(1);
@@ -99,8 +99,8 @@ _dl_linux_resolver(struct elf_resolve *tpnt, int reloc_entry)
 	if (!_dl_debug_nofixups)
 #endif
 	{
-		got_addr[1] = (char *) (0x03000000 | (((unsigned int) new_addr >> 10) & 0x3fffff));
-		got_addr[2] = (char *) (0x81c06000 | ((unsigned int) new_addr & 0x3ff));
+		got_addr[1] = (char *) (OPCODE_SETHI_G1 | (((unsigned int) new_addr >> 10) & 0x3fffff));
+		got_addr[2] = (char *) (OPCODE_JMP_G1 | ((unsigned int) new_addr & 0x3ff));
 	}
 
 	return (unsigned long)new_addr;
@@ -170,6 +170,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 	int reloc_type;
 	int symtab_index;
 	char *symname;
+    struct elf_resolve *tls_tpnt = 0;
 	ElfW(Sym) *sym;
 	ElfW(Addr) *reloc_addr;
 	ElfW(Addr) symbol_addr;
@@ -186,17 +187,25 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 
 	if (symtab_index) {
 		symbol_addr = (ElfW(Addr))_dl_find_hash(symname, scope, tpnt,
-							    elf_machine_type_class(reloc_type));
+							    elf_machine_type_class(reloc_type), &tls_tpnt);
 		/*
 		 * We want to allow undefined references to weak symbols - this
 		 * might have been intentional.  We should not be linking local
 		 * symbols here, so all bases should be covered.
 		 */
-		if (unlikely(!symbol_addr && ELF_ST_BIND(sym->st_info) != STB_WEAK)) {
-			_dl_dprintf(2, "%s: can't resolve symbol '%s'\n", _dl_progname, symname);
-			_dl_exit(1);
+		if (unlikely(!symbol_addr && (ELF_ST_TYPE(sym->st_info) != STT_TLS)
+            && (ELF_ST_BIND(sym->st_info) != STB_WEAK))) {
+            /* This may be non-fatal if called from dlopen. */
+            return 1;
+
 		}
-	}
+	} else {
+        /* Relocs against STN_UNDEF are usually treated as using a
+         * symbol value of zero, and using the module containing the
+         * reloc itself. */
+        symbol_addr = sym->st_value;
+        tls_tpnt = tpnt;
+    }
 
 #if defined (__SUPPORT_LD_DEBUG__)
 	old_val = *reloc_addr;
@@ -208,21 +217,6 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 		case R_SPARC_NONE:
 			break;
 
-#if 0 /* these dont really seem to be useful */
-		case R_SPARC_8:
-			*(char *) reloc_addr = symbol_addr;
-			break;
-		case R_SPARC_16:
-			*(short *) reloc_addr = symbol_addr;
-			break;
-		case R_SPARC_DISP8:
-			*(char *) reloc_addr = (symbol_addr) - (Elf32_Addr) reloc_addr;
-			break;
-		case R_SPARC_DISP16:
-			*(short *) reloc_addr = (symbol_addr) - (Elf32_Addr) reloc_addr;
-			break;
-#endif
-
 		case R_SPARC_DISP32:
 			*reloc_addr = symbol_addr - (unsigned int) reloc_addr;
 			break;
@@ -232,7 +226,7 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 				symbol_addr = tpnt->loadaddr + rpnt->r_addend;
 			else
 				symbol_addr += rpnt->r_addend;
-			*reloc_addr = (*reloc_addr & ~0x3ff)|(symbol_addr & 0x3ff);
+			*reloc_addr = (*reloc_addr & ~0x3ff) | (symbol_addr & 0x3ff);
 			break;
 
 		case R_SPARC_GLOB_DAT:
@@ -241,17 +235,8 @@ _dl_do_reloc(struct elf_resolve *tpnt, struct dyn_elf *scope,
 			break;
 
 		case R_SPARC_JMP_SLOT:
-/*
-value = symbol_addr;
-value += reloc->r_addend;
-disp = value - reloc_addr;
-reloc_addr[1] = OPCODE_JMP_G1 | (value & 0x3ff);
-reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
-			reloc_addr[1] = OPCODE_JMP_G1 | ((symbol_addr-(Elf32_Addr)reloc_addr) & 0x3ff);
-			reloc_addr[0] = OPCODE_SETHI_G1 | ((symbol_addr-(Elf32_Addr)reloc_addr) >> 10);
-*/
-			reloc_addr[1] = 0x03000000 | ((symbol_addr >> 10) & 0x3fffff);
-			reloc_addr[2] = 0x81c06000 | (symbol_addr & 0x3ff);
+            reloc_addr[1] = OPCODE_SETHI_G1 | (( symbol_addr >> 10 ) & 0x3fffff);
+            reloc_addr[2] = OPCODE_JMP_G1 | ( symbol_addr & 0x3ff );
 			break;
 
 		case R_SPARC_RELATIVE:
@@ -287,6 +272,26 @@ reloc_addr[0] = OPCODE_SETHI_G1 | (value >> 10);
 			} else
 				_dl_dprintf(_dl_debug_file, "no symbol_addr to copy !?\n");
 			break;
+#if USE_TLS
+        case R_SPARC_TLS_DTPMOD32:
+            *reloc_addr = tls_tpnt->l_tls_modid;
+            break;
+
+        case R_SPARC_TLS_DTPOFF32:
+            /* During relocation all TLS symbols are defined and used.
+             * Therefore the offset is already correct.  */
+            *reloc_addr = sym->st_value + rpnt->r_addend;
+            break;
+
+        case R_SPARC_TLS_TPOFF32:
+            /* The offset is negative, forward from the thread pointer.
+             * We know the offset of object the symbol is contained in.
+             * It is a negative value which will be added to the
+             * thread pointer.  */
+            CHECK_STATIC_TLS ((struct link_map *) tls_tpnt);
+            *reloc_addr = sym->st_value - tls_tpnt->l_tls_offset + rpnt->r_addend;
+            break;
+#endif
 		default:
 			return -1;	/* Calls _dl_exit(1). */
 	}
diff --git a/ldso/libdl/libdl.c b/ldso/libdl/libdl.c
index 8646a74..fe84059 100644
--- a/ldso/libdl/libdl.c
+++ b/ldso/libdl/libdl.c
@@ -35,13 +35,25 @@
 #include <string.h> /* Needed for 'strstr' prototype' */
 #include <stdbool.h>
 
+#ifdef __UCLIBC_HAS_TLS__
+#include <tls.h>
+#endif
+
+#if USE_TLS
+#include <ldsodefs.h>
+extern void (*_dl_init_static_tls) (struct link_map *);
+extern void _dl_add_to_slotinfo(struct link_map  *l);
+#endif
 
 #ifdef SHARED
+# if USE_TLS
+# include <dl-tls.h>
+extern struct link_map *_dl_update_slotinfo(unsigned long int req_modid);
+# endif
 
 /* When libdl is loaded as a shared library, we need to load in
  * and use a pile of symbols from ldso... */
 
-extern char *_dl_find_hash(const char *, struct dyn_elf *, struct elf_resolve *, int);
 extern struct elf_resolve * _dl_load_shared_library(int, struct dyn_elf **,
 	struct elf_resolve *, char *, int);
 extern int _dl_fixup(struct dyn_elf *rpnt, int lazy);
@@ -50,6 +62,7 @@ extern int _dl_errno;
 extern struct dyn_elf *_dl_symbol_tables;
 extern struct dyn_elf *_dl_handles;
 extern struct elf_resolve *_dl_loaded_modules;
+extern void _dl_free (void *__ptr);
 extern struct r_debug *_dl_debug_addr;
 extern unsigned long _dl_error_number;
 extern void *(*_dl_malloc_function)(size_t);
@@ -83,7 +96,7 @@ char *_dl_debug_reloc     = NULL;
 char *_dl_debug_detail    = NULL;
 char *_dl_debug_nofixups  = NULL;
 char *_dl_debug_bindings  = NULL;
-int   _dl_debug_file      = NULL;
+int   _dl_debug_file      = 2;
 #endif
 const char *_dl_progname       = "";        /* Program name */
 void *(*_dl_malloc_function)(size_t);
@@ -97,6 +110,15 @@ struct r_debug *_dl_debug_addr = NULL;
 
 #include "../ldso/dl-array.c"
 #include "../ldso/dl-debug.c"
+
+
+# if USE_TLS
+/*
+ * Giving this initialized value preallocates some surplus bytes in the
+ * static TLS area, see __libc_setup_tls (libc-tls.c).
+ */
+size_t _dl_tls_static_size = 2048;
+# endif
 #include LDSO_ELFINTERP
 #include "../ldso/dl-hash.c"
 #define _dl_trace_loaded_objects    0
@@ -133,6 +155,7 @@ static const char *const dl_error_names[] = {
 	"Not an ELF shared library",
 	"Unable to mmap file",
 	"No dynamic section",
+	"Library contains unsupported TLS",
 #ifdef ELF_USES_RELOCA
 	"Unable to process REL relocs",
 #else
@@ -142,6 +165,111 @@ static const char *const dl_error_names[] = {
 	"Unable to resolve symbol"
 };
 
+
+#if USE_TLS
+#ifdef SHARED
+/*
+ * Systems which do not have tls_index also probably have to define
+ * DONT_USE_TLS_INDEX.
+ */
+
+# ifndef __TLS_GET_ADDR
+#  define __TLS_GET_ADDR __tls_get_addr
+# endif
+
+/*
+ * Return the symbol address given the map of the module it is in and
+ *  the symbol record.  This is used in dl-sym.c.
+ */
+static void *
+internal_function
+_dl_tls_symaddr(struct link_map *map, const Elf32_Addr st_value)
+{
+# ifndef DONT_USE_TLS_INDEX
+	tls_index tmp =
+	{
+		.ti_module = map->l_tls_modid,
+		.ti_offset = st_value
+	};
+
+	return __TLS_GET_ADDR (&tmp);
+# else
+	return __TLS_GET_ADDR (map->l_tls_modid, st_value);
+# endif
+}
+#endif
+
+/* Returns true if a non-empty entry was found.  */
+static bool
+remove_slotinfo(size_t idx, struct dtv_slotinfo_list *listp, size_t disp,
+	 bool should_be_there)
+{
+	if(idx - disp >= listp->len)
+	{
+		if(listp->next == NULL)
+		{
+			/*
+			 * The index is not actually valid in the slotinfo list,
+			 * because this object was closed before it was fully set
+			 * up due to some error.
+			 */
+			_dl_assert(!should_be_there);
+		}
+		else
+		{
+			if(remove_slotinfo(idx, listp->next, disp + listp->len,
+					should_be_there))
+				return true;
+
+			/*
+			 * No non-empty entry. Search from the end of this element's
+			 * slotinfo array.
+			 */
+			idx = disp + listp->len;
+		}
+	}
+	else
+	{
+		struct link_map *old_map = listp->slotinfo[idx - disp].map;
+
+		/*
+		 * The entry might still be in its unused state if we are
+		 * closing an object that wasn't fully set up.
+		 */
+		if(__builtin_expect(old_map != NULL, 1))
+		{
+			_dl_assert(old_map->l_tls_modid == idx);
+
+			/* Mark the entry as unused. */
+			listp->slotinfo[idx - disp].gen = _dl_tls_generation + 1;
+			listp->slotinfo[idx - disp].map = NULL;
+		}
+
+		/*
+		 * If this is not the last currently used entry no need to
+		 * look further.
+		 */
+		if (idx != _dl_tls_max_dtv_idx)
+			return true;
+	}
+
+	while(idx - disp > (disp == 0 ? 1 + _dl_tls_static_nelem : 0))
+	{
+		--idx;
+
+		if(listp->slotinfo[idx - disp].map != NULL)
+		{
+			/* Found a new last used index.  */
+			_dl_tls_max_dtv_idx = idx;
+			return true;
+		}
+	}
+
+	/* No non-empty entry in this list element.  */
+	return false;
+}
+#endif
+
 void dl_cleanup(void) __attribute__ ((destructor));
 void dl_cleanup(void)
 {
@@ -165,6 +293,9 @@ void *dlopen(const char *libname, int flag)
 	unsigned int nlist, i;
 	struct elf_resolve **init_fini_list;
 	static bool _dl_init;
+#if USE_TLS
+	bool any_tls = false;
+#endif
 
 	/* A bit of sanity checking... */
 	if (!(flag & (RTLD_LAZY|RTLD_NOW))) {
@@ -396,6 +527,52 @@ void *dlopen(const char *libname, int flag)
 	}
 	/* TODO:  Should we set the protections of all pages back to R/O now ? */
 
+
+#if USE_TLS
+
+	for (i=0; i < nlist; i++) {
+		struct elf_resolve *tmp_tpnt = init_fini_list[i];
+		/* Only add TLS memory if this object is loaded now and
+		   therefore is not yet initialized.  */
+
+		if (!(tmp_tpnt->init_flag & INIT_FUNCS_CALLED)
+		/* Only if the module defines thread local data. */
+			&& __builtin_expect (tmp_tpnt->l_tls_blocksize > 0, 0)) {
+			
+			/* Now that we know the object is loaded successfully add
+			modules containing TLS data to the slot info table.  We
+			might have to increase its size.  */
+			_dl_add_to_slotinfo ((struct link_map*)tmp_tpnt);
+			
+			/* It is the case in which we couldn't perform TLS static 
+			   initialization at relocation time, and we delayed it until
+			   the relocation has been completed. */
+
+			if (tmp_tpnt->l_need_tls_init) {
+				tmp_tpnt->l_need_tls_init = 0;
+# ifdef SHARED
+				/* Update the slot information data for at least the
+				generation of the DSO we are allocating data for.  */
+				_dl_update_slotinfo (tmp_tpnt->l_tls_modid);
+# endif
+
+				_dl_init_static_tls((struct link_map*)tmp_tpnt);
+				_dl_assert (tmp_tpnt->l_need_tls_init == 0);
+		}
+
+		/* We have to bump the generation counter. */
+		any_tls = true;
+		}
+	}
+
+	/* Bump the generation number if necessary.  */
+	if (any_tls && __builtin_expect (++_dl_tls_generation == 0, 0)) {
+		_dl_debug_early("TLS generation counter wrapped! Please report this.");
+		_dl_exit(30);
+	}
+
+#endif
+
 	/* Notify the debugger we have added some objects. */
 	if (_dl_debug_addr) {
 		dl_brk = (void (*)(void)) _dl_debug_addr->r_brk;
@@ -445,6 +622,7 @@ void *dlsym(void *vhandle, const char *name)
 	ElfW(Addr) from;
 	struct dyn_elf *rpnt;
 	void *ret;
+	struct elf_resolve *tls_tpnt = NULL;
 	/* Nastiness to support underscore prefixes.  */
 #ifdef __UCLIBC_UNDERSCORES__
 	char tmp_buf[80];
@@ -499,7 +677,15 @@ void *dlsym(void *vhandle, const char *name)
 	tpnt = NULL;
 	if (handle == _dl_symbol_tables)
 		tpnt = handle->dyn; /* Only search RTLD_GLOBAL objs if global object */
-	ret = _dl_find_hash(name2, handle, tpnt, ELF_RTYPE_CLASS_DLSYM);
+	ret = _dl_find_hash(name2, handle, NULL, 0, &tls_tpnt);
+
+#if defined USE_TLS && defined SHARED
+	if(tls_tpnt) {
+		/* The found symbol is a thread-local storage variable.
+		Return its address for the current thread.  */
+		ret = _dl_tls_symaddr ((struct link_map *)tls_tpnt, (Elf32_Addr)ret);
+	}
+#endif
 
 	/*
 	 * Nothing found.
@@ -532,6 +718,12 @@ static int do_dlclose(void *vhandle, int need_fini)
 	struct dyn_elf *handle;
 	unsigned int end;
 	unsigned int i, j;
+#if USE_TLS
+	bool any_tls = false;
+	size_t tls_free_start = NO_TLS_OFFSET;
+	size_t tls_free_end = NO_TLS_OFFSET; 
+	struct link_map *tls_lmap;
+#endif
 
 	handle = (struct dyn_elf *) vhandle;
 	if (handle == _dl_symbol_tables)
@@ -587,6 +779,118 @@ static int do_dlclose(void *vhandle, int need_fini)
 				if (end < ppnt->p_vaddr + ppnt->p_memsz)
 					end = ppnt->p_vaddr + ppnt->p_memsz;
 			}
+
+#if USE_TLS
+			/* Do the cast to make things easy. */
+			tls_lmap = (struct link_map *) tpnt;
+
+			/* Remove the object from the dtv slotinfo array if it uses TLS. */
+			if (__builtin_expect (tls_lmap->l_tls_blocksize > 0, 0))
+			{
+				any_tls = true;
+
+				if (_dl_tls_dtv_slotinfo_list != NULL
+						&& ! remove_slotinfo (tls_lmap->l_tls_modid,
+						_dl_tls_dtv_slotinfo_list, 0,
+						(tpnt->init_flag & INIT_FUNCS_CALLED)))
+					/* All dynamically loaded modules with TLS are unloaded. */
+					_dl_tls_max_dtv_idx = _dl_tls_static_nelem;
+
+				if (tls_lmap->l_tls_offset != NO_TLS_OFFSET)
+				{
+					/*
+					 * Collect a contiguous chunk built from the objects in
+					 * this search list, going in either direction.  When the
+					 * whole chunk is at the end of the used area then we can
+					 * reclaim it.
+					 */
+# if TLS_TCB_AT_TP
+					if (tls_free_start == NO_TLS_OFFSET
+						|| (size_t) tls_lmap->l_tls_offset == tls_free_start)
+					{
+						/* Extend the contiguous chunk being reclaimed. */
+						tls_free_start
+							= tls_lmap->l_tls_offset -
+							  tls_lmap->l_tls_blocksize;
+
+						if (tls_free_end == NO_TLS_OFFSET)
+							tls_free_end = tls_lmap->l_tls_offset;
+					}
+					else if (tls_lmap->l_tls_offset - tls_lmap->l_tls_blocksize
+							== tls_free_end)
+						/* Extend the chunk backwards.  */
+						tls_free_end = tls_lmap->l_tls_offset;
+					else
+					{
+						/*
+						 * This isn't contiguous with the last chunk freed.
+						 * One of them will be leaked unless we can free
+						 * one block right away.
+						 */
+						if (tls_free_end == _dl_tls_static_used)
+						{
+							_dl_tls_static_used = tls_free_start;
+							tls_free_end = tls_lmap->l_tls_offset;
+							tls_free_start
+								= tls_free_end - tls_lmap->l_tls_blocksize;
+						}
+						else if ((size_t) tls_lmap->l_tls_offset
+								== _dl_tls_static_used)
+							_dl_tls_static_used = tls_lmap->l_tls_offset -
+								tls_lmap->l_tls_blocksize;
+						else if (tls_free_end < (size_t) tls_lmap->l_tls_offset)
+						{
+							/*
+							 * We pick the later block. It has a chance
+							 * to be freed.
+							 */
+							tls_free_end = tls_lmap->l_tls_offset;
+							tls_free_start = tls_free_end -
+								tls_lmap->l_tls_blocksize;
+						}
+					}
+# elif TLS_DTV_AT_TP
+					if ((size_t) tls_lmap->l_tls_offset == tls_free_end)
+						/* Extend the contiguous chunk being reclaimed. */
+						tls_free_end -= tls_lmap->l_tls_blocksize;
+					else if (tls_lmap->l_tls_offset + tls_lmap->l_tls_blocksize
+							== tls_free_start)
+						/* Extend the chunk backwards. */
+						tls_free_start = tls_lmap->l_tls_offset;
+					else
+					{
+						/*
+						 * This isn't contiguous with the last chunk
+						 * freed. One of them will be leaked.
+						 */
+						if (tls_free_end == _dl_tls_static_used)
+							_dl_tls_static_used = tls_free_start;
+						tls_free_start = tls_lmap->l_tls_offset;
+						tls_free_end = tls_free_start +
+							tls_lmap->l_tls_blocksize;
+					}
+# else
+#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
+# endif
+				} else {
+
+#define TLS_DTV_UNALLOCATED	((void *) -1l)
+
+					dtv_t *dtv = THREAD_DTV ();
+
+					_dl_assert(!(dtv[tls_lmap->l_tls_modid].pointer.is_static));
+					if(dtv[tls_lmap->l_tls_modid].pointer.val != TLS_DTV_UNALLOCATED) {
+						/* Note that free is called for NULL as well.  We
+						deallocate even if it is this dtv entry we are
+						supposed to load.  The reason is that we call
+						memalign and not malloc.  */
+						_dl_free (dtv[tls_lmap->l_tls_modid].pointer.val);
+						dtv[tls_lmap->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
+					}
+				}			
+			}
+#endif
+
 			DL_LIB_UNMAP (tpnt, end);
 			/* Free elements in RTLD_LOCAL scope list */
 			for (runp = tpnt->rtld_local; runp; runp = tmp) {
@@ -638,6 +942,21 @@ static int do_dlclose(void *vhandle, int need_fini)
 	free(handle->init_fini.init_fini);
 	free(handle);
 
+#if USE_TLS
+	/* If we removed any object which uses TLS bump the generation counter.  */
+	if (any_tls)
+	{
+		if (__builtin_expect (++_dl_tls_generation == 0, 0))
+		{
+			_dl_debug_early ("TLS generation counter wrapped!  Please report to the uClibc mailing list.\n");
+			_dl_exit(30);
+		}
+
+		if (tls_free_end == _dl_tls_static_used)
+			_dl_tls_static_used = tls_free_start;
+	}
+#endif
+
 	if (_dl_debug_addr) {
 		dl_brk = (void (*)(void)) _dl_debug_addr->r_brk;
 		if (dl_brk != NULL) {
-- 
1.6.2.5



More information about the uClibc mailing list