Browse Source

fix TLS layout of TLS variant I when there is a gap above TP

In TLS variant I the TLS area is above TP (or above a fixed offset from
TP), but on some targets there is a reserved gap above TP before TLS starts.

This matters for the local-exec TLS access model, where the offsets of
TLS variables from the TP are hard-coded by the linker into the
executable, so libc must compute these offsets the same way the
linker does.  The TLS offset of the main module has to be

	alignup(GAP_ABOVE_TP, main_tls_align).

If there is no TLS in the main module then the gap can be ignored
since musl does not use it and the tls access models of shared
libraries are not affected.

The previous setup only worked if (tls_align & -GAP_ABOVE_TP) == 0
(i.e. TLS did not require large alignment) because the gap was
treated as a fixed offset from TP.  Now the TP points at the end
of the pthread struct (which is aligned) and there is a gap above
it (which may also need alignment).

The fix required changing TP_ADJ and __pthread_self on the affected
targets (aarch64, arm and sh).  The offset used to access the dtv in
the tlsdesc asm changed as well.
Szabolcs Nagy 6 years ago
parent
commit
610c5a8524

+ 3 - 2
arch/aarch64/pthread_arch.h

@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("mrs %0,tpidr_el0" : "=r"(self));
-	return (void*)(self + 16 - sizeof(struct pthread));
+	return (void*)(self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
+#define GAP_ABOVE_TP 16
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC pc

+ 1 - 1
arch/aarch64/reloc.h

@@ -10,7 +10,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 16
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_AARCH64_ABS64
 #define REL_GOT         R_AARCH64_GLOB_DAT

+ 4 - 3
arch/arm/pthread_arch.h

@@ -5,7 +5,7 @@ static inline pthread_t __pthread_self()
 {
 	char *p;
 	__asm__ __volatile__ ( "mrc p15,0,%0,c13,c0,3" : "=r"(p) );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #else
@@ -21,12 +21,13 @@ static inline pthread_t __pthread_self()
 	extern uintptr_t __attribute__((__visibility__("hidden"))) __a_gettp_ptr;
 	register uintptr_t p __asm__("r0");
 	__asm__ __volatile__ ( BLX " %1" : "=r"(p) : "r"(__a_gettp_ptr) : "cc", "lr" );
-	return (void *)(p+8-sizeof(struct pthread));
+	return (void *)(p-sizeof(struct pthread));
 }
 
 #endif
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC arm_pc

+ 1 - 1
arch/arm/reloc.h

@@ -16,7 +16,7 @@
 
 #define NO_LEGACY_INITFINI
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_ARM_ABS32
 #define REL_GOT         R_ARM_GLOB_DAT

+ 1 - 0
arch/mips/pthread_arch.h

@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000

+ 1 - 0
arch/mips64/pthread_arch.h

@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000

+ 1 - 0
arch/mipsn32/pthread_arch.h

@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000

+ 1 - 0
arch/or1k/pthread_arch.h

@@ -12,6 +12,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC regs.pc

+ 1 - 0
arch/powerpc/pthread_arch.h

@@ -11,6 +11,7 @@ static inline struct pthread *__pthread_self()
 }
                         
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000

+ 1 - 0
arch/powerpc64/pthread_arch.h

@@ -6,6 +6,7 @@ static inline struct pthread *__pthread_self()
 }
 
 #define TLS_ABOVE_TP
+#define GAP_ABOVE_TP 0
 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) + 0x7000)
 
 #define DTP_OFFSET 0x8000

+ 3 - 2
arch/sh/pthread_arch.h

@@ -2,10 +2,11 @@ static inline struct pthread *__pthread_self()
 {
 	char *self;
 	__asm__ __volatile__ ("stc gbr,%0" : "=r" (self) );
-	return (struct pthread *) (self + 8 - sizeof(struct pthread));
+	return (struct pthread *) (self - sizeof(struct pthread));
 }
 
 #define TLS_ABOVE_TP
-#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
+#define GAP_ABOVE_TP 8
+#define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
 
 #define MC_PC sc_pc

+ 1 - 1
arch/sh/reloc.h

@@ -20,7 +20,7 @@
 
 #define LDSO_ARCH "sh" ENDIAN_SUFFIX FP_SUFFIX ABI_SUFFIX
 
-#define TPOFF_K 8
+#define TPOFF_K 0
 
 #define REL_SYMBOLIC    R_SH_DIR32
 #define REL_OFFSET      R_SH_REL32

+ 3 - 2
ldso/dynlink.c

@@ -1594,8 +1594,9 @@ _Noreturn void __dls3(size_t *sp)
 		libc.tls_head = tls_tail = &app.tls;
 		app.tls_id = tls_cnt = 1;
 #ifdef TLS_ABOVE_TP
-		app.tls.offset = 0;
-		tls_offset = app.tls.size
+		app.tls.offset = GAP_ABOVE_TP;
+		app.tls.offset += -GAP_ABOVE_TP & (app.tls.align-1);
+		tls_offset = app.tls.offset + app.tls.size
 			+ ( -((uintptr_t)app.tls.image + app.tls.size)
 			& (app.tls.align-1) );
 #else

+ 8 - 2
src/env/__init_tls.c

@@ -104,13 +104,19 @@ static void static_init_tls(size_t *aux)
 
 	main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
 		& (main_tls.align-1);
-	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
-#ifndef TLS_ABOVE_TP
+#ifdef TLS_ABOVE_TP
+	main_tls.offset = GAP_ABOVE_TP;
+	main_tls.offset += -GAP_ABOVE_TP & (main_tls.align-1);
+#else
 	main_tls.offset = main_tls.size;
 #endif
+	if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
 
 	libc.tls_align = main_tls.align;
 	libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
+#ifdef TLS_ABOVE_TP
+		+ main_tls.offset
+#endif
 		+ main_tls.size + main_tls.align
 		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
 

+ 2 - 3
src/ldso/aarch64/tlsdesc.s

@@ -14,7 +14,7 @@ __tlsdesc_static:
 // size_t __tlsdesc_dynamic(size_t *a)
 // {
 // 	struct {size_t modidx,off;} *p = (void*)a[1];
-// 	size_t *dtv = *(size_t**)(tp + 16 - 8);
+// 	size_t *dtv = *(size_t**)(tp - 8);
 // 	if (p->modidx <= dtv[0])
 // 		return dtv[p->modidx] + p->off - tp;
 // 	return __tls_get_new(p) - tp;
@@ -28,8 +28,7 @@ __tlsdesc_dynamic:
 	mrs x1,tpidr_el0      // tp
 	ldr x0,[x0,#8]        // p
 	ldr x2,[x0]           // p->modidx
-	add x3,x1,#8
-	ldr x3,[x3]           // dtv
+	ldr x3,[x1,#-8]       // dtv
 	ldr x4,[x3]           // dtv[0]
 	cmp x2,x4
 	b.hi 1f