Browse Source

i386 vsyscall support (vdso-provided sysenter/syscall instruction based)

This doubles the performance of the fastest syscalls on the Atom I
tested it on; the improvement is reportedly much more dramatic on
worst-case CPUs. This mechanism cannot be used for cancellable syscalls.
Rich Felker 12 years ago
parent
commit
12e9b4faf6
4 changed files with 69 additions and 82 deletions
  1. 7 66
      arch/i386/syscall_arch.h
  2. 3 0
      src/env/__libc_start_main.c
  3. 58 16
      src/internal/i386/syscall.s
  4. 1 0
      src/internal/libc.c

+ 7 - 66
arch/i386/syscall_arch.h

@@ -8,111 +8,52 @@
 static inline long __syscall0(long n)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n) : "memory");
+	__asm__ __volatile__ ("call __vsyscall" : "=a"(__ret) : "a"(n) : "memory");
 	return __ret;
 }
 
-#ifndef __PIC__
-
 static inline long __syscall1(long n, long a1)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n), "b"(a1) : "memory");
+	__asm__ __volatile__ ("call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1) : "memory");
 	return __ret;
 }
 
 static inline long __syscall2(long n, long a1, long a2)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2) : "memory");
+	__asm__ __volatile__ ("call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
 	return __ret;
 }
 
 static inline long __syscall3(long n, long a1, long a2, long a3)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3) : "memory");
+	__asm__ __volatile__ ("call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3) : "memory");
 	return __ret;
 }
 
 static inline long __syscall4(long n, long a1, long a2, long a3, long a4)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
+	__asm__ __volatile__ ("call __vsyscall" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4) : "memory");
 	return __ret;
 }
 
 static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("int $128" : "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
+	__asm__ __volatile__ ("push %6 ; call __vsyscall ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"(a5) : "memory");
 	return __ret;
 }
 
 static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
 {
 	unsigned long __ret;
-	__asm__ __volatile__ ("pushl %7 ; pushl %%ebp ; mov 4(%%esp),%%ebp ; int $128 ; popl %%ebp ; popl %%ecx"
-		: "=a"(__ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5), "g"(a6) : "memory");
-	return __ret;
-}
-
-#else
-
-static inline long __syscall1(long n, long a1)
-{
-	unsigned long __ret;
-	__asm__ __volatile__ ("xchg %2,%%ebx ; int $128 ; xchg %2,%%ebx"
-		: "=a"(__ret) : "a"(n), "d"(a1) : "memory");
-	return __ret;
-}
-
-static inline long __syscall2(long n, long a1, long a2)
-{
-	unsigned long __ret;
-	__asm__ __volatile__ ("xchg %2,%%ebx ; int $128 ; xchg %2,%%ebx"
-		: "=a"(__ret) : "a"(n), "d"(a1), "c"(a2) : "memory");
-	return __ret;
-}
-
-static inline long __syscall3(long n, long a1, long a2, long a3)
-{
-	unsigned long __ret;
-	__asm__ __volatile__ ("xchg %2,%%ebx ; int $128 ; xchg %2,%%ebx"
-		: "=a"(__ret) : "a"(n), "S"(a1), "c"(a2), "d"(a3) : "memory");
+	__asm__ __volatile__ ("push %6 ; call __vsyscall6 ; add $4,%%esp" : "=a"(__ret) : "a"(n), "d"(a1), "c"(a2), "D"(a3), "S"(a4), "g"((long[]){a5, a6}) : "memory");
 	return __ret;
 }
 
-static inline long __syscall4(long n, long a1, long a2, long a3, long a4)
-{
-	unsigned long __ret;
-	__asm__ __volatile__ ("xchg %2,%%ebx ; int $128 ; xchg %2,%%ebx"
-		: "=a"(__ret) : "a"(n), "D"(a1), "c"(a2), "d"(a3), "S"(a4) : "memory");
-	return __ret;
-}
-
-#if 0
-static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
-{
-	unsigned long __ret;
-	__asm__ __volatile__ ("pushl %2 ; pushl %%ebx ; mov 4(%%esp),%%ebx ; int $128 ; popl %%ebx ; popl %%ecx"
-		: "=a"(__ret) : "a"(n), "g"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory");
-	return __ret;
-}
-#else
-static inline long __syscall5(long n, long a1, long a2, long a3, long a4, long a5)
-{
-	return (__syscall)(n, a1, a2, a3, a4, a5);
-}
-#endif
-
-static inline long __syscall6(long n, long a1, long a2, long a3, long a4, long a5, long a6)
-{
-	return (__syscall)(n, a1, a2, a3, a4, a5, a6);
-}
-
-#endif
-
 
 #define __SC_socket      1
 #define __SC_bind        2

+ 3 - 0
src/env/__libc_start_main.c

@@ -6,6 +6,8 @@ void __init_security(size_t *);
 
 #define AUX_CNT 38
 
+extern size_t __hwcap, __sysinfo;
+
 void __init_libc(char **envp)
 {
 	size_t i, *auxv, aux[AUX_CNT] = { 0 };
@@ -14,6 +16,7 @@ void __init_libc(char **envp)
 	libc.auxv = auxv = (void *)(envp+i+1);
 	for (i=0; auxv[i]; i+=2) if (auxv[i]<AUX_CNT) aux[auxv[i]] = auxv[i+1];
 	__hwcap = aux[AT_HWCAP];
+	__sysinfo = aux[AT_SYSINFO];
 
 	__init_tls(aux);
 	__init_security(aux);

+ 58 - 16
src/internal/i386/syscall.s

@@ -1,20 +1,62 @@
+.hidden __sysinfo
+
+.global __vsyscall
+.type __vsyscall,@function
+__vsyscall:
+	push %edi
+	push %ebx
+	mov %edx,%ebx
+	mov %edi,%edx
+	mov 12(%esp),%edi
+	push %eax
+	call 1f
+2:	pop %ebx
+	pop %ebx
+	pop %edi
+	ret
+
+1:	mov (%esp),%eax
+	add $[__sysinfo-2b],%eax
+	mov (%eax),%eax
+	test %eax,%eax
+	jz 1f
+	push %eax
+	mov 8(%esp),%eax
+	ret
+1:	mov 4(%esp),%eax
+	int $128
+	ret
+
+.global __vsyscall6
+.type __vsyscall6,@function
+__vsyscall6:
+	push %ebp
+	push %eax
+	mov 12(%esp), %ebp
+	mov (%ebp), %eax
+	mov 4(%ebp), %ebp
+	push %eax
+	mov 4(%esp),%eax
+	call __vsyscall
+	pop %ebp
+	pop %ebp
+	pop %ebp
+	ret
+
 .global __syscall
 .type __syscall,@function
 __syscall:
-	pushl %ebx
-	pushl %esi
-	pushl %edi
-	pushl %ebp
-	movl 20(%esp),%eax
-	movl 24(%esp),%ebx
-	movl 28(%esp),%ecx
-	movl 32(%esp),%edx
-	movl 36(%esp),%esi
-	movl 40(%esp),%edi
-	movl 44(%esp),%ebp
-	int $128
-	popl %ebp
-	popl %edi
-	popl %esi
-	popl %ebx
+	lea 24(%esp),%eax
+	push %esi
+	push %edi
+	push %eax
+	mov 16(%esp),%eax
+	mov 20(%esp),%edx
+	mov 24(%esp),%ecx
+	mov 28(%esp),%edi
+	mov 32(%esp),%esi
+	call __vsyscall6
+	pop %edi
+	pop %edi
+	pop %esi
 	ret

+ 1 - 0
src/internal/libc.c

@@ -15,3 +15,4 @@ __asm__(".hidden __libc");
 #endif
 
 size_t __hwcap;
+size_t __sysinfo;