Browse Source

add sse fenv support on i386 through hwcap

the sse and x87 rounding modes should always be the same, and
the visible exception flags are the bitwise or of the two
fenv states (so it is enough to query the rounding mode, or
to raise exceptions, on only one of the two fenvs)
Szabolcs Nagy 11 years ago
parent
commit
ebc10fa176
2 changed files with 61 additions and 9 deletions
  1. 61 6
      src/fenv/i386/fenv.s
  2. 0 3
      src/fenv/x86_64/fenv.s

+ 61 - 6
src/fenv/i386/fenv.s

@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .global feclearexcept
 .type feclearexcept,@function
 .type feclearexcept,@function
 feclearexcept:	
 feclearexcept:	
 	mov 4(%esp),%ecx
 	mov 4(%esp),%ecx
 	not %ecx
 	not %ecx
-	test $0x3f,%ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 	jnz 2f
 1:	fnclex
 1:	fnclex
 	xor %eax,%eax
 	xor %eax,%eax
 	ret
 	ret
 2:	fnstsw %ax
 2:	fnstsw %ax
+		# TODO: only load/store fenv if exceptions are not clear yet
 	and %ecx,%eax
 	and %ecx,%eax
 	jz 1b
 	jz 1b
 	sub $32,%esp
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
 	fldcw (%esp)
-	pop %ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr (%esp)
+	shl $3,%ch
+	andb $0x9f,1(%esp)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 	ret
 
 
 .global fegetround
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	xor %eax,%eax
 	fnstenv (%ecx)
 	fnstenv (%ecx)
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 
 .global fesetenv
 .global fesetenv
 .type fesetenv,@function
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	inc %ecx
 	jz 1f
 	jz 1f
 	fldenv -1(%ecx)
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 1:	push %eax
 	push %eax
 	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	pushl $0x37f
 	fldenv (%esp)
 	fldenv (%esp)
 	add $28,%esp
 	add $28,%esp
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+		# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 
 .global fetestexcept
 .global fetestexcept
 .type fetestexcept,@function
 .type fetestexcept,@function
 fetestexcept:
 fetestexcept:
 	mov 4(%esp),%ecx
 	mov 4(%esp),%ecx
 	fnstsw %ax
 	fnstsw %ax
-	and %ecx,%eax
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	or 4(%esp),%eax
+1:	and %ecx,%eax
 	ret
 	ret

+ 0 - 3
src/fenv/x86_64/fenv.s

@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	xor %eax,%eax
 	ret
 	ret