Преглед изворни кода

add sse fenv support on i386 through hwcap

the sse and x87 rounding modes should be always the same,
the visible exception flags are the bitwise or of the two
fenv states (so it's enough to query the rounding mode or
raise exceptions on one fenv)
Szabolcs Nagy пре 11 година
родитељ
комит
ebc10fa176
2 измењених фајлова са 61 додато и 9 уклоњено
  1. 61 6
      src/fenv/i386/fenv.s
  2. 0 3
      src/fenv/x86_64/fenv.s

+ 61 - 6
src/fenv/i386/fenv.s

@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:	
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+		# TODO: only load/store fenv if exceptions arent clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f				# push the address of the following label 1
+1:	addl $__hwcap-1b,(%esp)		# pc-relative: turn it into the address of __hwcap
+	pop %edx			# edx = &__hwcap
+	testl $0x02000000,(%edx)	# XMM capability bit set in __hwcap?
+	jz 1f				# bit clear: leave mxcsr alone (must be conditional)
+	stmxcsr (%esp)			# reuse the scratch slot at (%esp) for mxcsr
+	shl $3,%ch			# move rounding bits from x87 cw position to mxcsr position
+	andb $0x9f,1(%esp)		# clear the old mxcsr rounding bits
+	or %ch,1(%esp)			# insert the requested rounding mode
+	ldmxcsr (%esp)
+1:	pop %ecx			# release the scratch slot
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+		# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+		# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+		# consider sse fenv as well if the cpu has XMM capability
+	call 1f				# push the address of the following label 1
+1:	addl $__hwcap-1b,(%esp)		# pc-relative: turn it into the address of __hwcap
+	pop %edx			# edx = &__hwcap
+	testl $0x02000000,(%edx)	# XMM capability bit set in __hwcap?
+	jz 1f				# bit clear: report the x87 status word only
+	stmxcsr 4(%esp)			# argument slot is free, its value is already in ecx
+	or 4(%esp),%eax			# merge the mxcsr bits into the x87 status word
+1:	and %ecx,%eax			# keep only the bits selected by the argument
 	ret

+ 0 - 3
src/fenv/x86_64/fenv.s

@@ -28,9 +28,6 @@ feraiseexcept:
 	stmxcsr -8(%rsp)
 	or %edi,-8(%rsp)
 	ldmxcsr -8(%rsp)
-	fnstenv -32(%rsp)
-	or %edi,-28(%rsp)
-	fldenv -32(%rsp)
 	xor %eax,%eax
 	ret