@@ -1,14 +1,26 @@
+.hidden __hwcap
+
 .global feclearexcept
 .type feclearexcept,@function
 feclearexcept:
 	mov 4(%esp),%ecx
 	not %ecx
-	test $0x3f,%ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	and %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	test $0x3f,%ecx
 	jnz 2f
 1:	fnclex
 	xor %eax,%eax
 	ret
 2:	fnstsw %ax
+	# TODO: only load/store fenv if exceptions aren't clear yet
 	and %ecx,%eax
 	jz 1b
 	sub $32,%esp
@@ -41,7 +53,18 @@ fesetround:
 	andb $0xf3,1(%esp)
 	or %ch,1(%esp)
 	fldcw (%esp)
-	pop %ecx
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr (%esp)
+	shl $3,%ch
+	andb $0x9f,1(%esp)
+	or %ch,1(%esp)
+	ldmxcsr (%esp)
+1:	pop %ecx
 	ret
 
 .global fegetround
@@ -59,7 +82,18 @@ fegetenv:
 	mov 4(%esp),%ecx
 	xor %eax,%eax
 	fnstenv (%ecx)
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	push %eax
+	stmxcsr (%esp)
+	pop %edx
+	and $0x3f,%edx
+	or %edx,4(%ecx)
+1:	ret
 
 .global fesetenv
 .type fesetenv,@function
@@ -69,7 +103,8 @@ fesetenv:
 	inc %ecx
 	jz 1f
 	fldenv -1(%ecx)
-	ret
+	movl -1(%ecx),%ecx
+	jmp 2f
 1:	push %eax
 	push %eax
 	push %eax
@@ -79,12 +114,32 @@ fesetenv:
 	pushl $0x37f
 	fldenv (%esp)
 	add $28,%esp
-	ret
+	# consider sse fenv as well if the cpu has XMM capability
+2:	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	# mxcsr := same rounding mode, cleared exceptions, default mask
+	and $0xc00,%ecx
+	shl $3,%ecx
+	or $0x1f80,%ecx
+	mov %ecx,4(%esp)
+	ldmxcsr 4(%esp)
+1:	ret
 
 .global fetestexcept
 .type fetestexcept,@function
 fetestexcept:
 	mov 4(%esp),%ecx
 	fnstsw %ax
-	and %ecx,%eax
+	# consider sse fenv as well if the cpu has XMM capability
+	call 1f
+1:	addl $__hwcap-1b,(%esp)
+	pop %edx
+	testl $0x02000000,(%edx)
+	jz 1f
+	stmxcsr 4(%esp)
+	or 4(%esp),%eax
+1:	and %ecx,%eax
 	ret
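Note on the runtime gate the new code repeats in each function: call 1f pushes the address of local label 1, addl $__hwcap-1b,(%esp) adds the link-time constant offset from that label to the hidden __hwcap variable, and pop %edx then holds __hwcap's runtime address without needing a GOT register; the code then tests bit 25 (0x02000000), which the patch's own comment identifies as the XMM/SSE capability bit of the x86 AT_HWCAP word. A rough user-space sketch of the same gate, assuming getauxval(3) instead of musl's internal __hwcap (cpu_has_sse is a hypothetical helper, not part of the patch):

#include <sys/auxv.h>

/* Sketch only: approximate the patch's "__hwcap & 0x02000000" test from
 * ordinary user code via the ELF auxiliary vector. */
static int cpu_has_sse(void)
{
	/* bit 25 of AT_HWCAP on x86 corresponds to SSE, i.e. MXCSR exists */
	return (getauxval(AT_HWCAP) & 0x02000000) != 0;
}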
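The magic constants follow from how the x87 and SSE control/status bits line up: the six exception flags sit in bits 0-5 of both the x87 status word and MXCSR, so the same 0x3f mask applies to either; rounding control is bits 10-11 of the x87 control word but bits 13-14 of MXCSR, which is why fesetround and fesetenv shift it left by 3; and 0x1f80 is the MXCSR default (all exceptions masked, flags clear, round to nearest). A small C restatement of the value fesetenv installs, under those assumptions (the helper name is ours, not musl's):

/* Sketch: build the MXCSR value the patched fesetenv loads, given the x87
 * control word it just set: same rounding mode, cleared exception flags,
 * default exception masks. */
static unsigned int mxcsr_from_x87_cw(unsigned int cw)
{
	unsigned int rc = cw & 0xc00;  /* x87 rounding control, bits 10-11 */
	return (rc << 3) | 0x1f80;     /* MXCSR rounding control is bits 13-14 */
}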
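For the flag queries the patch applies the caller's mask to both units: feclearexcept clears the selected bits in MXCSR in addition to running fnclex, and fetestexcept ORs the MXCSR flags into the x87 status word before masking. A compilable sketch of that combined read, using GCC-style inline assembly rather than musl's hand-written assembly:

/* Sketch: report the union of x87 and SSE exception flags, as the patched
 * fetestexcept does on SSE-capable CPUs. */
static int test_fp_exceptions(int mask)
{
	unsigned short sw;
	unsigned int mxcsr;
	__asm__ __volatile__ ("fnstsw %0" : "=a"(sw));     /* x87 status word */
	__asm__ __volatile__ ("stmxcsr %0" : "=m"(mxcsr)); /* SSE control/status */
	return (sw | mxcsr) & mask & 0x3f;                 /* keep only exception flag bits */
}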