-
Notifications
You must be signed in to change notification settings - Fork 15.3k
optimize is_finite assembly
#169402
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
optimize is_finite assembly
#169402
Conversation
is_finite on floating pointsis_finite assembly
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-aarch64 Author: Folkert de Vries (folkertdev) Changes: Fixes #169270. Changes the implementation of `is_finite` to emit fewer instructions, e.g. on x86_64:
src(float):
movd eax, xmm0
and eax, 2147483647
cmp eax, 2139095040
setl al
ret
tgt(float):
subss xmm0, xmm0
ucomiss xmm0, xmm0
setnp al
ret
In theory this should be an improvement on other targets too, but locally at least I don't observe any changes in tests. So either this logic is just untested, or most backends have a custom lowering. I did add a new test for aarch64. For x64 this is an improvement; for x87 I'm not entirely sure. From the issue: > A reminder: This optimization should be disabled if I'm checking […]
Patch is 23.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169402.diff
3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 920dff935daed..36102d3227afb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9122,8 +9122,16 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
; // Detect finite numbers of f80 by checking individual classes because
// they have different settings of the explicit integer bit.
else if ((Test & fcFinite) == fcFinite) {
- // finite(V) ==> abs(V) < exp_mask
- PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ // Float arithmetic may emit FP exceptions.
+ if (Flags.hasNoFPExcept()) {
+ // finite(V) ==> (V - V) == (V - V)
+ SDValue Sub = DAG.getNode(ISD::FSUB, DL, OperandVT, Op, Op);
+ SDValue Zero = DAG.getConstantFP(0.0, DL, OperandVT);
+ PartialRes = DAG.getSetCC(DL, ResultVT, Sub, Zero, ISD::SETO);
+ } else {
+ // finite(V) ==> abs(V) < exp_mask
+ PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
+ }
Test &= ~fcFinite;
} else if ((Test & fcFinite) == fcPosFinite) {
// finite(V) && V > 0 ==> V < exp_mask
diff --git a/llvm/test/CodeGen/AArch64/is_fpclass.ll b/llvm/test/CodeGen/AArch64/is_fpclass.ll
new file mode 100644
index 0000000000000..64782a1c7bb36
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/is_fpclass.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+define i1 @isfinite_f(float %x) {
+; CHECK-SD-LABEL: isfinite_f:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fsub s0, s0, s0
+; CHECK-SD-NEXT: fcmp s0, s0
+; CHECK-SD-NEXT: cset w0, vc
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: isfinite_f:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-GI-NEXT: and w9, w9, #0x7fffffff
+; CHECK-GI-NEXT: cmp w9, w8
+; CHECK-GI-NEXT: cset w0, lo
+; CHECK-GI-NEXT: ret
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; CHECK-SD-LABEL: not_isfinite_f:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fsub s0, s0, s0
+; CHECK-SD-NEXT: fcmp s0, s0
+; CHECK-SD-NEXT: cset w0, vs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: not_isfinite_f:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov w9, s0
+; CHECK-GI-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-GI-NEXT: and w9, w9, #0x7fffffff
+; CHECK-GI-NEXT: cmp w9, w8
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: orr w0, w8, w9
+; CHECK-GI-NEXT: ret
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
+ ret i1 %0
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/X86/is_fpclass.ll b/llvm/test/CodeGen/X86/is_fpclass.ll
index 97136dafa6c2c..adab828de8897 100644
--- a/llvm/test/CodeGen/X86/is_fpclass.ll
+++ b/llvm/test/CodeGen/X86/is_fpclass.ll
@@ -240,18 +240,20 @@ entry:
define i1 @isfinite_f(float %x) {
; X86-LABEL: isfinite_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
; X86-NEXT: retl
;
; X64-LABEL: isfinite_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setnp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
@@ -261,18 +263,20 @@ entry:
define i1 @not_isfinite_f(float %x) {
; X86-LABEL: not_isfinite_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setge %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
; X86-NEXT: retl
;
; X64-LABEL: not_isfinite_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setge %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
@@ -1017,18 +1021,21 @@ entry:
define i1 @isfinite_f_strictfp(float %x) strictfp {
; X86-LABEL: isfinite_f_strictfp:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: wait
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
; X86-NEXT: retl
;
; X64-LABEL: isfinite_f_strictfp:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setnp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) strictfp ; 0x1f8 = "finite"
@@ -1038,18 +1045,21 @@ entry:
define i1 @not_isfinite_f_strictfp(float %x) strictfp {
; X86-LABEL: not_isfinite_f_strictfp:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setge %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: wait
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
; X86-NEXT: retl
;
; X64-LABEL: not_isfinite_f_strictfp:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setge %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) strictfp ; ~0x1f8 = ~"finite"
@@ -1150,31 +1160,21 @@ entry:
define i1 @isfinite_d(double %x) {
; X86-LABEL: isfinite_d:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2146435072, %eax # imm = 0x7FF00000
-; X86-NEXT: setl %al
+; X86-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
; X86-NEXT: retl
;
-; X64-GENERIC-LABEL: isfinite_d:
-; X64-GENERIC: # %bb.0: # %entry
-; X64-GENERIC-NEXT: movq %xmm0, %rax
-; X64-GENERIC-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-GENERIC-NEXT: andq %rax, %rcx
-; X64-GENERIC-NEXT: movabsq $9218868437227405312, %rax # imm = 0x7FF0000000000000
-; X64-GENERIC-NEXT: cmpq %rax, %rcx
-; X64-GENERIC-NEXT: setl %al
-; X64-GENERIC-NEXT: retq
-;
-; X64-NDD-LABEL: isfinite_d:
-; X64-NDD: # %bb.0: # %entry
-; X64-NDD-NEXT: movq %xmm0, %rax
-; X64-NDD-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
-; X64-NDD-NEXT: andq %rcx, %rax
-; X64-NDD-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
-; X64-NDD-NEXT: cmpq %rcx, %rax
-; X64-NDD-NEXT: setl %al
-; X64-NDD-NEXT: retq
+; X64-LABEL: isfinite_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: subsd %xmm0, %xmm0
+; X64-NEXT: ucomisd %xmm0, %xmm0
+; X64-NEXT: setnp %al
+; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 504) ; 0x1f8 = "finite"
ret i1 %0
@@ -2032,18 +2032,20 @@ entry:
define i1 @isinf_or_nan_f(float %x) {
; X86-LABEL: isinf_or_nan_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setge %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setp %al
; X86-NEXT: retl
;
; X64-LABEL: isinf_or_nan_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setge %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; 0x204|0x3 = "inf|nan"
@@ -2053,18 +2055,20 @@ entry:
define i1 @not_isinf_or_nan_f(float %x) {
; X86-LABEL: not_isinf_or_nan_f:
; X86: # %bb.0: # %entry
-; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %al
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
; X86-NEXT: retl
;
; X64-LABEL: not_isinf_or_nan_f:
; X64: # %bb.0: # %entry
-; X64-NEXT: movd %xmm0, %eax
-; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NEXT: setl %al
+; X64-NEXT: subss %xmm0, %xmm0
+; X64-NEXT: ucomiss %xmm0, %xmm0
+; X64-NEXT: setnp %al
; X64-NEXT: retq
entry:
%0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)"
@@ -2274,16 +2278,26 @@ define i1 @is_plus_inf_or_qnan_f(float %x) {
define i1 @not_is_plus_inf_or_snan_f(float %x) {
; X86-LABEL: not_is_plus_inf_or_snan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %dl
-; X86-NEXT: orb %cl, %dl
-; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: cmpl $-8388608, %ecx # imm = 0xFF800000
+; X86-NEXT: sete %dl
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %ah
+; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
; X86-NEXT: setge %al
; X86-NEXT: orb %dl, %al
+; X86-NEXT: orb %ah, %al
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-GENERIC-LABEL: not_is_plus_inf_or_snan_f:
@@ -2291,10 +2305,11 @@ define i1 @not_is_plus_inf_or_snan_f(float %x) {
; X64-GENERIC-NEXT: movd %xmm0, %eax
; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
; X64-GENERIC-NEXT: sete %cl
-; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-GENERIC-NEXT: setl %dl
+; X64-GENERIC-NEXT: subss %xmm0, %xmm0
+; X64-GENERIC-NEXT: ucomiss %xmm0, %xmm0
+; X64-GENERIC-NEXT: setnp %dl
; X64-GENERIC-NEXT: orb %cl, %dl
+; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; X64-GENERIC-NEXT: setge %al
; X64-GENERIC-NEXT: orb %dl, %al
@@ -2305,10 +2320,11 @@ define i1 @not_is_plus_inf_or_snan_f(float %x) {
; X64-NDD-NEXT: movd %xmm0, %eax
; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
; X64-NDD-NEXT: sete %cl
-; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NDD-NEXT: setl %dl
+; X64-NDD-NEXT: subss %xmm0, %xmm0
+; X64-NDD-NEXT: ucomiss %xmm0, %xmm0
+; X64-NDD-NEXT: setnp %dl
; X64-NDD-NEXT: orb %dl, %cl
+; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; X64-NDD-NEXT: setge %al
; X64-NDD-NEXT: orb %cl, %al
@@ -2320,55 +2336,67 @@ define i1 @not_is_plus_inf_or_snan_f(float %x) {
define i1 @not_is_plus_inf_or_qnan_f(float %x) {
; X86-LABEL: not_is_plus_inf_or_qnan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: movl (%esp), %eax
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
; X86-NEXT: setl %dl
; X86-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
-; X86-NEXT: setge %dh
-; X86-NEXT: andb %dl, %dh
+; X86-NEXT: setge %ch
+; X86-NEXT: andb %dl, %ch
; X86-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X86-NEXT: sete %dl
-; X86-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
-; X86-NEXT: setl %al
-; X86-NEXT: orb %dl, %al
-; X86-NEXT: orb %dh, %al
+; X86-NEXT: sete %cl
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %al
+; X86-NEXT: orb %ch, %cl
+; X86-NEXT: orb %al, %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-GENERIC-LABEL: not_is_plus_inf_or_qnan_f:
; X64-GENERIC: # %bb.0:
; X64-GENERIC-NEXT: movd %xmm0, %eax
-; X64-GENERIC-NEXT: movl %eax, %ecx
-; X64-GENERIC-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
-; X64-GENERIC-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
-; X64-GENERIC-NEXT: setl %dl
-; X64-GENERIC-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
-; X64-GENERIC-NEXT: setge %sil
-; X64-GENERIC-NEXT: andb %dl, %sil
; X64-GENERIC-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-GENERIC-NEXT: sete %dl
-; X64-GENERIC-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
-; X64-GENERIC-NEXT: setl %al
+; X64-GENERIC-NEXT: sete %cl
+; X64-GENERIC-NEXT: subss %xmm0, %xmm0
+; X64-GENERIC-NEXT: ucomiss %xmm0, %xmm0
+; X64-GENERIC-NEXT: setnp %dl
+; X64-GENERIC-NEXT: orb %cl, %dl
+; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-GENERIC-NEXT: setl %cl
+; X64-GENERIC-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-GENERIC-NEXT: setge %al
+; X64-GENERIC-NEXT: andb %cl, %al
; X64-GENERIC-NEXT: orb %dl, %al
-; X64-GENERIC-NEXT: orb %sil, %al
; X64-GENERIC-NEXT: retq
;
; X64-NDD-LABEL: not_is_plus_inf_or_qnan_f:
; X64-NDD: # %bb.0:
; X64-NDD-NEXT: movd %xmm0, %eax
-; X64-NDD-NEXT: andl $2147483647, %eax, %ecx # imm = 0x7FFFFFFF
-; X64-NDD-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
-; X64-NDD-NEXT: setl %dl
-; X64-NDD-NEXT: cmpl $2139095041, %ecx # imm = 0x7F800001
-; X64-NDD-NEXT: setge %sil
-; X64-NDD-NEXT: andb %sil, %dl
; X64-NDD-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
-; X64-NDD-NEXT: sete %al
-; X64-NDD-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
-; X64-NDD-NEXT: setl %cl
+; X64-NDD-NEXT: sete %cl
+; X64-NDD-NEXT: subss %xmm0, %xmm0
+; X64-NDD-NEXT: ucomiss %xmm0, %xmm0
+; X64-NDD-NEXT: setnp %dl
+; X64-NDD-NEXT: orb %dl, %cl
+; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NDD-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NDD-NEXT: setl %dl
+; X64-NDD-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NDD-NEXT: setge %al
+; X64-NDD-NEXT: andb %dl, %al
; X64-NDD-NEXT: orb %cl, %al
-; X64-NDD-NEXT: orb %dl, %al
; X64-NDD-NEXT: retq
%class = tail call i1 @llvm.is.fpclass.f32(float %x, i32 509) ; ~(+inf|qnan)
ret i1 %class
@@ -2451,16 +2479,26 @@ define i1 @is_minus_inf_or_qnan_f(float %x) {
define i1 @not_is_minus_inf_or_snan_f(float %x) {
; X86-LABEL: not_is_minus_inf_or_snan_f:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: sete %cl
-; X86-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X86-NEXT: setl %dl
-; X86-NEXT: orb %cl, %dl
-; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-NEXT: pushl %eax
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: flds {{[0-9]+}}(%esp)
+; X86-NEXT: fsts (%esp)
+; X86-NEXT: movl (%esp), %ecx
+; X86-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000
+; X86-NEXT: sete %dl
+; X86-NEXT: fsub %st, %st(0)
+; X86-NEXT: fucomp %st(0)
+; X86-NEXT: fnstsw %ax
+; X86-NEXT: # kill: def $ah killed $ah killed $ax
+; X86-NEXT: sahf
+; X86-NEXT: setnp %ah
+; X86-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmpl $2143289344, %ecx # imm = 0x7FC00000
; X86-NEXT: setge %al
; X86-NEXT: orb %dl, %al
+; X86-NEXT: orb %ah, %al
+; X86-NEXT: popl %ecx
+; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
; X64-GENERIC-LABEL: not_is_minus_inf_or_snan_f:
@@ -2468,10 +2506,11 @@ define i1 @not_is_minus_inf_or_snan_f(float %x) {
; X64-GENERIC-NEXT: movd %xmm0, %eax
; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X64-GENERIC-NEXT: sete %cl
-; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-GENERIC-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-GENERIC-NEXT: setl %dl
+; X64-GENERIC-NEXT: subss %xmm0, %xmm0
+; X64-GENERIC-NEXT: ucomiss %xmm0, %xmm0
+; X64-GENERIC-NEXT: setnp %dl
; X64-GENERIC-NEXT: orb %cl, %dl
+; X64-GENERIC-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-GENERIC-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
; X64-GENERIC-NEXT: setge %al
; X64-GENERIC-NEXT: orb %dl, %al
@@ -2482,10 +2521,11 @@ define i1 @not_is_minus_inf_or_snan_f(float %x) {
; X64-NDD-NEXT: movd %xmm0, %eax
; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
; X64-NDD-NEXT: sete %cl
-; X64-NDD-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
-; X64-NDD-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
-; X64-NDD-NEXT: setl %dl
+; X64-NDD-NEXT: subss %xmm0, %xmm0
+; X64-NDD-NEXT: ucomiss %xmm0, %xmm0
+; X64-NDD-NEXT: setnp %dl
; X64-NDD-NEXT: orb %dl, %cl
+; X64-NDD-...
[truncated]
|
🐧 Linux x64 Test Results
|
arsenm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you also test this on
4a4b918 to
8896b93
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8896b93 to
4859ae6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This now implements the approach suggested by @Explorer09 over in the issue, so FP exceptions are no longer a consideration. F80 is special-cased right above this block; based on my reading, this trick should work for the other types, especially considering that the pre-existing implementation uses the same idea.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm I believe this has everything you asked for now, the various types, vectors, strictfp and a 32-bit softfloat target.
4859ae6 to
212c0a6
Compare
212c0a6 to
66c7ffb
Compare
|
I just made another implementation that is as small as the […]. This approach: https://godbolt.org/z/P7KGhcPWM
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
/*
 * Returns true when x is finite, i.e. neither an infinity nor a NaN.
 *
 * Works on the raw bit pattern of x, read through a union. Assumes float is
 * IEEE-754 binary32, so that INFINITY's pattern is exactly the exponent mask
 * and a value is non-finite precisely when all of those bits are set.
 *
 * After inverting the pattern, the masked exponent bits are all clear only
 * when they were all set in x. x is therefore finite when at least one masked
 * bit survives the inversion.
 */
bool isfinite_f_1(float x) {
    union { float f; uint32_t u; } v, inf;
    inf.f = INFINITY;
    v.f = x;
    v.u = ~v.u;
    /* Empty asm with a read-write register operand: the compiler has to treat
     * v.u as possibly changed here. Presumably this keeps the inversion from
     * being folded into the mask test, to get the intended instruction
     * sequence. */
    __asm__ ("" : "+r" (v.u));
    /* Previously compared with `== 0`, which is true only for inf/NaN and so
     * returned the opposite of what the function name promises. */
    return (v.u & inf.u) != 0;
}
/*
 * Returns true when x is finite, i.e. neither an infinity nor a NaN.
 *
 * Compares raw bit patterns read through a union. Assumes float is IEEE-754
 * binary32 with the sign in the top bit -- confirm for exotic targets.
 */
bool isfinite_f_2(float x) {
union { float f; uint32_t u; } v, inf;
inf.f = INFINITY;
v.f = x;
/* Shift the top bit out so that +x and -x produce the same pattern; the cast
 * keeps the result at 32 bits. */
v.u = (uint32_t)(v.u << 1);
/* inf.u * 2 is INFINITY's pattern shifted the same way. Under the assumption
 * above, infinities and NaNs compare greater than or equal to it and every
 * finite value compares below. */
return v.u < inf.u * 2;
} |
Fixes #169270
Changes the implementation of `is_finite` to emit fewer instructions, e.g. on:
- X86_64
- AArch64
See the issue for more information.