Skip to content

Commit 1fbfd27

Browse files
committed
[llvm] Ensure that soft float targets don't emit fma() libcalls.
The previous behavior could be harmful in some edge cases, such as emitting a call to fma() inside the implementation of fma() itself. Prevent this by making isFMAFasterThanFMulAndFAdd() more accurate: it now returns false when soft-float code is being generated. This was already done for PowerPC; this commit extends the same check to Arm, z/Arch (SystemZ), and x86. MIPS and SPARC already got it right, but I added tests for them too, for good measure.
1 parent d4f41be commit 1fbfd27

File tree

9 files changed

+3618
-0
lines changed

9 files changed

+3618
-0
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,6 +3240,9 @@ class TargetLoweringBase {
32403240
/// not legal, but should return true if those types will eventually legalize
32413241
/// to types that support FMAs. After legalization, it will only be called on
32423242
/// types that support FMAs (via Legal or Custom actions)
3243+
///
3244+
/// Targets that care about soft float support should return false when soft
3245+
/// float code is being generated (i.e. use-soft-float).
32433246
virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
32443247
EVT) const {
32453248
return false;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19488,6 +19488,9 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
1948819488
/// patterns (and we don't have the non-fused floating point instruction).
1948919489
bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1949019490
EVT VT) const {
19491+
if (Subtarget->useSoftFloat())
19492+
return false;
19493+
1949119494
if (!VT.isSimple())
1949219495
return false;
1949319496

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,9 @@ EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
786786

787787
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
788788
const MachineFunction &MF, EVT VT) const {
789+
if (useSoftFloat())
790+
return false;
791+
789792
VT = VT.getScalarType();
790793

791794
if (!VT.isSimple())

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34645,6 +34645,9 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3464534645

3464634646
bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
3464734647
EVT VT) const {
34648+
if (Subtarget.useSoftFloat())
34649+
return false;
34650+
3464834651
if (!Subtarget.hasAnyFMA())
3464934652
return false;
3465034653

llvm/test/CodeGen/ARM/fmuladd-soft-float.ll

Lines changed: 393 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Mips/fmuladd-soft-float.ll

Lines changed: 912 additions & 0 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/SPARC/fmuladd-soft-float.ll

Lines changed: 414 additions & 0 deletions
Large diffs are not rendered by default.
llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=s390x --fp-contract off < %s | FileCheck %s -check-prefix=SOFT-FLOAT-FPC-OFF
3+
; RUN: llc -mtriple=s390x --fp-contract on < %s | FileCheck %s -check-prefix=SOFT-FLOAT-FPC-ON
4+
; RUN: llc -mtriple=s390x --fp-contract fast < %s | FileCheck %s -check-prefix=SOFT-FLOAT-FPC-FAST
5+
6+
define float @fmuladd_intrinsic_f32(float %a, float %b, float %c) #0 {
7+
; SOFT-FLOAT-FPC-OFF-LABEL: fmuladd_intrinsic_f32:
8+
; SOFT-FLOAT-FPC-OFF: # %bb.0:
9+
; SOFT-FLOAT-FPC-OFF-NEXT: stmg %r13, %r15, 104(%r15)
10+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r13, -56
11+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r14, -48
12+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r15, -40
13+
; SOFT-FLOAT-FPC-OFF-NEXT: aghi %r15, -160
14+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_def_cfa_offset 320
15+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r2, %r2
16+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r3, %r3
17+
; SOFT-FLOAT-FPC-OFF-NEXT: lr %r13, %r4
18+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __mulsf3@PLT
19+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r3, %r13
20+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __addsf3@PLT
21+
; SOFT-FLOAT-FPC-OFF-NEXT: # kill: def $r2l killed $r2l killed $r2d
22+
; SOFT-FLOAT-FPC-OFF-NEXT: lmg %r13, %r15, 264(%r15)
23+
; SOFT-FLOAT-FPC-OFF-NEXT: br %r14
24+
;
25+
; SOFT-FLOAT-FPC-ON-LABEL: fmuladd_intrinsic_f32:
26+
; SOFT-FLOAT-FPC-ON: # %bb.0:
27+
; SOFT-FLOAT-FPC-ON-NEXT: stmg %r13, %r15, 104(%r15)
28+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r13, -56
29+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r14, -48
30+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r15, -40
31+
; SOFT-FLOAT-FPC-ON-NEXT: aghi %r15, -160
32+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_def_cfa_offset 320
33+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r2, %r2
34+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r3, %r3
35+
; SOFT-FLOAT-FPC-ON-NEXT: lr %r13, %r4
36+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __mulsf3@PLT
37+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r3, %r13
38+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __addsf3@PLT
39+
; SOFT-FLOAT-FPC-ON-NEXT: # kill: def $r2l killed $r2l killed $r2d
40+
; SOFT-FLOAT-FPC-ON-NEXT: lmg %r13, %r15, 264(%r15)
41+
; SOFT-FLOAT-FPC-ON-NEXT: br %r14
42+
;
43+
; SOFT-FLOAT-FPC-FAST-LABEL: fmuladd_intrinsic_f32:
44+
; SOFT-FLOAT-FPC-FAST: # %bb.0:
45+
; SOFT-FLOAT-FPC-FAST-NEXT: stmg %r13, %r15, 104(%r15)
46+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r13, -56
47+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r14, -48
48+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r15, -40
49+
; SOFT-FLOAT-FPC-FAST-NEXT: aghi %r15, -160
50+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_def_cfa_offset 320
51+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r2, %r2
52+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r3, %r3
53+
; SOFT-FLOAT-FPC-FAST-NEXT: lr %r13, %r4
54+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __mulsf3@PLT
55+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r3, %r13
56+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __addsf3@PLT
57+
; SOFT-FLOAT-FPC-FAST-NEXT: # kill: def $r2l killed $r2l killed $r2d
58+
; SOFT-FLOAT-FPC-FAST-NEXT: lmg %r13, %r15, 264(%r15)
59+
; SOFT-FLOAT-FPC-FAST-NEXT: br %r14
60+
%result = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
61+
ret float %result
62+
}
63+
64+
define double @fmuladd_intrinsic_f64(double %a, double %b, double %c) #0 {
65+
; SOFT-FLOAT-FPC-OFF-LABEL: fmuladd_intrinsic_f64:
66+
; SOFT-FLOAT-FPC-OFF: # %bb.0:
67+
; SOFT-FLOAT-FPC-OFF-NEXT: stmg %r13, %r15, 104(%r15)
68+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r13, -56
69+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r14, -48
70+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r15, -40
71+
; SOFT-FLOAT-FPC-OFF-NEXT: aghi %r15, -160
72+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_def_cfa_offset 320
73+
; SOFT-FLOAT-FPC-OFF-NEXT: lgr %r13, %r4
74+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __muldf3@PLT
75+
; SOFT-FLOAT-FPC-OFF-NEXT: lgr %r3, %r13
76+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __adddf3@PLT
77+
; SOFT-FLOAT-FPC-OFF-NEXT: lmg %r13, %r15, 264(%r15)
78+
; SOFT-FLOAT-FPC-OFF-NEXT: br %r14
79+
;
80+
; SOFT-FLOAT-FPC-ON-LABEL: fmuladd_intrinsic_f64:
81+
; SOFT-FLOAT-FPC-ON: # %bb.0:
82+
; SOFT-FLOAT-FPC-ON-NEXT: stmg %r13, %r15, 104(%r15)
83+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r13, -56
84+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r14, -48
85+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r15, -40
86+
; SOFT-FLOAT-FPC-ON-NEXT: aghi %r15, -160
87+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_def_cfa_offset 320
88+
; SOFT-FLOAT-FPC-ON-NEXT: lgr %r13, %r4
89+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __muldf3@PLT
90+
; SOFT-FLOAT-FPC-ON-NEXT: lgr %r3, %r13
91+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __adddf3@PLT
92+
; SOFT-FLOAT-FPC-ON-NEXT: lmg %r13, %r15, 264(%r15)
93+
; SOFT-FLOAT-FPC-ON-NEXT: br %r14
94+
;
95+
; SOFT-FLOAT-FPC-FAST-LABEL: fmuladd_intrinsic_f64:
96+
; SOFT-FLOAT-FPC-FAST: # %bb.0:
97+
; SOFT-FLOAT-FPC-FAST-NEXT: stmg %r13, %r15, 104(%r15)
98+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r13, -56
99+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r14, -48
100+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r15, -40
101+
; SOFT-FLOAT-FPC-FAST-NEXT: aghi %r15, -160
102+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_def_cfa_offset 320
103+
; SOFT-FLOAT-FPC-FAST-NEXT: lgr %r13, %r4
104+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __muldf3@PLT
105+
; SOFT-FLOAT-FPC-FAST-NEXT: lgr %r3, %r13
106+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __adddf3@PLT
107+
; SOFT-FLOAT-FPC-FAST-NEXT: lmg %r13, %r15, 264(%r15)
108+
; SOFT-FLOAT-FPC-FAST-NEXT: br %r14
109+
%result = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
110+
ret double %result
111+
}
112+
113+
define float @fmuladd_contract_f32(float %a, float %b, float %c) #0 {
114+
; SOFT-FLOAT-FPC-OFF-LABEL: fmuladd_contract_f32:
115+
; SOFT-FLOAT-FPC-OFF: # %bb.0:
116+
; SOFT-FLOAT-FPC-OFF-NEXT: stmg %r13, %r15, 104(%r15)
117+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r13, -56
118+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r14, -48
119+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r15, -40
120+
; SOFT-FLOAT-FPC-OFF-NEXT: aghi %r15, -160
121+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_def_cfa_offset 320
122+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r2, %r2
123+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r3, %r3
124+
; SOFT-FLOAT-FPC-OFF-NEXT: lr %r13, %r4
125+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __mulsf3@PLT
126+
; SOFT-FLOAT-FPC-OFF-NEXT: llgfr %r3, %r13
127+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __addsf3@PLT
128+
; SOFT-FLOAT-FPC-OFF-NEXT: # kill: def $r2l killed $r2l killed $r2d
129+
; SOFT-FLOAT-FPC-OFF-NEXT: lmg %r13, %r15, 264(%r15)
130+
; SOFT-FLOAT-FPC-OFF-NEXT: br %r14
131+
;
132+
; SOFT-FLOAT-FPC-ON-LABEL: fmuladd_contract_f32:
133+
; SOFT-FLOAT-FPC-ON: # %bb.0:
134+
; SOFT-FLOAT-FPC-ON-NEXT: stmg %r13, %r15, 104(%r15)
135+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r13, -56
136+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r14, -48
137+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r15, -40
138+
; SOFT-FLOAT-FPC-ON-NEXT: aghi %r15, -160
139+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_def_cfa_offset 320
140+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r2, %r2
141+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r3, %r3
142+
; SOFT-FLOAT-FPC-ON-NEXT: lr %r13, %r4
143+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __mulsf3@PLT
144+
; SOFT-FLOAT-FPC-ON-NEXT: llgfr %r3, %r13
145+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __addsf3@PLT
146+
; SOFT-FLOAT-FPC-ON-NEXT: # kill: def $r2l killed $r2l killed $r2d
147+
; SOFT-FLOAT-FPC-ON-NEXT: lmg %r13, %r15, 264(%r15)
148+
; SOFT-FLOAT-FPC-ON-NEXT: br %r14
149+
;
150+
; SOFT-FLOAT-FPC-FAST-LABEL: fmuladd_contract_f32:
151+
; SOFT-FLOAT-FPC-FAST: # %bb.0:
152+
; SOFT-FLOAT-FPC-FAST-NEXT: stmg %r13, %r15, 104(%r15)
153+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r13, -56
154+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r14, -48
155+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r15, -40
156+
; SOFT-FLOAT-FPC-FAST-NEXT: aghi %r15, -160
157+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_def_cfa_offset 320
158+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r2, %r2
159+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r3, %r3
160+
; SOFT-FLOAT-FPC-FAST-NEXT: lr %r13, %r4
161+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __mulsf3@PLT
162+
; SOFT-FLOAT-FPC-FAST-NEXT: llgfr %r3, %r13
163+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __addsf3@PLT
164+
; SOFT-FLOAT-FPC-FAST-NEXT: # kill: def $r2l killed $r2l killed $r2d
165+
; SOFT-FLOAT-FPC-FAST-NEXT: lmg %r13, %r15, 264(%r15)
166+
; SOFT-FLOAT-FPC-FAST-NEXT: br %r14
167+
%product = fmul float %a, %b
168+
%result = fadd float %product, %c
169+
ret float %result
170+
}
171+
172+
define double @fmuladd_contract_f64(double %a, double %b, double %c) #0 {
173+
; SOFT-FLOAT-FPC-OFF-LABEL: fmuladd_contract_f64:
174+
; SOFT-FLOAT-FPC-OFF: # %bb.0:
175+
; SOFT-FLOAT-FPC-OFF-NEXT: stmg %r13, %r15, 104(%r15)
176+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r13, -56
177+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r14, -48
178+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_offset %r15, -40
179+
; SOFT-FLOAT-FPC-OFF-NEXT: aghi %r15, -160
180+
; SOFT-FLOAT-FPC-OFF-NEXT: .cfi_def_cfa_offset 320
181+
; SOFT-FLOAT-FPC-OFF-NEXT: lgr %r13, %r4
182+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __muldf3@PLT
183+
; SOFT-FLOAT-FPC-OFF-NEXT: lgr %r3, %r13
184+
; SOFT-FLOAT-FPC-OFF-NEXT: brasl %r14, __adddf3@PLT
185+
; SOFT-FLOAT-FPC-OFF-NEXT: lmg %r13, %r15, 264(%r15)
186+
; SOFT-FLOAT-FPC-OFF-NEXT: br %r14
187+
;
188+
; SOFT-FLOAT-FPC-ON-LABEL: fmuladd_contract_f64:
189+
; SOFT-FLOAT-FPC-ON: # %bb.0:
190+
; SOFT-FLOAT-FPC-ON-NEXT: stmg %r13, %r15, 104(%r15)
191+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r13, -56
192+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r14, -48
193+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_offset %r15, -40
194+
; SOFT-FLOAT-FPC-ON-NEXT: aghi %r15, -160
195+
; SOFT-FLOAT-FPC-ON-NEXT: .cfi_def_cfa_offset 320
196+
; SOFT-FLOAT-FPC-ON-NEXT: lgr %r13, %r4
197+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __muldf3@PLT
198+
; SOFT-FLOAT-FPC-ON-NEXT: lgr %r3, %r13
199+
; SOFT-FLOAT-FPC-ON-NEXT: brasl %r14, __adddf3@PLT
200+
; SOFT-FLOAT-FPC-ON-NEXT: lmg %r13, %r15, 264(%r15)
201+
; SOFT-FLOAT-FPC-ON-NEXT: br %r14
202+
;
203+
; SOFT-FLOAT-FPC-FAST-LABEL: fmuladd_contract_f64:
204+
; SOFT-FLOAT-FPC-FAST: # %bb.0:
205+
; SOFT-FLOAT-FPC-FAST-NEXT: stmg %r13, %r15, 104(%r15)
206+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r13, -56
207+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r14, -48
208+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_offset %r15, -40
209+
; SOFT-FLOAT-FPC-FAST-NEXT: aghi %r15, -160
210+
; SOFT-FLOAT-FPC-FAST-NEXT: .cfi_def_cfa_offset 320
211+
; SOFT-FLOAT-FPC-FAST-NEXT: lgr %r13, %r4
212+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __muldf3@PLT
213+
; SOFT-FLOAT-FPC-FAST-NEXT: lgr %r3, %r13
214+
; SOFT-FLOAT-FPC-FAST-NEXT: brasl %r14, __adddf3@PLT
215+
; SOFT-FLOAT-FPC-FAST-NEXT: lmg %r13, %r15, 264(%r15)
216+
; SOFT-FLOAT-FPC-FAST-NEXT: br %r14
217+
%product = fmul double %a, %b
218+
%result = fadd double %product, %c
219+
ret double %result
220+
}
221+
222+
attributes #0 = { "use-soft-float"="true" }
223+
224+
declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
225+
declare double @llvm.fmuladd.f64(double %a, double %b, double %c)

0 commit comments

Comments
 (0)