Skip to content

Commit 09ec80e

Browse files
ecnelises authored and tstellar committed
[PowerPC] Treat llvm.fmuladd intrinsic as using CTR
This fixes bug 55463, similar to D78668. This is a temporary fix since we will switch to post-isel CTR loop determination in the future.

Reviewed By: dim, shchenz

Differential Revision: https://reviews.llvm.org/D125746

(cherry picked from commit d9d15af)
1 parent b950bd2 commit 09ec80e

File tree

2 files changed

+137
-3
lines changed

2 files changed

+137
-3
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -491,15 +491,13 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
491491
case Intrinsic::experimental_constrained_sin:
492492
case Intrinsic::experimental_constrained_cos:
493493
return true;
494-
// There is no corresponding FMA instruction for PPC double double.
495-
// Thus, we need to disable CTR loop generation for this type.
496-
case Intrinsic::fmuladd:
497494
case Intrinsic::copysign:
498495
if (CI->getArgOperand(0)->getType()->getScalarType()->
499496
isPPC_FP128Ty())
500497
return true;
501498
else
502499
continue; // ISD::FCOPYSIGN is never a library call.
500+
case Intrinsic::fmuladd:
503501
case Intrinsic::fma: Opcode = ISD::FMA; break;
504502
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
505503
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;

llvm/test/CodeGen/PowerPC/pr55463.ll

Lines changed: 136 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,136 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=powerpcspe -verify-machineinstrs < %s | FileCheck %s
3+
4+
define void @baz() #0 {
5+
; CHECK-LABEL: baz:
6+
; CHECK: # %bb.0: # %bb
7+
; CHECK-NEXT: mflr 0
8+
; CHECK-NEXT: stw 0, 4(1)
9+
; CHECK-NEXT: stwu 1, -48(1)
10+
; CHECK-NEXT: li 3, .LCPI0_0@l
11+
; CHECK-NEXT: li 5, .LCPI0_1@l
12+
; CHECK-NEXT: lis 4, .LCPI0_0@ha
13+
; CHECK-NEXT: lis 6, .LCPI0_1@ha
14+
; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
15+
; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
16+
; CHECK-NEXT: evlddx 30, 4, 3
17+
; CHECK-NEXT: # implicit-def: $r3
18+
; CHECK-NEXT: evlddx 29, 6, 5
19+
; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
20+
; CHECK-NEXT: # implicit-def: $r28
21+
; CHECK-NEXT: .p2align 4
22+
; CHECK-NEXT: .LBB0_1: # %bb1
23+
; CHECK-NEXT: #
24+
; CHECK-NEXT: efdcfsi 8, 3
25+
; CHECK-NEXT: mr 4, 30
26+
; CHECK-NEXT: mr 6, 29
27+
; CHECK-NEXT: evmergehi 3, 30, 30
28+
; CHECK-NEXT: evmergehi 5, 29, 29
29+
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
30+
; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
31+
; CHECK-NEXT: evmergehi 7, 8, 8
32+
; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
33+
; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
34+
; CHECK-NEXT: bl fma
35+
; CHECK-NEXT: evmergelo 3, 3, 4
36+
; CHECK-NEXT: addi 5, 28, 1
37+
; CHECK-NEXT: cmplw 5, 28
38+
; CHECK-NEXT: mr 28, 5
39+
; CHECK-NEXT: efdctsiz 3, 3
40+
; CHECK-NEXT: bge 0, .LBB0_1
41+
; CHECK-NEXT: # %bb.2: # %bb8
42+
; CHECK-NEXT: bl wibble
43+
; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
44+
; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
45+
; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
46+
; CHECK-NEXT: lwz 0, 52(1)
47+
; CHECK-NEXT: addi 1, 1, 48
48+
; CHECK-NEXT: mtlr 0
49+
; CHECK-NEXT: blr
50+
bb:
51+
br label %bb1
52+
53+
bb1:
54+
%tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
55+
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
56+
%tmp3 = add nsw i32 %tmp2, 1
57+
%tmp4 = sitofp i32 %tmp to double
58+
%tmp5 = tail call double @llvm.fmuladd.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
59+
%tmp6 = fptosi double %tmp5 to i32
60+
%tmp7 = icmp eq i32 %tmp2, 0
61+
br i1 %tmp7, label %bb8, label %bb1
62+
63+
bb8:
64+
call void @wibble(i32 %tmp6)
65+
ret void
66+
}
67+
68+
define void @wombat() #0 {
69+
; CHECK-LABEL: wombat:
70+
; CHECK: # %bb.0: # %bb
71+
; CHECK-NEXT: mflr 0
72+
; CHECK-NEXT: stw 0, 4(1)
73+
; CHECK-NEXT: stwu 1, -48(1)
74+
; CHECK-NEXT: li 3, .LCPI1_0@l
75+
; CHECK-NEXT: li 5, .LCPI1_1@l
76+
; CHECK-NEXT: lis 4, .LCPI1_0@ha
77+
; CHECK-NEXT: lis 6, .LCPI1_1@ha
78+
; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill
79+
; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill
80+
; CHECK-NEXT: evlddx 30, 4, 3
81+
; CHECK-NEXT: # implicit-def: $r3
82+
; CHECK-NEXT: evlddx 29, 6, 5
83+
; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill
84+
; CHECK-NEXT: # implicit-def: $r28
85+
; CHECK-NEXT: .p2align 4
86+
; CHECK-NEXT: .LBB1_1: # %bb1
87+
; CHECK-NEXT: #
88+
; CHECK-NEXT: efdcfsi 8, 3
89+
; CHECK-NEXT: mr 4, 30
90+
; CHECK-NEXT: mr 6, 29
91+
; CHECK-NEXT: evmergehi 3, 30, 30
92+
; CHECK-NEXT: evmergehi 5, 29, 29
93+
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
94+
; CHECK-NEXT: # kill: def $r5 killed $r5 killed $s5
95+
; CHECK-NEXT: evmergehi 7, 8, 8
96+
; CHECK-NEXT: # kill: def $r8 killed $r8 killed $s8
97+
; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
98+
; CHECK-NEXT: bl fma
99+
; CHECK-NEXT: evmergelo 3, 3, 4
100+
; CHECK-NEXT: addi 5, 28, 1
101+
; CHECK-NEXT: cmplw 5, 28
102+
; CHECK-NEXT: mr 28, 5
103+
; CHECK-NEXT: efdctsiz 3, 3
104+
; CHECK-NEXT: bge 0, .LBB1_1
105+
; CHECK-NEXT: # %bb.2: # %bb8
106+
; CHECK-NEXT: bl wibble
107+
; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
108+
; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload
109+
; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload
110+
; CHECK-NEXT: lwz 0, 52(1)
111+
; CHECK-NEXT: addi 1, 1, 48
112+
; CHECK-NEXT: mtlr 0
113+
; CHECK-NEXT: blr
114+
bb:
115+
br label %bb1
116+
117+
bb1:
118+
%tmp = phi i32 [ %tmp6, %bb1 ], [ undef, %bb ]
119+
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ undef, %bb ]
120+
%tmp3 = add nsw i32 %tmp2, 1
121+
%tmp4 = sitofp i32 %tmp to double
122+
%tmp5 = tail call double @llvm.fma.f64(double 0.000000e+00, double -0.000000e+00, double %tmp4)
123+
%tmp6 = fptosi double %tmp5 to i32
124+
%tmp7 = icmp eq i32 %tmp2, 0
125+
br i1 %tmp7, label %bb8, label %bb1
126+
127+
bb8:
128+
call void @wibble(i32 %tmp6)
129+
ret void
130+
}
131+
132+
declare void @wibble(i32)
133+
declare double @llvm.fmuladd.f64(double, double, double)
134+
declare double @llvm.fma.f64(double, double, double)
135+
136+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)