Skip to content

Commit e38181c

Browse files
committed
[MachineLICM] Let targets decide if copy-like instructions are cheap
When checking whether it is profitable to hoist an instruction, the pass may override a target's ruling because it assumes that all COPY instructions are cheap, and that may not be the case for all micro-architectures. On AArch64 there's 0% difference in performance in LLVM's test-suite. Additionally, very few tests were affected by this change, which suggests that the blanket copy-like assumption was of little use and is not worth keeping.
1 parent a061171 commit e38181c

File tree

9 files changed

+387
-235
lines changed

9 files changed

+387
-235
lines changed

llvm/lib/CodeGen/MachineLICM.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1219,7 +1219,7 @@ bool MachineLICMImpl::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
12191219
/// Return true if the instruction is marked "cheap" or the operand latency
12201220
/// between its def and a use is one or less.
12211221
bool MachineLICMImpl::IsCheapInstruction(MachineInstr &MI) const {
1222-
if (TII->isAsCheapAsAMove(MI) || MI.isCopyLike())
1222+
if (TII->isAsCheapAsAMove(MI))
12231223
return true;
12241224

12251225
bool isCheap = false;

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,28 @@ define fastcc i8 @allocno_reload_assign(ptr %p) {
1818
; CHECK-NEXT: fmov w8, s0
1919
; CHECK-NEXT: movi v0.2d, #0000000000000000
2020
; CHECK-NEXT: mvn w8, w8
21+
; CHECK-NEXT: uunpklo z1.h, z0.b
22+
; CHECK-NEXT: uunpkhi z2.h, z0.b
2123
; CHECK-NEXT: sbfx x8, x8, #0, #1
2224
; CHECK-NEXT: whilelo p0.b, xzr, x8
25+
; CHECK-NEXT: uunpklo z3.s, z1.h
26+
; CHECK-NEXT: uunpkhi z4.s, z1.h
27+
; CHECK-NEXT: uunpklo z6.s, z2.h
28+
; CHECK-NEXT: uunpkhi z16.s, z2.h
2329
; CHECK-NEXT: punpklo p1.h, p0.b
2430
; CHECK-NEXT: punpkhi p0.h, p0.b
2531
; CHECK-NEXT: punpklo p2.h, p1.b
2632
; CHECK-NEXT: punpkhi p4.h, p1.b
33+
; CHECK-NEXT: uunpklo z1.d, z3.s
34+
; CHECK-NEXT: uunpkhi z2.d, z3.s
2735
; CHECK-NEXT: punpklo p6.h, p0.b
36+
; CHECK-NEXT: uunpklo z3.d, z4.s
37+
; CHECK-NEXT: uunpkhi z4.d, z4.s
2838
; CHECK-NEXT: punpkhi p0.h, p0.b
39+
; CHECK-NEXT: uunpklo z5.d, z6.s
40+
; CHECK-NEXT: uunpkhi z6.d, z6.s
41+
; CHECK-NEXT: uunpklo z7.d, z16.s
42+
; CHECK-NEXT: uunpkhi z16.d, z16.s
2943
; CHECK-NEXT: punpklo p1.h, p2.b
3044
; CHECK-NEXT: punpkhi p2.h, p2.b
3145
; CHECK-NEXT: punpklo p3.h, p4.b
@@ -35,28 +49,14 @@ define fastcc i8 @allocno_reload_assign(ptr %p) {
3549
; CHECK-NEXT: punpklo p7.h, p0.b
3650
; CHECK-NEXT: punpkhi p0.h, p0.b
3751
; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1
38-
; CHECK-NEXT: uunpklo z1.h, z0.b
39-
; CHECK-NEXT: uunpklo z2.s, z1.h
40-
; CHECK-NEXT: uunpkhi z1.s, z1.h
41-
; CHECK-NEXT: uunpklo z3.d, z2.s
42-
; CHECK-NEXT: uunpkhi z2.d, z2.s
43-
; CHECK-NEXT: st1b { z3.d }, p1, [z0.d]
52+
; CHECK-NEXT: st1b { z1.d }, p1, [z0.d]
4453
; CHECK-NEXT: st1b { z2.d }, p2, [z0.d]
45-
; CHECK-NEXT: uunpklo z2.d, z1.s
46-
; CHECK-NEXT: uunpkhi z1.d, z1.s
47-
; CHECK-NEXT: st1b { z2.d }, p3, [z0.d]
48-
; CHECK-NEXT: uunpkhi z2.h, z0.b
49-
; CHECK-NEXT: uunpklo z3.s, z2.h
50-
; CHECK-NEXT: uunpkhi z2.s, z2.h
51-
; CHECK-NEXT: st1b { z1.d }, p4, [z0.d]
52-
; CHECK-NEXT: uunpklo z1.d, z3.s
53-
; CHECK-NEXT: st1b { z1.d }, p5, [z0.d]
54-
; CHECK-NEXT: uunpkhi z1.d, z3.s
55-
; CHECK-NEXT: st1b { z1.d }, p6, [z0.d]
56-
; CHECK-NEXT: uunpklo z1.d, z2.s
57-
; CHECK-NEXT: st1b { z1.d }, p7, [z0.d]
58-
; CHECK-NEXT: uunpkhi z1.d, z2.s
59-
; CHECK-NEXT: st1b { z1.d }, p0, [z0.d]
54+
; CHECK-NEXT: st1b { z3.d }, p3, [z0.d]
55+
; CHECK-NEXT: st1b { z4.d }, p4, [z0.d]
56+
; CHECK-NEXT: st1b { z5.d }, p5, [z0.d]
57+
; CHECK-NEXT: st1b { z6.d }, p6, [z0.d]
58+
; CHECK-NEXT: st1b { z7.d }, p7, [z0.d]
59+
; CHECK-NEXT: st1b { z16.d }, p0, [z0.d]
6060
; CHECK-NEXT: str p8, [x0]
6161
; CHECK-NEXT: b .LBB0_1
6262
br label %1

llvm/test/CodeGen/PowerPC/vsx-fma-m-early.ll

Lines changed: 139 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,84 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
12
;; Tests that the ppc-vsx-fma-mutate pass with the schedule-ppc-vsx-fma-mutation-early pass does not hoist xxspltiw out of loops.
23
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
34
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
4-
; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes=CHECK64,AIX64 %s
5+
; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes=AIX64 %s
56

67
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
78
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
8-
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes=CHECK64,LINUX64 %s
9+
; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes=LINUX64 %s
910

1011
; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
1112
; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
1213
; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32 %s
1314

1415
define void @bar(ptr noalias nocapture noundef writeonly %__output_a, ptr noalias nocapture noundef readonly %var1321In_a, ptr noalias nocapture noundef readonly %n) {
16+
; AIX64-LABEL: bar:
17+
; AIX64: # %bb.0: # %entry
18+
; AIX64-NEXT: lwz r5, 0(r5)
19+
; AIX64-NEXT: cmpwi r5, 1
20+
; AIX64-NEXT: bltlr cr0
21+
; AIX64-NEXT: # %bb.1: # %for.body.preheader
22+
; AIX64-NEXT: xxspltiw vs0, 1069066811
23+
; AIX64-NEXT: xxspltiw vs1, 1170469888
24+
; AIX64-NEXT: mtctr r5
25+
; AIX64-NEXT: li r5, 0
26+
; AIX64-NEXT: .align 5
27+
; AIX64-NEXT: L..BB0_2: # %for.body
28+
; AIX64-NEXT: #
29+
; AIX64-NEXT: lxvx vs2, r4, r5
30+
; AIX64-NEXT: xvmaddmsp vs2, vs0, vs1
31+
; AIX64-NEXT: stxvx vs2, r3, r5
32+
; AIX64-NEXT: addi r5, r5, 16
33+
; AIX64-NEXT: bdnz L..BB0_2
34+
; AIX64-NEXT: # %bb.3: # %for.end
35+
; AIX64-NEXT: blr
36+
;
37+
; LINUX64-LABEL: bar:
38+
; LINUX64: # %bb.0: # %entry
39+
; LINUX64-NEXT: lwz r5, 0(r5)
40+
; LINUX64-NEXT: cmpwi r5, 1
41+
; LINUX64-NEXT: bltlr cr0
42+
; LINUX64-NEXT: # %bb.1: # %for.body.preheader
43+
; LINUX64-NEXT: xxspltiw vs0, 1069066811
44+
; LINUX64-NEXT: xxspltiw vs1, 1170469888
45+
; LINUX64-NEXT: mtctr r5
46+
; LINUX64-NEXT: li r5, 0
47+
; LINUX64-NEXT: .p2align 5
48+
; LINUX64-NEXT: .LBB0_2: # %for.body
49+
; LINUX64-NEXT: #
50+
; LINUX64-NEXT: lxvx vs2, r4, r5
51+
; LINUX64-NEXT: xvmaddmsp vs2, vs0, vs1
52+
; LINUX64-NEXT: stxvx vs2, r3, r5
53+
; LINUX64-NEXT: addi r5, r5, 16
54+
; LINUX64-NEXT: bdnz .LBB0_2
55+
; LINUX64-NEXT: # %bb.3: # %for.end
56+
; LINUX64-NEXT: blr
57+
;
58+
; CHECK32-LABEL: bar:
59+
; CHECK32: # %bb.0: # %entry
60+
; CHECK32-NEXT: lwz r5, 0(r5)
61+
; CHECK32-NEXT: cmpwi r5, 0
62+
; CHECK32-NEXT: blelr cr0
63+
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
64+
; CHECK32-NEXT: xxspltiw vs0, 1069066811
65+
; CHECK32-NEXT: xxspltiw vs1, 1170469888
66+
; CHECK32-NEXT: li r6, 0
67+
; CHECK32-NEXT: li r7, 0
68+
; CHECK32-NEXT: .align 4
69+
; CHECK32-NEXT: L..BB0_2: # %for.body
70+
; CHECK32-NEXT: #
71+
; CHECK32-NEXT: slwi r8, r7, 4
72+
; CHECK32-NEXT: addic r7, r7, 1
73+
; CHECK32-NEXT: addze r6, r6
74+
; CHECK32-NEXT: lxvx vs2, r4, r8
75+
; CHECK32-NEXT: xvmaddmsp vs2, vs0, vs1
76+
; CHECK32-NEXT: stxvx vs2, r3, r8
77+
; CHECK32-NEXT: xor r8, r7, r5
78+
; CHECK32-NEXT: or. r8, r8, r6
79+
; CHECK32-NEXT: bne cr0, L..BB0_2
80+
; CHECK32-NEXT: # %bb.3: # %for.end
81+
; CHECK32-NEXT: blr
1582
entry:
1683
%0 = load i32, ptr %n, align 4
1784
%cmp11 = icmp sgt i32 %0, 0
@@ -28,7 +95,7 @@ for.body:
2895
%add.ptr.val = load <4 x float>, ptr %add.ptr, align 1
2996
%2 = tail call contract <4 x float> @llvm.fma.v4f32(<4 x float> %add.ptr.val, <4 x float> <float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000, float 0x3FF7154760000000>, <4 x float> <float 6.270500e+03, float 6.270500e+03, float 6.270500e+03, float 6.270500e+03>)
3097
%add.ptr6 = getelementptr inbounds float, ptr %__output_a, i64 %1
31-
store <4 x float> %2, ptr %add.ptr6, align 1
98+
store <4 x float> %2, ptr %add.ptr6, align 1
3299
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
33100
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
34101
br i1 %exitcond.not, label %for.end, label %for.body
@@ -38,6 +105,74 @@ for.end:
38105
}
39106

40107
define void @foo(i1 %cmp97) #0 {
108+
; AIX64-LABEL: foo:
109+
; AIX64: # %bb.0: # %entry
110+
; AIX64-NEXT: andi. r3, r3, 1
111+
; AIX64-NEXT: bclr 4, gt, 0
112+
; AIX64-NEXT: # %bb.1: # %for.body.preheader
113+
; AIX64-NEXT: xxlxor f0, f0, f0
114+
; AIX64-NEXT: xxlxor f2, f2, f2
115+
; AIX64-NEXT: xxmrghd vs1, vs0, vs0
116+
; AIX64-NEXT: xvcvdpsp vs34, vs1
117+
; AIX64-NEXT: xxlxor vs1, vs1, vs1
118+
; AIX64-NEXT: .align 4
119+
; AIX64-NEXT: L..BB1_2: # %for.body
120+
; AIX64-NEXT: #
121+
; AIX64-NEXT: xxmrghd vs2, vs2, vs0
122+
; AIX64-NEXT: xvcvdpsp vs35, vs2
123+
; AIX64-NEXT: xxspltiw vs2, 1170469888
124+
; AIX64-NEXT: vmrgew v3, v3, v2
125+
; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs35
126+
; AIX64-NEXT: xvmaddasp vs2, vs35, vs1
127+
; AIX64-NEXT: xxland vs2, vs3, vs2
128+
; AIX64-NEXT: xscvspdpn f2, vs2
129+
; AIX64-NEXT: b L..BB1_2
130+
;
131+
; LINUX64-LABEL: foo:
132+
; LINUX64: # %bb.0: # %entry
133+
; LINUX64-NEXT: andi. r3, r3, 1
134+
; LINUX64-NEXT: bclr 4, gt, 0
135+
; LINUX64-NEXT: # %bb.1: # %for.body.preheader
136+
; LINUX64-NEXT: xxlxor f0, f0, f0
137+
; LINUX64-NEXT: xxlxor f2, f2, f2
138+
; LINUX64-NEXT: xxspltd vs1, vs0, 0
139+
; LINUX64-NEXT: xvcvdpsp vs34, vs1
140+
; LINUX64-NEXT: xxlxor vs1, vs1, vs1
141+
; LINUX64-NEXT: .p2align 4
142+
; LINUX64-NEXT: .LBB1_2: # %for.body
143+
; LINUX64-NEXT: #
144+
; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
145+
; LINUX64-NEXT: xvcvdpsp vs35, vs2
146+
; LINUX64-NEXT: xxspltiw vs2, 1170469888
147+
; LINUX64-NEXT: vmrgew v3, v2, v3
148+
; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs35
149+
; LINUX64-NEXT: xvmaddasp vs2, vs35, vs1
150+
; LINUX64-NEXT: xxland vs2, vs3, vs2
151+
; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
152+
; LINUX64-NEXT: xscvspdpn f2, vs2
153+
; LINUX64-NEXT: b .LBB1_2
154+
;
155+
; CHECK32-LABEL: foo:
156+
; CHECK32: # %bb.0: # %entry
157+
; CHECK32-NEXT: andi. r3, r3, 1
158+
; CHECK32-NEXT: bclr 4, gt, 0
159+
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
160+
; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
161+
; CHECK32-NEXT: xxlxor f1, f1, f1
162+
; CHECK32-NEXT: xxlxor vs0, vs0, vs0
163+
; CHECK32-NEXT: xscvdpspn vs35, f1
164+
; CHECK32-NEXT: lxv vs34, 0(r3)
165+
; CHECK32-NEXT: .align 4
166+
; CHECK32-NEXT: L..BB1_2: # %for.body
167+
; CHECK32-NEXT: #
168+
; CHECK32-NEXT: xscvdpspn vs36, f1
169+
; CHECK32-NEXT: xxspltiw vs1, 1170469888
170+
; CHECK32-NEXT: vperm v4, v4, v3, v2
171+
; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
172+
; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
173+
; CHECK32-NEXT: xxland vs1, vs2, vs1
174+
; CHECK32-NEXT: xscvspdpn f1, vs1
175+
; CHECK32-NEXT: b L..BB1_2
41176
entry:
42177
br i1 %cmp97, label %for.body, label %for.end
43178

@@ -57,122 +192,7 @@ for.end: ; preds = %entry
57192
}
58193

59194
; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
60-
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
195+
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
61196

62197
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
63198
declare <4 x i32> @llvm.ppc.vsx.xvcmpgtsp(<4 x float>, <4 x float>)
64-
65-
; CHECK64: bar:
66-
; CHECK64: # %bb.0: # %entry
67-
; CHECK64-NEXT: lwz r5, 0(r5)
68-
; CHECK64-NEXT: cmpwi r5, 1
69-
; CHECK64-NEXT: bltlr cr0
70-
; CHECK64-NEXT: # %bb.1: # %for.body.preheader
71-
; CHECK64-NEXT: xxspltiw vs0, 1069066811
72-
; CHECK64-NEXT: xxspltiw vs1, 1170469888
73-
; CHECK64-NEXT: mtctr r5
74-
; CHECK64-NEXT: li r5, 0
75-
; CHECK64-NEXT: {{.*}}align 5
76-
; CHECK64-NEXT: [[L2_bar:.*]]: # %for.body
77-
; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
78-
; CHECK64-NEXT: lxvx vs2, r4, r5
79-
; CHECK64-NEXT: xvmaddmsp vs2, vs0, vs1
80-
; CHECK64-NEXT: stxvx vs2, r3, r5
81-
; CHECK64-NEXT: addi r5, r5, 16
82-
; CHECK64-NEXT: bdnz [[L2_bar]]
83-
; CHECK64-NEXT: # %bb.3: # %for.end
84-
; CHECK64-NEXT: blr
85-
86-
; AIX64: .foo:
87-
; AIX64-NEXT: # %bb.0: # %entry
88-
; AIX64-NEXT: andi. r3, r3, 1
89-
; AIX64-NEXT: bclr 4, gt, 0
90-
; AIX64-NEXT: # %bb.1: # %for.body.preheader
91-
; AIX64-NEXT: xxlxor f0, f0, f0
92-
; AIX64-NEXT: xxlxor vs1, vs1, vs1
93-
; AIX64-NEXT: xxlxor f2, f2, f2
94-
; AIX64-NEXT: .align 4
95-
; AIX64-NEXT: L..BB1_2: # %for.body
96-
; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
97-
; AIX64-NEXT: xxmrghd vs2, vs2, vs0
98-
; AIX64-NEXT: xvcvdpsp vs34, vs2
99-
; AIX64-NEXT: xxmrghd vs2, vs0, vs0
100-
; AIX64-NEXT: xvcvdpsp vs35, vs2
101-
; AIX64-NEXT: xxspltiw vs2, 1170469888
102-
; AIX64-NEXT: vmrgew v2, v2, v3
103-
; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs34
104-
; AIX64-NEXT: xvmaddasp vs2, vs34, vs1
105-
; AIX64-NEXT: xxland vs2, vs3, vs2
106-
; AIX64-NEXT: xscvspdpn f2, vs2
107-
; AIX64-NEXT: b L..BB1_2
108-
109-
; LINUX64: foo: # @foo
110-
; LINUX64-NEXT: .Lfunc_begin1:
111-
; LINUX64-NEXT: .cfi_startproc
112-
; LINUX64-NEXT: # %bb.0: # %entry
113-
; LINUX64-NEXT: andi. r3, r3, 1
114-
; LINUX64-NEXT: bclr 4, gt, 0
115-
; LINUX64-NEXT: # %bb.1: # %for.body.preheader
116-
; LINUX64-NEXT: xxlxor f0, f0, f0
117-
; LINUX64-NEXT: xxlxor vs1, vs1, vs1
118-
; LINUX64-NEXT: xxlxor f2, f2, f2
119-
; LINUX64-NEXT: .p2align 4
120-
; LINUX64-NEXT: .LBB1_2: # %for.body
121-
; LINUX64-NEXT: # =>This Inner Loop Header: Depth=1
122-
; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
123-
; LINUX64-NEXT: xvcvdpsp vs34, vs2
124-
; LINUX64-NEXT: xxspltd vs2, vs0, 0
125-
; LINUX64-NEXT: xvcvdpsp vs35, vs2
126-
; LINUX64-NEXT: xxspltiw vs2, 1170469888
127-
; LINUX64-NEXT: vmrgew v2, v3, v2
128-
; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs34
129-
; LINUX64-NEXT: xvmaddasp vs2, vs34, vs1
130-
; LINUX64-NEXT: xxland vs2, vs3, vs2
131-
; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
132-
; LINUX64-NEXT: xscvspdpn f2, vs2
133-
; LINUX64-NEXT: b .LBB1_2
134-
135-
; CHECK32: .bar:
136-
; CHECK32-NEXT: # %bb.0: # %entry
137-
; CHECK32-NEXT: lwz r5, 0(r5)
138-
; CHECK32-NEXT: cmpwi r5, 0
139-
; CHECK32-NEXT: blelr cr0
140-
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
141-
; CHECK32-NEXT: xxspltiw vs0, 1069066811
142-
; CHECK32-NEXT: xxspltiw vs1, 1170469888
143-
; CHECK32-NEXT: li r6, 0
144-
; CHECK32-NEXT: li r7, 0
145-
; CHECK32-NEXT: .align 4
146-
; CHECK32-NEXT: [[L2_foo:.*]]: # %for.body
147-
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
148-
; CHECK32-NEXT: slwi r8, r7, 4
149-
; CHECK32-NEXT: addic r7, r7, 1
150-
; CHECK32-NEXT: addze r6, r6
151-
; CHECK32-NEXT: lxvx vs2, r4, r8
152-
; CHECK32-NEXT: xvmaddmsp vs2, vs0, vs1
153-
; CHECK32-NEXT: stxvx vs2, r3, r8
154-
; CHECK32-NEXT: xor r8, r7, r5
155-
; CHECK32-NEXT: or. r8, r8, r6
156-
; CHECK32-NEXT: bne cr0, [[L2_foo]]
157-
158-
; CHECK32: .foo:
159-
; CHECK32-NEXT: # %bb.0: # %entry
160-
; CHECK32-NEXT: andi. r3, r3, 1
161-
; CHECK32-NEXT: bclr 4, gt, 0
162-
; CHECK32-NEXT: # %bb.1: # %for.body.preheader
163-
; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
164-
; CHECK32-NEXT: xxlxor f1, f1, f1
165-
; CHECK32-NEXT: xxlxor vs0, vs0, vs0
166-
; CHECK32-NEXT: xscvdpspn vs35, f1
167-
; CHECK32-NEXT: lxv vs34, 0(r3)
168-
; CHECK32-NEXT: .align 4
169-
; CHECK32-NEXT: L..BB1_2: # %for.body
170-
; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
171-
; CHECK32-NEXT: xscvdpspn vs36, f1
172-
; CHECK32-NEXT: xxspltiw vs1, 1170469888
173-
; CHECK32-NEXT: vperm v4, v4, v3, v2
174-
; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
175-
; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
176-
; CHECK32-NEXT: xxland vs1, vs2, vs1
177-
; CHECK32-NEXT: xscvspdpn f1, vs1
178-
; CHECK32-NEXT: b L..BB1_2

0 commit comments

Comments
 (0)