11;; Test that the ppc-vsx-fma-mutate pass, when run with -schedule-ppc-vsx-fma-mutation-early, does not hoist some xxspltiw instructions.
22; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
33; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
4- ; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefix =CHECK64-M %s
4+ ; RUN: -mtriple powerpc64-ibm-aix < %s | FileCheck --check-prefixes =CHECK64,AIX64 %s
55
66; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
77; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
8- ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefix =CHECK64-M %s
8+ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck --check-prefixes =CHECK64,LINUX64 %s
99
1010; RUN: llc -verify-machineinstrs -mcpu=pwr10 -disable-ppc-vsx-fma-mutation=false \
1111; RUN: -ppc-asm-full-reg-names -schedule-ppc-vsx-fma-mutation-early \
12- ; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32-M %s
12+ ; RUN: -mtriple powerpc-ibm-aix < %s | FileCheck --check-prefix=CHECK32 %s
1313
14- define void @vsexp (ptr noalias nocapture noundef writeonly %__output_a , ptr noalias nocapture noundef readonly %var1321In_a , ptr noalias nocapture noundef readonly %n ) {
14+ define void @bar (ptr noalias nocapture noundef writeonly %__output_a , ptr noalias nocapture noundef readonly %var1321In_a , ptr noalias nocapture noundef readonly %n ) {
1515entry:
1616 %0 = load i32 , ptr %n , align 4
1717 %cmp11 = icmp sgt i32 %0 , 0
@@ -37,48 +37,142 @@ for.end:
3737 ret void
3838}
3939
; @foo: second test function. When %cmp97 is true it enters %for.body, a block
; that only branches back to itself; otherwise it returns through %for.end.
; The loop body combines a v4f32 fma whose addend is a constant splat with an
; xvcmpgtsp compare; the foo CHECK lines in this file expect the
; `xxspltiw ..., 1170469888` for that constant inside the loop body.
40+ define void @foo (i1 %cmp97 ) #0 {
41+ entry:
42+ br i1 %cmp97 , label %for.body , label %for.end
43+ 
44+ for.body: ; preds = %for.body, %entry
; %0 is loop-carried: 0.0 when coming from %entry, otherwise lane 0 of the
; previous iteration's masked result (%vecext.i).
45+ %0 = phi float [ %vecext.i , %for.body ], [ 0 .000000e+00 , %entry ]
; Despite the "splat" name, only lane 0 of a zero vector is written here.
46+ %splat.splatinsert.i = insertelement <4 x float > zeroinitializer , float %0 , i64 0
; fma(%splat.splatinsert.i, zero, splat(6270.5)). The single-precision bit
; pattern of 6270.5 is 0x45C3F400 = 1170469888, i.e. the xxspltiw immediate.
47+ %1 = tail call <4 x float > @llvm.fma.v4f32 (<4 x float > %splat.splatinsert.i , <4 x float > zeroinitializer , <4 x float > splat (float 6 .270500e+03 ))
; xvcmpgtsp intrinsic applied to (zero vector, %splat.splatinsert.i); its
; <4 x i32> result is used as a mask below.
48+ %2 = tail call <4 x i32 > @llvm.ppc.vsx.xvcmpgtsp (<4 x float > zeroinitializer , <4 x float > %splat.splatinsert.i )
; AND the compare result with the raw bits of the fma result.
49+ %3 = bitcast <4 x float > %1 to <4 x i32 >
50+ %and1.i8896 = and <4 x i32 > %2 , %3
51+ %4 = bitcast <4 x i32 > %and1.i8896 to <4 x float >
; Lane 0 of the masked value feeds the phi on the next iteration.
52+ %vecext.i = extractelement <4 x float > %4 , i64 0
; Unconditional back edge: %for.body never branches to %for.end.
53+ br label %for.body
54+ 
55+ for.end: ; preds = %entry
56+ ret void
57+ }
58+
4059; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
4160declare <4 x float > @llvm.fma.v4f32 (<4 x float >, <4 x float >, <4 x float >)
4261
43- ; CHECK64-M: # %bb.0: # %entry
44- ; CHECK64-M-NEXT: lwz r5, 0(r5)
45- ; CHECK64-M-NEXT: cmpwi r5, 1
46- ; CHECK64-M-NEXT: bltlr cr0
47- ; CHECK64-M-NEXT: # %bb.1: # %for.body.preheader
48- ; CHECK64-M-NEXT: xxspltiw vs0, 1069066811
49- ; CHECK64-M-NEXT: mtctr r5
50- ; CHECK64-M-NEXT: li r5, 0
51- ; CHECK64-M-NEXT: {{.*}}align 5
52- ; CHECK64-M-NEXT: [[L2:.*]]: # %for.body
53- ; CHECK64-M-NEXT: # =>This Inner Loop Header: Depth=1
54- ; CHECK64-M-NEXT: lxvx vs1, r4, r5
55- ; CHECK64-M-NEXT: xxspltiw vs2, 1170469888
56- ; CHECK64-M-NEXT: xvmaddasp vs2, vs1, vs0
57- ; CHECK64-M-NEXT: stxvx vs2, r3, r5
58- ; CHECK64-M-NEXT: addi r5, r5, 16
59- ; CHECK64-M-NEXT: bdnz [[L2]]
60- ; CHECK64-M-NEXT: # %bb.3: # %for.end
61- ; CHECK64-M-NEXT: blr
62-
63- ; CHECK32-M: .vsexp:
64- ; CHECK32-M-NEXT: # %bb.0: # %entry
65- ; CHECK32-M-NEXT: lwz r5, 0(r5)
66- ; CHECK32-M-NEXT: cmpwi r5, 0
67- ; CHECK32-M-NEXT: blelr cr0
68- ; CHECK32-M-NEXT: # %bb.1: # %for.body.preheader
69- ; CHECK32-M-NEXT: xxspltiw vs0, 1069066811
70- ; CHECK32-M-NEXT: li r6, 0
71- ; CHECK32-M-NEXT: li r7, 0
72- ; CHECK32-M-NEXT: .align 4
73- ; CHECK32-M-NEXT: L..BB0_2: # %for.body
74- ; CHECK32-M-NEXT: # =>This Inner Loop Header: Depth=1
75- ; CHECK32-M-NEXT: slwi r8, r7, 4
76- ; CHECK32-M-NEXT: xxspltiw vs2, 1170469888
77- ; CHECK32-M-NEXT: addic r7, r7, 1
78- ; CHECK32-M-NEXT: addze r6, r6
79- ; CHECK32-M-NEXT: lxvx vs1, r4, r8
80- ; CHECK32-M-NEXT: xvmaddasp vs2, vs1, vs0
81- ; CHECK32-M-NEXT: stxvx vs2, r3, r8
82- ; CHECK32-M-NEXT: xor r8, r7, r5
83- ; CHECK32-M-NEXT: or. r8, r8, r6
84- ; CHECK32-M-NEXT: bne cr0, L..BB0_2
62+ ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
63+ declare <4 x i32 > @llvm.ppc.vsx.xvcmpgtsp (<4 x float >, <4 x float >)
64+
65+ ; CHECK64: bar:
66+ ; CHECK64: # %bb.0: # %entry
67+ ; CHECK64-NEXT: lwz r5, 0(r5)
68+ ; CHECK64-NEXT: cmpwi r5, 1
69+ ; CHECK64-NEXT: bltlr cr0
70+ ; CHECK64-NEXT: # %bb.1: # %for.body.preheader
71+ ; CHECK64-NEXT: xxspltiw vs0, 1069066811
72+ ; CHECK64-NEXT: mtctr r5
73+ ; CHECK64-NEXT: li r5, 0
74+ ; CHECK64-NEXT: {{.*}}align 5
75+ ; CHECK64-NEXT: [[L2_bar:.*]]: # %for.body
76+ ; CHECK64-NEXT: # =>This Inner Loop Header: Depth=1
77+ ; CHECK64-NEXT: lxvx vs1, r4, r5
78+ ; CHECK64-NEXT: xxspltiw vs2, 1170469888
79+ ; CHECK64-NEXT: xvmaddasp vs2, vs1, vs0
80+ ; CHECK64-NEXT: stxvx vs2, r3, r5
81+ ; CHECK64-NEXT: addi r5, r5, 16
82+ ; CHECK64-NEXT: bdnz [[L2_bar]]
83+ ; CHECK64-NEXT: # %bb.3: # %for.end
84+ ; CHECK64-NEXT: blr
85+
86+ ; AIX64: .foo:
87+ ; AIX64-NEXT: # %bb.0: # %entry
88+ ; AIX64-NEXT: andi. r3, r3, 1
89+ ; AIX64-NEXT: bclr 4, gt, 0
90+ ; AIX64-NEXT: # %bb.1: # %for.body.preheader
91+ ; AIX64-NEXT: xxlxor f0, f0, f0
92+ ; AIX64-NEXT: xxlxor vs1, vs1, vs1
93+ ; AIX64-NEXT: xxlxor f2, f2, f2
94+ ; AIX64-NEXT: .align 4
95+ ; AIX64-NEXT: L..BB1_2: # %for.body
96+ ; AIX64-NEXT: # =>This Inner Loop Header: Depth=1
97+ ; AIX64-NEXT: xxmrghd vs2, vs2, vs0
98+ ; AIX64-NEXT: xvcvdpsp vs34, vs2
99+ ; AIX64-NEXT: xxmrghd vs2, vs0, vs0
100+ ; AIX64-NEXT: xvcvdpsp vs35, vs2
101+ ; AIX64-NEXT: xxspltiw vs2, 1170469888
102+ ; AIX64-NEXT: vmrgew v2, v2, v3
103+ ; AIX64-NEXT: xvcmpgtsp vs3, vs1, vs34
104+ ; AIX64-NEXT: xvmaddasp vs2, vs34, vs1
105+ ; AIX64-NEXT: xxland vs2, vs3, vs2
106+ ; AIX64-NEXT: xscvspdpn f2, vs2
107+ ; AIX64-NEXT: b L..BB1_2
108+
109+ ; LINUX64: foo: # @foo
110+ ; LINUX64-NEXT: .Lfunc_begin1:
111+ ; LINUX64-NEXT: .cfi_startproc
112+ ; LINUX64-NEXT: # %bb.0: # %entry
113+ ; LINUX64-NEXT: andi. r3, r3, 1
114+ ; LINUX64-NEXT: bclr 4, gt, 0
115+ ; LINUX64-NEXT: # %bb.1: # %for.body.preheader
116+ ; LINUX64-NEXT: xxlxor f0, f0, f0
117+ ; LINUX64-NEXT: xxlxor vs1, vs1, vs1
118+ ; LINUX64-NEXT: xxlxor f2, f2, f2
119+ ; LINUX64-NEXT: .p2align 4
120+ ; LINUX64-NEXT: .LBB1_2: # %for.body
121+ ; LINUX64-NEXT: # =>This Inner Loop Header: Depth=1
122+ ; LINUX64-NEXT: xxmrghd vs2, vs0, vs2
123+ ; LINUX64-NEXT: xvcvdpsp vs34, vs2
124+ ; LINUX64-NEXT: xxspltd vs2, vs0, 0
125+ ; LINUX64-NEXT: xvcvdpsp vs35, vs2
126+ ; LINUX64-NEXT: xxspltiw vs2, 1170469888
127+ ; LINUX64-NEXT: vmrgew v2, v3, v2
128+ ; LINUX64-NEXT: xvcmpgtsp vs3, vs1, vs34
129+ ; LINUX64-NEXT: xvmaddasp vs2, vs34, vs1
130+ ; LINUX64-NEXT: xxland vs2, vs3, vs2
131+ ; LINUX64-NEXT: xxsldwi vs2, vs2, vs2, 3
132+ ; LINUX64-NEXT: xscvspdpn f2, vs2
133+ ; LINUX64-NEXT: b .LBB1_2
134+
135+ ; CHECK32: .bar:
136+ ; CHECK32-NEXT: # %bb.0: # %entry
137+ ; CHECK32-NEXT: lwz r5, 0(r5)
138+ ; CHECK32-NEXT: cmpwi r5, 0
139+ ; CHECK32-NEXT: blelr cr0
140+ ; CHECK32-NEXT: # %bb.1: # %for.body.preheader
141+ ; CHECK32-NEXT: xxspltiw vs0, 1069066811
142+ ; CHECK32-NEXT: li r6, 0
143+ ; CHECK32-NEXT: li r7, 0
144+ ; CHECK32-NEXT: .align 4
145+ ; CHECK32-NEXT: [[L2_foo:.*]]: # %for.body
146+ ; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
147+ ; CHECK32-NEXT: slwi r8, r7, 4
148+ ; CHECK32-NEXT: xxspltiw vs2, 1170469888
149+ ; CHECK32-NEXT: addic r7, r7, 1
150+ ; CHECK32-NEXT: addze r6, r6
151+ ; CHECK32-NEXT: lxvx vs1, r4, r8
152+ ; CHECK32-NEXT: xvmaddasp vs2, vs1, vs0
153+ ; CHECK32-NEXT: stxvx vs2, r3, r8
154+ ; CHECK32-NEXT: xor r8, r7, r5
155+ ; CHECK32-NEXT: or. r8, r8, r6
156+ ; CHECK32-NEXT: bne cr0, [[L2_foo]]
157+
158+ ; CHECK32: .foo:
159+ ; CHECK32-NEXT: # %bb.0: # %entry
160+ ; CHECK32-NEXT: andi. r3, r3, 1
161+ ; CHECK32-NEXT: bclr 4, gt, 0
162+ ; CHECK32-NEXT: # %bb.1: # %for.body.preheader
163+ ; CHECK32-NEXT: lwz r3, L..C0(r2) # %const.0
164+ ; CHECK32-NEXT: xxlxor f1, f1, f1
165+ ; CHECK32-NEXT: xxlxor vs0, vs0, vs0
166+ ; CHECK32-NEXT: xscvdpspn vs35, f1
167+ ; CHECK32-NEXT: lxv vs34, 0(r3)
168+ ; CHECK32-NEXT: .align 4
169+ ; CHECK32-NEXT: L..BB1_2: # %for.body
170+ ; CHECK32-NEXT: # =>This Inner Loop Header: Depth=1
171+ ; CHECK32-NEXT: xscvdpspn vs36, f1
172+ ; CHECK32-NEXT: xxspltiw vs1, 1170469888
173+ ; CHECK32-NEXT: vperm v4, v4, v3, v2
174+ ; CHECK32-NEXT: xvcmpgtsp vs2, vs0, vs36
175+ ; CHECK32-NEXT: xvmaddasp vs1, vs36, vs0
176+ ; CHECK32-NEXT: xxland vs1, vs2, vs1
177+ ; CHECK32-NEXT: xscvspdpn f1, vs1
178+ ; CHECK32-NEXT: b L..BB1_2
0 commit comments