@@ -129,6 +129,248 @@ entry:
129129 ret void
130130}
131131
; text512: calls @llvm.ppc.mma.dmxxextfdmr512 twice (immediates 0 and 1) on the
; <1024 x i1> value returned by @llvm.ppc.mma.dmsetdmrz, and stores element 0
; of each returned { <256 x i1>, <256 x i1> } pair to %rp1 and %rp3.
; %vp1, %rp2 and %rp4 are not referenced by the body.
; Two groups of expected assembly follow: the CHECK group and the CHECK-BE
; group (presumably little- and big-endian runs; the RUN lines are outside
; this view, so confirm there).
132+ define void @text512 (ptr %vp1 , ptr %rp1 , ptr %rp2 , ptr %rp3 , ptr %rp4 ) {
133+ ; CHECK-LABEL: text512:
134+ ; CHECK: # %bb.0: # %entry
135+ ; CHECK-NEXT: dmsetdmrz dmr0
136+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
137+ ; CHECK-NEXT: stxv v2, 16(r4)
138+ ; CHECK-NEXT: stxv v3, 0(r4)
139+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
140+ ; CHECK-NEXT: stxv v2, 16(r6)
141+ ; CHECK-NEXT: stxv v3, 0(r6)
142+ ; CHECK-NEXT: blr
143+ ;
144+ ; CHECK-BE-LABEL: text512:
145+ ; CHECK-BE: # %bb.0: # %entry
146+ ; CHECK-BE-NEXT: dmsetdmrz dmr0
147+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
148+ ; CHECK-BE-NEXT: stxv v3, 16(r4)
149+ ; CHECK-BE-NEXT: stxv v2, 0(r4)
150+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
151+ ; CHECK-BE-NEXT: stxv v3, 16(r6)
152+ ; CHECK-BE-NEXT: stxv v2, 0(r6)
153+ ; CHECK-BE-NEXT: blr
154+ entry:
155+ %z = call <1024 x i1 > @llvm.ppc.mma.dmsetdmrz ()
; Immediate 0: the expected assembly above uses wacc0 as the source operand.
156+ %x = call { <256 x i1 >, <256 x i1 > } @llvm.ppc.mma.dmxxextfdmr512 (<1024 x i1 > %z , i32 0 )
; Only element 0 of the pair is extracted and stored; element 1 is unused.
; NOTE(review): %rp2 may have been intended for element 1 - confirm.
157+ %p = extractvalue { <256 x i1 >, <256 x i1 > } %x , 0
158+ store <256 x i1 > %p , ptr %rp1 , align 16
; Immediate 1: the expected assembly above uses wacc_hi0 as the source operand.
159+ %y = call { <256 x i1 >, <256 x i1 > } @llvm.ppc.mma.dmxxextfdmr512 (<1024 x i1 > %z , i32 1 )
; Again only element 0 is stored (to %rp3); %rp4 stays unused.
160+ %q = extractvalue { <256 x i1 >, <256 x i1 > } %y , 0
161+ store <256 x i1 > %q , ptr %rp3 , align 16
162+ ret void
163+ }
164+
; text256: calls @llvm.ppc.mma.dmxxextfdmr256 four times (immediates 0..3) on
; the <1024 x i1> value returned by @llvm.ppc.mma.dmsetdmrz and stores the
; four <256 x i1> results to %rp1, %rp2, %rp3 and %rp4 in that order.
; %vp1 is not referenced by the body.
; In the expected assembly the source operand is dmrrowp0..dmrrowp3 for
; immediates 0..3 respectively.
165+ define void @text256 (ptr %vp1 , ptr %rp1 , ptr %rp2 , ptr %rp3 , ptr %rp4 ) {
166+ ; CHECK-LABEL: text256:
167+ ; CHECK: # %bb.0: # %entry
168+ ; CHECK-NEXT: dmsetdmrz dmr0
169+ ; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp0, 0
170+ ; CHECK-NEXT: stxv v2, 16(r4)
171+ ; CHECK-NEXT: stxv v3, 0(r4)
172+ ; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp1, 1
173+ ; CHECK-NEXT: stxv v2, 16(r5)
174+ ; CHECK-NEXT: stxv v3, 0(r5)
175+ ; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp2, 2
176+ ; CHECK-NEXT: stxv v2, 16(r6)
177+ ; CHECK-NEXT: stxv v3, 0(r6)
178+ ; CHECK-NEXT: dmxxextfdmr256 vsp34, dmrrowp3, 3
179+ ; CHECK-NEXT: stxv v2, 16(r7)
180+ ; CHECK-NEXT: stxv v3, 0(r7)
181+ ; CHECK-NEXT: blr
182+ ;
183+ ; CHECK-BE-LABEL: text256:
184+ ; CHECK-BE: # %bb.0: # %entry
185+ ; CHECK-BE-NEXT: dmsetdmrz dmr0
186+ ; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp0, 0
187+ ; CHECK-BE-NEXT: stxv v3, 16(r4)
188+ ; CHECK-BE-NEXT: stxv v2, 0(r4)
189+ ; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp1, 1
190+ ; CHECK-BE-NEXT: stxv v3, 16(r5)
191+ ; CHECK-BE-NEXT: stxv v2, 0(r5)
192+ ; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp2, 2
193+ ; CHECK-BE-NEXT: stxv v3, 16(r6)
194+ ; CHECK-BE-NEXT: stxv v2, 0(r6)
195+ ; CHECK-BE-NEXT: dmxxextfdmr256 vsp34, dmrrowp3, 3
196+ ; CHECK-BE-NEXT: stxv v3, 16(r7)
197+ ; CHECK-BE-NEXT: stxv v2, 0(r7)
198+ ; CHECK-BE-NEXT: blr
199+ entry:
200+ %z = call <1024 x i1 > @llvm.ppc.mma.dmsetdmrz ()
; One extract per immediate value 0..3; every call reads the same %z.
201+ %x = call <256 x i1 > @llvm.ppc.mma.dmxxextfdmr256 (<1024 x i1 > %z , i32 0 )
202+ store <256 x i1 > %x , ptr %rp1 , align 16
203+ %q = call <256 x i1 > @llvm.ppc.mma.dmxxextfdmr256 (<1024 x i1 > %z , i32 1 )
204+ store <256 x i1 > %q , ptr %rp2 , align 16
205+ %w = call <256 x i1 > @llvm.ppc.mma.dmxxextfdmr256 (<1024 x i1 > %z , i32 2 )
206+ store <256 x i1 > %w , ptr %rp3 , align 16
207+ %y = call <256 x i1 > @llvm.ppc.mma.dmxxextfdmr256 (<1024 x i1 > %z , i32 3 )
208+ store <256 x i1 > %y , ptr %rp4 , align 16
209+ ret void
210+ }
211+
; tins512: calls @llvm.ppc.mma.dmxxinstdmr512 twice, chaining the result.
;   1. Inserts the <256 x i1> values loaded from %vp1 and %vp2 into the value
;      returned by @llvm.ppc.mma.dmsetdmrz with immediate 0, and stores the
;      resulting <1024 x i1> to %rp1.
;   2. Inserts the values loaded from %vp3 and %vp4 into the result of step 1
;      with immediate 1, and stores the resulting <1024 x i1> to %rp2.
; In the expected assembly each <1024 x i1> store appears as two
; dmxxextfdmr512 instructions (wacc0 and wacc_hi0) plus four stxvp stores.
212+ define void @tins512 (ptr %vp1 , ptr %vp2 , ptr %vp3 , ptr %vp4 , ptr %rp1 , ptr %rp2 ) {
213+ ; CHECK-LABEL: tins512:
214+ ; CHECK: # %bb.0: # %entry
215+ ; CHECK-NEXT: lxv v2, 16(r3)
216+ ; CHECK-NEXT: lxv v3, 0(r3)
217+ ; CHECK-NEXT: lxv v4, 16(r4)
218+ ; CHECK-NEXT: lxv v5, 0(r4)
219+ ; CHECK-NEXT: dmsetdmrz dmr0
220+ ; CHECK-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
221+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
222+ ; CHECK-NEXT: stxvp vsp34, 96(r7)
223+ ; CHECK-NEXT: stxvp vsp36, 64(r7)
224+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
225+ ; CHECK-NEXT: stxvp vsp34, 32(r7)
226+ ; CHECK-NEXT: stxvp vsp36, 0(r7)
227+ ; CHECK-NEXT: lxv v2, 16(r5)
228+ ; CHECK-NEXT: lxv v4, 16(r6)
229+ ; CHECK-NEXT: lxv v3, 0(r5)
230+ ; CHECK-NEXT: lxv v5, 0(r6)
231+ ; CHECK-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
232+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
233+ ; CHECK-NEXT: stxvp vsp34, 96(r8)
234+ ; CHECK-NEXT: stxvp vsp36, 64(r8)
235+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
236+ ; CHECK-NEXT: stxvp vsp34, 32(r8)
237+ ; CHECK-NEXT: stxvp vsp36, 0(r8)
238+ ; CHECK-NEXT: blr
239+ ;
240+ ; CHECK-BE-LABEL: tins512:
241+ ; CHECK-BE: # %bb.0: # %entry
242+ ; CHECK-BE-NEXT: lxv v2, 0(r3)
243+ ; CHECK-BE-NEXT: lxv v3, 16(r3)
244+ ; CHECK-BE-NEXT: lxv v4, 0(r4)
245+ ; CHECK-BE-NEXT: lxv v5, 16(r4)
246+ ; CHECK-BE-NEXT: dmsetdmrz dmr0
247+ ; CHECK-BE-NEXT: dmxxinstdmr512 wacc0, vsp34, vsp36, 0
248+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
249+ ; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
250+ ; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
251+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
252+ ; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
253+ ; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
254+ ; CHECK-BE-NEXT: lxv v2, 0(r5)
255+ ; CHECK-BE-NEXT: lxv v4, 0(r6)
256+ ; CHECK-BE-NEXT: lxv v3, 16(r5)
257+ ; CHECK-BE-NEXT: lxv v5, 16(r6)
258+ ; CHECK-BE-NEXT: dmxxinstdmr512 wacc_hi0, vsp34, vsp36, 1
259+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
260+ ; CHECK-BE-NEXT: stxvp vsp36, 96(r8)
261+ ; CHECK-BE-NEXT: stxvp vsp34, 64(r8)
262+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
263+ ; CHECK-BE-NEXT: stxvp vsp36, 32(r8)
264+ ; CHECK-BE-NEXT: stxvp vsp34, 0(r8)
265+ ; CHECK-BE-NEXT: blr
266+ entry:
267+ %z = call <1024 x i1 > @llvm.ppc.mma.dmsetdmrz ()
268+ %l1 = load <256 x i1 >, ptr %vp1 , align 16
269+ %r1 = load <256 x i1 >, ptr %vp2 , align 16
; First insert (immediate 0) starts from the dmsetdmrz result %z; the expected
; assembly above names wacc0 as the destination operand.
270+ %a = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr512 (<1024 x i1 > %z , <256 x i1 > %l1 , <256 x i1 > %r1 , i32 0 )
271+ store <1024 x i1 > %a , ptr %rp1 , align 16
272+ %l2 = load <256 x i1 >, ptr %vp3 , align 16
273+ %r2 = load <256 x i1 >, ptr %vp4 , align 16
; Second insert (immediate 1) builds on %a rather than %z; the expected
; assembly above names wacc_hi0 as the destination operand.
274+ %b = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr512 (<1024 x i1 > %a , <256 x i1 > %l2 , <256 x i1 > %r2 , i32 1 )
275+ store <1024 x i1 > %b , ptr %rp2 , align 16
276+ ret void
277+ }
278+
; tins256: calls @llvm.ppc.mma.dmxxinstdmr256 four times (immediates 0..3),
; chaining each result into the next call, and stores the <1024 x i1> result
; of every step to %rp1, %rp2, %rp3 and %rp4 respectively.
; In the expected assembly the destination operand is dmrrowp0..dmrrowp3 for
; immediates 0..3, and each <1024 x i1> store appears as two dmxxextfdmr512
; instructions plus four stxvp stores.
;
; NOTE(review): %l3 and %l4 are loaded but never used; the inserts for
; immediates 2 and 3 pass %l2 again. The expected assembly matches this: it
; contains no loads from r5 or r6 and reuses vsp34 for the last two inserts.
; This looks like a copy-paste slip. If the operands are changed to %l3/%l4,
; the expected-assembly lines must be regenerated with the compiler, because
; the loads and the register assignment will change.
279+ define void @tins256 (ptr %vp1 , ptr %vp2 , ptr %vp3 , ptr %vp4 , ptr %rp1 , ptr %rp2 , ptr %rp3 , ptr %rp4 ) {
280+ ; CHECK-LABEL: tins256:
281+ ; CHECK: # %bb.0: # %entry
282+ ; CHECK-NEXT: lxv v2, 16(r3)
283+ ; CHECK-NEXT: lxv v3, 0(r3)
284+ ; CHECK-NEXT: dmsetdmrz dmr0
285+ ; CHECK-NEXT: dmxxinstdmr256 dmrrowp0, vsp34, 0
286+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
287+ ; CHECK-NEXT: stxvp vsp34, 96(r7)
288+ ; CHECK-NEXT: stxvp vsp36, 64(r7)
289+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
290+ ; CHECK-NEXT: stxvp vsp34, 32(r7)
291+ ; CHECK-NEXT: stxvp vsp36, 0(r7)
292+ ; CHECK-NEXT: lxv v2, 16(r4)
293+ ; CHECK-NEXT: lxv v3, 0(r4)
294+ ; CHECK-NEXT: dmxxinstdmr256 dmrrowp1, vsp34, 1
295+ ; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
296+ ; CHECK-NEXT: stxvp vsp36, 96(r8)
297+ ; CHECK-NEXT: stxvp vsp32, 64(r8)
298+ ; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
299+ ; CHECK-NEXT: stxvp vsp36, 32(r8)
300+ ; CHECK-NEXT: stxvp vsp32, 0(r8)
301+ ; CHECK-NEXT: dmxxinstdmr256 dmrrowp2, vsp34, 2
302+ ; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
303+ ; CHECK-NEXT: stxvp vsp36, 96(r9)
304+ ; CHECK-NEXT: stxvp vsp32, 64(r9)
305+ ; CHECK-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
306+ ; CHECK-NEXT: stxvp vsp36, 32(r9)
307+ ; CHECK-NEXT: stxvp vsp32, 0(r9)
308+ ; CHECK-NEXT: dmxxinstdmr256 dmrrowp3, vsp34, 3
309+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
310+ ; CHECK-NEXT: stxvp vsp34, 96(r10)
311+ ; CHECK-NEXT: stxvp vsp36, 64(r10)
312+ ; CHECK-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
313+ ; CHECK-NEXT: stxvp vsp34, 32(r10)
314+ ; CHECK-NEXT: stxvp vsp36, 0(r10)
315+ ; CHECK-NEXT: blr
316+ ;
317+ ; CHECK-BE-LABEL: tins256:
318+ ; CHECK-BE: # %bb.0: # %entry
319+ ; CHECK-BE-NEXT: lxv v2, 0(r3)
320+ ; CHECK-BE-NEXT: lxv v3, 16(r3)
321+ ; CHECK-BE-NEXT: dmsetdmrz dmr0
322+ ; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp0, vsp34, 0
323+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
324+ ; CHECK-BE-NEXT: stxvp vsp36, 96(r7)
325+ ; CHECK-BE-NEXT: stxvp vsp34, 64(r7)
326+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
327+ ; CHECK-BE-NEXT: stxvp vsp36, 32(r7)
328+ ; CHECK-BE-NEXT: stxvp vsp34, 0(r7)
329+ ; CHECK-BE-NEXT: lxv v2, 0(r4)
330+ ; CHECK-BE-NEXT: lxv v3, 16(r4)
331+ ; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp1, vsp34, 1
332+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
333+ ; CHECK-BE-NEXT: stxvp vsp32, 96(r8)
334+ ; CHECK-BE-NEXT: stxvp vsp36, 64(r8)
335+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
336+ ; CHECK-BE-NEXT: stxvp vsp32, 32(r8)
337+ ; CHECK-BE-NEXT: stxvp vsp36, 0(r8)
338+ ; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp2, vsp34, 2
339+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc_hi0, 1
340+ ; CHECK-BE-NEXT: stxvp vsp32, 96(r9)
341+ ; CHECK-BE-NEXT: stxvp vsp36, 64(r9)
342+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp36, vsp32, wacc0, 0
343+ ; CHECK-BE-NEXT: stxvp vsp32, 32(r9)
344+ ; CHECK-BE-NEXT: stxvp vsp36, 0(r9)
345+ ; CHECK-BE-NEXT: dmxxinstdmr256 dmrrowp3, vsp34, 3
346+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc_hi0, 1
347+ ; CHECK-BE-NEXT: stxvp vsp36, 96(r10)
348+ ; CHECK-BE-NEXT: stxvp vsp34, 64(r10)
349+ ; CHECK-BE-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0
350+ ; CHECK-BE-NEXT: stxvp vsp36, 32(r10)
351+ ; CHECK-BE-NEXT: stxvp vsp34, 0(r10)
352+ ; CHECK-BE-NEXT: blr
353+ entry:
354+ %z = call <1024 x i1 > @llvm.ppc.mma.dmsetdmrz ()
355+ %l1 = load <256 x i1 >, ptr %vp1 , align 16
; Immediate 0: insert %l1 into the dmsetdmrz result.
356+ %a = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr256 (<1024 x i1 > %z , <256 x i1 > %l1 , i32 0 )
357+ store <1024 x i1 > %a , ptr %rp1 , align 16
358+ %l2 = load <256 x i1 >, ptr %vp2 , align 16
; Immediate 1: insert %l2 into the previous result %a.
359+ %b = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr256 (<1024 x i1 > %a , <256 x i1 > %l2 , i32 1 )
360+ store <1024 x i1 > %b , ptr %rp2 , align 16
; %l3 is loaded here but the call below passes %l2 (see the review note above).
361+ %l3 = load <256 x i1 >, ptr %vp3 , align 16
362+ %c = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr256 (<1024 x i1 > %b , <256 x i1 > %l2 , i32 2 )
363+ store <1024 x i1 > %c , ptr %rp3 , align 16
; %l4 is loaded here but the call below passes %l2 (see the review note above).
364+ %l4 = load <256 x i1 >, ptr %vp4 , align 16
365+ %d = call <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr256 (<1024 x i1 > %c , <256 x i1 > %l2 , i32 3 )
366+ store <1024 x i1 > %d , ptr %rp4 , align 16
367+ ret void
368+ }
369+
; Declarations of the @llvm.ppc.mma.* intrinsics referenced by this test file.
; The last four (dmxxinstdmr512, dmxxinstdmr256, dmxxextfdmr512,
; dmxxextfdmr256) are the ones called by tins512, tins256, text512 and
; text256. Their final i32 operand is always passed as a constant in those
; callers (0..1 for the 512 variants, 0..3 for the 256 variants).
132370declare <1024 x i1 > @llvm.ppc.mma.dmsetdmrz ()
133371declare <1024 x i1 > @llvm.ppc.mma.dmmr (<1024 x i1 >)
134372declare <1024 x i1 > @llvm.ppc.mma.dmxor (<1024 x i1 >, <1024 x i1 >)
373+ declare <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr512 (<1024 x i1 >, <256 x i1 >, <256 x i1 >, i32 )
374+ declare <1024 x i1 > @llvm.ppc.mma.dmxxinstdmr256 (<1024 x i1 >, <256 x i1 >, i32 )
375+ declare { <256 x i1 >, <256 x i1 > } @llvm.ppc.mma.dmxxextfdmr512 (<1024 x i1 >, i32 )
376+ declare <256 x i1 > @llvm.ppc.mma.dmxxextfdmr256 (<1024 x i1 >, i32 )
0 commit comments