@@ -153,6 +153,154 @@ void test_pmdmxvbf16gerx2pp(unsigned char *vdmrp, unsigned char *vpp, vector uns
153153 * ((__dmr1024 * )resp ) = vdmr ;
154154}
155155
156+ // CHECK-LABEL: void @test_dmxvf16gerx2(
157+ // CHECK-NEXT: [[ENTRY:.*:]]
158+ // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
159+ // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]])
160+ // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6:![0-9]+]]
161+ // CHECK-NEXT: ret void
162+ //
// Non-accumulating form: the C code loads the accumulator through vdmrp, but
// the builtin overwrites all of vdmr, so that <1024 x i1> load is dead and
// the expected IR contains only the <256 x i1> vector-pair load.
// NOTE(review): CHECK lines look autogenerated (update_cc_test_checks style);
// regenerate rather than hand-editing them.
163+ void test_dmxvf16gerx2 (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
164+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
165+ __vector_pair vp = * ((__vector_pair * )vpp );
166+ __builtin_mma_dmxvf16gerx2 (& vdmr , vp , vc );
167+ * ((__dmr1024 * )resp ) = vdmr ;
168+ }
169+
170+ // CHECK-LABEL: void @test_dmxvf16gerx2nn(
171+ // CHECK-NEXT: [[ENTRY:.*:]]
172+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
173+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
174+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
175+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
176+ // CHECK-NEXT: ret void
177+ //
// Accumulating variant (nn): the intrinsic takes the prior accumulator value,
// so the IR loads both the <1024 x i1> accumulator and the <256 x i1> pair.
178+ void test_dmxvf16gerx2nn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
179+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
180+ __vector_pair vp = * ((__vector_pair * )vpp );
181+ __builtin_mma_dmxvf16gerx2nn (& vdmr , vp , vc );
182+ * ((__dmr1024 * )resp ) = vdmr ;
183+ }
184+
185+ // CHECK-LABEL: void @test_dmxvf16gerx2np(
186+ // CHECK-NEXT: [[ENTRY:.*:]]
187+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
188+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
189+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
190+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
191+ // CHECK-NEXT: ret void
192+ //
// Accumulating variant (np): same shape as nn, only the intrinsic name differs.
193+ void test_dmxvf16gerx2np (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
194+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
195+ __vector_pair vp = * ((__vector_pair * )vpp );
196+ __builtin_mma_dmxvf16gerx2np (& vdmr , vp , vc );
197+ * ((__dmr1024 * )resp ) = vdmr ;
198+ }
199+
200+ // CHECK-LABEL: void @test_dmxvf16gerx2pn(
201+ // CHECK-NEXT: [[ENTRY:.*:]]
202+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
203+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
204+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
205+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
206+ // CHECK-NEXT: ret void
207+ //
// Accumulating variant (pn): same shape as nn, only the intrinsic name differs.
208+ void test_dmxvf16gerx2pn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
209+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
210+ __vector_pair vp = * ((__vector_pair * )vpp );
211+ __builtin_mma_dmxvf16gerx2pn (& vdmr , vp , vc );
212+ * ((__dmr1024 * )resp ) = vdmr ;
213+ }
214+
215+ // CHECK-LABEL: void @test_dmxvf16gerx2pp(
216+ // CHECK-NEXT: [[ENTRY:.*:]]
217+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
218+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
219+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.dmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]])
220+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
221+ // CHECK-NEXT: ret void
222+ //
// Accumulating variant (pp): same shape as nn, only the intrinsic name differs.
223+ void test_dmxvf16gerx2pp (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
224+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
225+ __vector_pair vp = * ((__vector_pair * )vpp );
226+ __builtin_mma_dmxvf16gerx2pp (& vdmr , vp , vc );
227+ * ((__dmr1024 * )resp ) = vdmr ;
228+ }
229+
230+ // CHECK-LABEL: void @test_pmdmxvf16gerx2(
231+ // CHECK-NEXT: [[ENTRY:.*:]]
232+ // CHECK-NEXT: [[TMP0:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
233+ // CHECK-NEXT: [[TMP1:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2(<256 x i1> [[TMP0]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
234+ // CHECK-NEXT: store <1024 x i1> [[TMP1]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
235+ // CHECK-NEXT: ret void
236+ //
// Masked (pm-prefixed), non-accumulating form: the three constant mask
// arguments are lowered as trailing i32 0 operands, and the dead accumulator
// load through vdmrp is elided, matching the unmasked x2 case above.
237+ void test_pmdmxvf16gerx2 (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
238+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
239+ __vector_pair vp = * ((__vector_pair * )vpp );
240+ __builtin_mma_pmdmxvf16gerx2 (& vdmr , vp , vc , 0 , 0 , 0 );
241+ * ((__dmr1024 * )resp ) = vdmr ;
242+ }
243+
244+ // CHECK-LABEL: void @test_pmdmxvf16gerx2nn(
245+ // CHECK-NEXT: [[ENTRY:.*:]]
246+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
247+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
248+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2nn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
249+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
250+ // CHECK-NEXT: ret void
251+ //
// Masked accumulating variant (nn): accumulator load, vector-pair load, GER
// call with three i32 0 mask operands, then store of the new accumulator.
252+ void test_pmdmxvf16gerx2nn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
253+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
254+ __vector_pair vp = * ((__vector_pair * )vpp );
255+ __builtin_mma_pmdmxvf16gerx2nn (& vdmr , vp , vc , 0 , 0 , 0 );
256+ * ((__dmr1024 * )resp ) = vdmr ;
257+ }
258+
259+ // CHECK-LABEL: void @test_pmdmxvf16gerx2np(
260+ // CHECK-NEXT: [[ENTRY:.*:]]
261+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
262+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
263+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2np(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
264+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
265+ // CHECK-NEXT: ret void
266+ //
// Masked accumulating variant (np): same shape as the nn case, only the
// intrinsic name differs.
267+ void test_pmdmxvf16gerx2np (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
268+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
269+ __vector_pair vp = * ((__vector_pair * )vpp );
270+ __builtin_mma_pmdmxvf16gerx2np (& vdmr , vp , vc , 0 , 0 , 0 );
271+ * ((__dmr1024 * )resp ) = vdmr ;
272+ }
273+
274+ // CHECK-LABEL: void @test_pmdmxvf16gerx2pn(
275+ // CHECK-NEXT: [[ENTRY:.*:]]
276+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
277+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
278+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pn(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
279+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
280+ // CHECK-NEXT: ret void
281+ //
// Masked accumulating variant (pn): same shape as the nn case, only the
// intrinsic name differs.
282+ void test_pmdmxvf16gerx2pn (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
283+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
284+ __vector_pair vp = * ((__vector_pair * )vpp );
285+ __builtin_mma_pmdmxvf16gerx2pn (& vdmr , vp , vc , 0 , 0 , 0 );
286+ * ((__dmr1024 * )resp ) = vdmr ;
287+ }
288+
289+ // CHECK-LABEL: void @test_pmdmxvf16gerx2pp(
290+ // CHECK-NEXT: [[ENTRY:.*:]]
291+ // CHECK-NEXT: [[TMP0:%.*]] = load <1024 x i1>, ptr [[VDMRP:%.*]], align 128, !tbaa [[TBAA6]]
292+ // CHECK-NEXT: [[TMP1:%.*]] = load <256 x i1>, ptr [[VPP:%.*]], align 32, !tbaa [[TBAA2]]
293+ // CHECK-NEXT: [[TMP2:%.*]] = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvf16gerx2pp(<1024 x i1> [[TMP0]], <256 x i1> [[TMP1]], <16 x i8> [[VC:%.*]], i32 0, i32 0, i32 0)
294+ // CHECK-NEXT: store <1024 x i1> [[TMP2]], ptr [[RESP:%.*]], align 128, !tbaa [[TBAA6]]
295+ // CHECK-NEXT: ret void
296+ //
// Masked accumulating variant (pp): same shape as the nn case, only the
// intrinsic name differs.
297+ void test_pmdmxvf16gerx2pp (unsigned char * vdmrp , unsigned char * vpp , vector unsigned char vc , unsigned char * resp ) {
298+ __dmr1024 vdmr = * ((__dmr1024 * )vdmrp );
299+ __vector_pair vp = * ((__vector_pair * )vpp );
300+ __builtin_mma_pmdmxvf16gerx2pp (& vdmr , vp , vc , 0 , 0 , 0 );
301+ * ((__dmr1024 * )resp ) = vdmr ;
302+ }
303+
156304// CHECK: [[TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
157305// CHECK: [[META3]] = !{!"__vector_pair", [[META4:![0-9]+]], i64 0}
158306// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
0 commit comments