11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefix=GFX942 %s
33
; <2 x i32> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v2i32 to masked_load_ptr1_mask_v2i32 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 9-17, which are not shown here.
4- define <2 x i32 > @masked_load_v2i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
5- ; GFX942-LABEL: masked_load_v2i32 :
4+ define <2 x i32 > @masked_load_ptr1_mask_v2i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
5+ ; GFX942-LABEL: masked_load_ptr1_mask_v2i32 :
66; GFX942: ; %bb.0: ; %entry
77; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -18,14 +18,14 @@ define <2 x i32> @masked_load_v2i32(ptr addrspace(1) inreg nocapture readonly %p
1818; GFX942-NEXT: s_waitcnt vmcnt(0)
1919; GFX942-NEXT: s_setpc_b64 s[30:31]
2020entry:
21- %0 = insertelement <2 x i1 > poison, i1 %mask , i64 0
22- %1 = shufflevector <2 x i1 > %0 , <2 x i1 > poison, <2 x i32 > zeroinitializer
23- %result = tail call <2 x i32 > @llvm.masked.load.v2i32.p1 (ptr addrspace (1 ) %ptr , i32 2 , <2 x i1 > %1 , <2 x i32 > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector, and the all-zero
; shuffle mask then copies lane 0 into both lanes. The resulting %maskvec is
; the mask operand of the masked load (i32 2 operand, zeroinitializer passthrough).
21+ %partialmaskvec = insertelement <2 x i1 > poison, i1 %mask , i64 0
22+ %maskvec = shufflevector <2 x i1 > %partialmaskvec , <2 x i1 > poison, <2 x i32 > zeroinitializer
23+ %result = tail call <2 x i32 > @llvm.masked.load.v2i32.p1 (ptr addrspace (1 ) %ptr , i32 2 , <2 x i1 > %maskvec , <2 x i32 > zeroinitializer )
2424 ret <2 x i32 > %result
2525}
2626
; <4 x i32> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v4i32 to masked_load_ptr1_mask_v4i32 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 32-42, which are not shown here.
27- define <4 x i32 > @masked_load_v4i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
28- ; GFX942-LABEL: masked_load_v4i32 :
27+ define <4 x i32 > @masked_load_ptr1_mask_v4i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
28+ ; GFX942-LABEL: masked_load_ptr1_mask_v4i32 :
2929; GFX942: ; %bb.0: ; %entry
3030; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3131; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -43,14 +43,14 @@ define <4 x i32> @masked_load_v4i32(ptr addrspace(1) inreg nocapture readonly %p
4343; GFX942-NEXT: s_waitcnt vmcnt(0)
4444; GFX942-NEXT: s_setpc_b64 s[30:31]
4545entry:
46- %0 = insertelement <4 x i1 > poison, i1 %mask , i64 0
47- %1 = shufflevector <4 x i1 > %0 , <4 x i1 > poison, <4 x i32 > zeroinitializer
48- %result = tail call <4 x i32 > @llvm.masked.load.v4i32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <4 x i1 > %1 , <4 x i32 > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector, and the all-zero
; shuffle mask then copies lane 0 into all four lanes. The resulting %maskvec is
; the mask operand of the masked load (i32 4 operand, zeroinitializer passthrough).
46+ %partialmaskvec = insertelement <4 x i1 > poison, i1 %mask , i64 0
47+ %maskvec = shufflevector <4 x i1 > %partialmaskvec , <4 x i1 > poison, <4 x i32 > zeroinitializer
48+ %result = tail call <4 x i32 > @llvm.masked.load.v4i32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <4 x i1 > %maskvec , <4 x i32 > zeroinitializer )
4949 ret <4 x i32 > %result
5050}
5151
; <4 x float> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v4f32 to masked_load_ptr1_mask_v4f32 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 57-67, which are not shown here.
52- define <4 x float > @masked_load_v4f32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
53- ; GFX942-LABEL: masked_load_v4f32 :
52+ define <4 x float > @masked_load_ptr1_mask_v4f32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
53+ ; GFX942-LABEL: masked_load_ptr1_mask_v4f32 :
5454; GFX942: ; %bb.0: ; %entry
5555; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5656; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -68,14 +68,14 @@ define <4 x float> @masked_load_v4f32(ptr addrspace(1) inreg nocapture readonly
6868; GFX942-NEXT: s_waitcnt vmcnt(0)
6969; GFX942-NEXT: s_setpc_b64 s[30:31]
7070entry:
71- %0 = insertelement <4 x i1 > poison, i1 %mask , i64 0
72- %1 = shufflevector <4 x i1 > %0 , <4 x i1 > poison, <4 x i32 > zeroinitializer
73- %result = tail call <4 x float > @llvm.masked.load.v4f32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <4 x i1 > %1 , <4 x float > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector, and the all-zero
; shuffle mask then copies lane 0 into all four lanes. The resulting %maskvec is
; the mask operand of the masked load (i32 4 operand, zeroinitializer passthrough).
71+ %partialmaskvec = insertelement <4 x i1 > poison, i1 %mask , i64 0
72+ %maskvec = shufflevector <4 x i1 > %partialmaskvec , <4 x i1 > poison, <4 x i32 > zeroinitializer
73+ %result = tail call <4 x float > @llvm.masked.load.v4f32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <4 x i1 > %maskvec , <4 x float > zeroinitializer )
7474 ret <4 x float > %result
7575}
7676
; <8 x i32> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v8i32 to masked_load_ptr1_mask_v8i32 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 82-98, which are not shown here.
77- define <8 x i32 > @masked_load_v8i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
78- ; GFX942-LABEL: masked_load_v8i32 :
77+ define <8 x i32 > @masked_load_ptr1_mask_v8i32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
78+ ; GFX942-LABEL: masked_load_ptr1_mask_v8i32 :
7979; GFX942: ; %bb.0: ; %entry
8080; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8181; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -99,14 +99,14 @@ define <8 x i32> @masked_load_v8i32(ptr addrspace(1) inreg nocapture readonly %p
9999; GFX942-NEXT: s_waitcnt vmcnt(0)
100100; GFX942-NEXT: s_setpc_b64 s[30:31]
101101entry:
102- %0 = insertelement <8 x i1 > poison, i1 %mask , i64 0
103- %1 = shufflevector <8 x i1 > %0 , <8 x i1 > poison, <8 x i32 > zeroinitializer
104- %result = tail call <8 x i32 > @llvm.masked.load.v8i32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %1 , <8 x i32 > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector, and the all-zero
; shuffle mask then copies lane 0 into all eight lanes. The resulting %maskvec is
; the mask operand of the masked load (i32 4 operand, zeroinitializer passthrough).
102+ %partialmaskvec = insertelement <8 x i1 > poison, i1 %mask , i64 0
103+ %maskvec = shufflevector <8 x i1 > %partialmaskvec , <8 x i1 > poison, <8 x i32 > zeroinitializer
104+ %result = tail call <8 x i32 > @llvm.masked.load.v8i32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %maskvec , <8 x i32 > zeroinitializer )
105105 ret <8 x i32 > %result
106106}
107107
; <8 x float> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v8f32 to masked_load_ptr1_mask_v8f32 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 113-129, which are not shown here.
108- define <8 x float > @masked_load_v8f32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
109- ; GFX942-LABEL: masked_load_v8f32 :
108+ define <8 x float > @masked_load_ptr1_mask_v8f32 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
109+ ; GFX942-LABEL: masked_load_ptr1_mask_v8f32 :
110110; GFX942: ; %bb.0: ; %entry
111111; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112112; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -130,14 +130,14 @@ define <8 x float> @masked_load_v8f32(ptr addrspace(1) inreg nocapture readonly
130130; GFX942-NEXT: s_waitcnt vmcnt(0)
131131; GFX942-NEXT: s_setpc_b64 s[30:31]
132132entry:
133- %0 = insertelement <8 x i1 > poison, i1 %mask , i64 0
134- %1 = shufflevector <8 x i1 > %0 , <8 x i1 > poison, <8 x i32 > zeroinitializer
135- %result = tail call <8 x float > @llvm.masked.load.v8f32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %1 , <8 x float > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector, and the all-zero
; shuffle mask then copies lane 0 into all eight lanes. The resulting %maskvec is
; the mask operand of the masked load (i32 4 operand, zeroinitializer passthrough).
133+ %partialmaskvec = insertelement <8 x i1 > poison, i1 %mask , i64 0
134+ %maskvec = shufflevector <8 x i1 > %partialmaskvec , <8 x i1 > poison, <8 x i32 > zeroinitializer
135+ %result = tail call <8 x float > @llvm.masked.load.v8f32.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %maskvec , <8 x float > zeroinitializer )
136136 ret <8 x float > %result
137137}
138138
; <8 x i16> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v8i16 to masked_load_ptr1_mask_v8i16 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 144-154, which are not shown here.
139- define <8 x i16 > @masked_load_v8i16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
140- ; GFX942-LABEL: masked_load_v8i16 :
139+ define <8 x i16 > @masked_load_ptr1_mask_v8i16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
140+ ; GFX942-LABEL: masked_load_ptr1_mask_v8i16 :
141141; GFX942: ; %bb.0: ; %entry
142142; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143143; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -155,14 +155,14 @@ define <8 x i16> @masked_load_v8i16(ptr addrspace(1) inreg nocapture readonly %p
155155; GFX942-NEXT: s_waitcnt vmcnt(0)
156156; GFX942-NEXT: s_setpc_b64 s[30:31]
157157entry:
158- %0 = insertelement <8 x i1 > poison, i1 %mask , i16 0
159- %1 = shufflevector <8 x i1 > %0 , <8 x i1 > poison, <8 x i32 > zeroinitializer
160- %result = tail call <8 x i16 > @llvm.masked.load.v8i16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %1 , <8 x i16 > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector (lane index typed
; i16 here), and the all-zero shuffle mask then copies lane 0 into all eight
; lanes. The resulting %maskvec is the mask operand of the masked load
; (i32 4 operand, zeroinitializer passthrough).
158+ %partialmaskvec = insertelement <8 x i1 > poison, i1 %mask , i16 0
159+ %maskvec = shufflevector <8 x i1 > %partialmaskvec , <8 x i1 > poison, <8 x i32 > zeroinitializer
160+ %result = tail call <8 x i16 > @llvm.masked.load.v8i16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %maskvec , <8 x i16 > zeroinitializer )
161161 ret <8 x i16 > %result
162162}
163163
; <8 x half> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v8f16 to masked_load_ptr1_mask_v8f16 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 169-179, which are not shown here.
164- define <8 x half > @masked_load_v8f16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
165- ; GFX942-LABEL: masked_load_v8f16 :
164+ define <8 x half > @masked_load_ptr1_mask_v8f16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
165+ ; GFX942-LABEL: masked_load_ptr1_mask_v8f16 :
166166; GFX942: ; %bb.0: ; %entry
167167; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168168; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -180,14 +180,14 @@ define <8 x half> @masked_load_v8f16(ptr addrspace(1) inreg nocapture readonly %
180180; GFX942-NEXT: s_waitcnt vmcnt(0)
181181; GFX942-NEXT: s_setpc_b64 s[30:31]
182182entry:
183- %0 = insertelement <8 x i1 > poison, i1 %mask , i16 0
184- %1 = shufflevector <8 x i1 > %0 , <8 x i1 > poison, <8 x i32 > zeroinitializer
185- %result = tail call <8 x half > @llvm.masked.load.v8f16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %1 , <8 x half > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector (lane index typed
; i16 here), and the all-zero shuffle mask then copies lane 0 into all eight
; lanes. The resulting %maskvec is the mask operand of the masked load
; (i32 4 operand, zeroinitializer passthrough).
183+ %partialmaskvec = insertelement <8 x i1 > poison, i1 %mask , i16 0
184+ %maskvec = shufflevector <8 x i1 > %partialmaskvec , <8 x i1 > poison, <8 x i32 > zeroinitializer
185+ %result = tail call <8 x half > @llvm.masked.load.v8f16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %maskvec , <8 x half > zeroinitializer )
186186 ret <8 x half > %result
187187}
188188
; <8 x bfloat> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v8bf16 to masked_load_ptr1_mask_v8bf16 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 194-204, which are not shown here.
189- define <8 x bfloat> @masked_load_v8bf16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
190- ; GFX942-LABEL: masked_load_v8bf16 :
189+ define <8 x bfloat> @masked_load_ptr1_mask_v8bf16 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
190+ ; GFX942-LABEL: masked_load_ptr1_mask_v8bf16 :
191191; GFX942: ; %bb.0: ; %entry
192192; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193193; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -205,14 +205,14 @@ define <8 x bfloat> @masked_load_v8bf16(ptr addrspace(1) inreg nocapture readonl
205205; GFX942-NEXT: s_waitcnt vmcnt(0)
206206; GFX942-NEXT: s_setpc_b64 s[30:31]
207207entry:
208- %0 = insertelement <8 x i1 > poison, i1 %mask , i32 0
209- %1 = shufflevector <8 x i1 > %0 , <8 x i1 > poison, <8 x i32 > zeroinitializer
210- %result = tail call <8 x bfloat> @llvm.masked.load.v8bf16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %1 , <8 x bfloat> zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector (lane index typed
; i32 here), and the all-zero shuffle mask then copies lane 0 into all eight
; lanes. The resulting %maskvec is the mask operand of the masked load
; (i32 4 operand, zeroinitializer passthrough).
208+ %partialmaskvec = insertelement <8 x i1 > poison, i1 %mask , i32 0
209+ %maskvec = shufflevector <8 x i1 > %partialmaskvec , <8 x i1 > poison, <8 x i32 > zeroinitializer
210+ %result = tail call <8 x bfloat> @llvm.masked.load.v8bf16.p1 (ptr addrspace (1 ) %ptr , i32 4 , <8 x i1 > %maskvec , <8 x bfloat> zeroinitializer )
211211 ret <8 x bfloat> %result
212212}
213213
; <16 x i8> masked-load test. This diff renames the function and its
; GFX942-LABEL from masked_load_v16i8 to masked_load_ptr1_mask_v16i8 and
; replaces the unnamed temporaries %0/%1 with %partialmaskvec/%maskvec.
; The hunk header below skips lines 219-247, which are not shown here.
214- define <16 x i8 > @masked_load_v16i8 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
215- ; GFX942-LABEL: masked_load_v16i8 :
214+ define <16 x i8 > @masked_load_ptr1_mask_v16i8 (ptr addrspace (1 ) inreg nocapture readonly %ptr , i1 %mask ) {
215+ ; GFX942-LABEL: masked_load_ptr1_mask_v16i8 :
216216; GFX942: ; %bb.0: ; %entry
217217; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218218; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
@@ -248,8 +248,8 @@ define <16 x i8> @masked_load_v16i8(ptr addrspace(1) inreg nocapture readonly %p
248248; GFX942-NEXT: v_mov_b32_e32 v12, v19
249249; GFX942-NEXT: s_setpc_b64 s[30:31]
250250entry:
251- %0 = insertelement <16 x i1 > poison, i1 %mask , i32 0
252- %1 = shufflevector <16 x i1 > %0 , <16 x i1 > poison, <16 x i32 > zeroinitializer
253- %result = tail call <16 x i8 > @llvm.masked.load.v16i8.p1 (ptr addrspace (1 ) %ptr , i32 4 , <16 x i1 > %1 , <16 x i8 > zeroinitializer )
; New body: %mask is inserted into lane 0 of a poison vector (lane index typed
; i32 here), and the all-zero shuffle mask then copies lane 0 into all sixteen
; lanes. The resulting %maskvec is the mask operand of the masked load
; (i32 4 operand, zeroinitializer passthrough).
251+ %partialmaskvec = insertelement <16 x i1 > poison, i1 %mask , i32 0
252+ %maskvec = shufflevector <16 x i1 > %partialmaskvec , <16 x i1 > poison, <16 x i32 > zeroinitializer
253+ %result = tail call <16 x i8 > @llvm.masked.load.v16i8.p1 (ptr addrspace (1 ) %ptr , i32 4 , <16 x i1 > %maskvec , <16 x i8 > zeroinitializer )
254254 ret <16 x i8 > %result
255255}
0 commit comments