@@ -240,6 +240,49 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
240240 ret void
241241}
242242
; Loads a <3 x i64> sub-vector from a [2 x [3 x i64]] addrspace(5) alloca
; through a GEP that combines a constant leading index (i64 1) with a variable
; i64 index (%sel3), then stores element 2 of the loaded vector to %out.
; %sel3 is a select chain over the x/y workitem ids that yields 0, 1 or 2.
; The assertions below contain no alloca, load or addrspace(5) store: the two
; stored rows show up as a single <6 x i64> constant <0, 1, 2, 3, 4, 5>, the
; element index is computed as 6 + 3 * %sel3, and the <3 x i64> value is rebuilt
; from three extractelement/insertelement pairs at index, index + 1, index + 2.
; NOTE(review): the leading index 1 steps over one whole [2 x [3 x i64]], so the
; expected index starts at 6 while the constant vector only has elements 0-5.
; Presumably deliberate, to exercise constant-offset plus variable-index
; handling with i64 indices -- confirm before changing the GEP.
243+ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index (ptr %out ) {
244+ ; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(
245+ ; CHECK-SAME: ptr [[OUT:%.*]]) {
246+ ; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
247+ ; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
248+ ; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
249+ ; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
250+ ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
251+ ; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
252+ ; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
253+ ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
254+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
255+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP2]]
256+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
257+ ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
258+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP5]]
259+ ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
260+ ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
261+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5>, i64 [[TMP8]]
262+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
263+ ; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
264+ ; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
265+ ; CHECK-NEXT: ret void
266+ ;
; Build the variable index: %sel2 is 0 when y >= 3, otherwise 1 when x >= 3,
; otherwise 2; %sel3 is that value zero-extended to i64.
267+ %x = tail call i32 @llvm.amdgcn.workitem.id.x ()
268+ %y = tail call i32 @llvm.amdgcn.workitem.id.y ()
269+ %c1 = icmp uge i32 %x , 3
270+ %c2 = icmp uge i32 %y , 3
271+ %sel1 = select i1 %c1 , i32 1 , i32 2
272+ %sel2 = select i1 %c2 , i32 0 , i32 %sel1
273+ %sel3 = zext i32 %sel2 to i64
; Fill the 2x3 array: row 0 (at the alloca base) = <0, 1, 2>,
; row 1 (element [1][0]) = <3, 4, 5>.
274+ %alloca = alloca [2 x [3 x i64 ]], align 16 , addrspace (5 )
275+ %gep.00 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0
276+ %gep.01 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
277+ store <3 x i64 > <i64 0 , i64 1 , i64 2 >, ptr addrspace (5 ) %gep.00
278+ store <3 x i64 > <i64 3 , i64 4 , i64 5 >, ptr addrspace (5 ) %gep.01
; GEP under test: constant leading index 1 plus variable row index %sel3,
; both i64. Only element 2 of the loaded sub-vector reaches %out.
279+ %gep = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i64 1 , i64 %sel3
280+ %load = load <3 x i64 >, ptr addrspace (5 ) %gep
281+ %elem = extractelement <3 x i64 > %load , i32 2
282+ store i64 %elem , ptr %out
283+ ret void
284+ }
285+
243286define amdgpu_kernel void @i64_2d_load_store_subvec_4 (ptr %out ) {
244287; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
245288; CHECK-SAME: ptr [[OUT:%.*]]) {
0 commit comments