@@ -294,6 +294,56 @@ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset(ptr %out) {
294294 ret void
295295}
296296
; Kernel test: loads a <3 x i64> sub-vector from a [2 x [3 x i64]] alloca in
; addrspace(5) through a GEP that combines a constant leading index with a
; runtime-selected index, then stores lane 2 of the loaded vector to %out.
;
; The runtime index %sel3 is derived from the workitem ids and is always 0, 1
; or 2. The final GEP therefore addresses i64 element 6 + 3 * %sel3 relative to
; the alloca base; the expected output below computes exactly that (multiply by
; 3, then add 6) and rebuilds the load from three extractelement/insertelement
; pairs on a <6 x i64> value that replaces the alloca.
;
; NOTE(review): the alloca holds only 6 i64 elements, so that element index is
; never below 6. Presumably the out-of-range access is intentional for this
; test -- confirm before changing the GEP.
297+ define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index (ptr %out ) {
298+ ; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_3_i64_offset_index(
299+ ; CHECK-SAME: ptr [[OUT:%.*]]) {
300+ ; CHECK-NEXT: [[X:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x()
301+ ; CHECK-NEXT: [[Y:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.y()
302+ ; CHECK-NEXT: [[C1:%.*]] = icmp uge i32 [[X]], 3
303+ ; CHECK-NEXT: [[C2:%.*]] = icmp uge i32 [[Y]], 3
304+ ; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[C1]], i32 1, i32 2
305+ ; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[C2]], i32 0, i32 [[SEL1]]
306+ ; CHECK-NEXT: [[SEL3:%.*]] = zext i32 [[SEL2]] to i64
307+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <6 x i64> poison
308+ ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <6 x i64> [[ALLOCA]], i64 0, i32 0
309+ ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <6 x i64> [[TMP11]], i64 1, i32 1
310+ ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <6 x i64> [[TMP12]], i64 2, i32 2
311+ ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <6 x i64> [[TMP13]], i64 3, i32 3
312+ ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <6 x i64> [[TMP14]], i64 4, i32 4
313+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <6 x i64> [[TMP15]], i64 5, i32 5
314+ ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[SEL3]], 3
315+ ; CHECK-NEXT: [[TMP2:%.*]] = add i64 6, [[TMP1]]
316+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP2]]
317+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i64> poison, i64 [[TMP3]], i64 0
318+ ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1
319+ ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP5]]
320+ ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i64> [[TMP4]], i64 [[TMP6]], i64 1
321+ ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 2
322+ ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <6 x i64> [[TMP16]], i64 [[TMP8]]
323+ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <3 x i64> [[TMP7]], i64 [[TMP9]], i64 2
324+ ; CHECK-NEXT: [[ELEM:%.*]] = extractelement <3 x i64> [[TMP10]], i32 2
325+ ; CHECK-NEXT: store i64 [[ELEM]], ptr [[OUT]], align 8
326+ ; CHECK-NEXT: ret void
327+ ;
328+ %x = tail call i32 @llvm.amdgcn.workitem.id.x () ; workitem id along x
329+ %y = tail call i32 @llvm.amdgcn.workitem.id.y () ; workitem id along y
330+ %c1 = icmp uge i32 %x , 3 ; unsigned x >= 3
331+ %c2 = icmp uge i32 %y , 3 ; unsigned y >= 3
332+ %sel1 = select i1 %c1 , i32 1 , i32 2 ; 1 if x >= 3, otherwise 2
333+ %sel2 = select i1 %c2 , i32 0 , i32 %sel1 ; 0 if y >= 3, otherwise %sel1, so the value is 0, 1 or 2
334+ %sel3 = zext i32 %sel2 to i64 ; widen to i64 for use as a GEP index
335+ %alloca = alloca [2 x [3 x i64 ]], align 16 , addrspace (5 ) ; 2 rows of 3 x i64 = 6 elements
336+ %gep.00 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0 ; single index 0: the alloca base, i.e. row 0
337+ %gep.01 = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0 ; address of element [1][0], i.e. row 1
338+ store <3 x i64 > <i64 0 , i64 1 , i64 2 >, ptr addrspace (5 ) %gep.00 ; row 0 = 0, 1, 2
339+ store <3 x i64 > <i64 3 , i64 4 , i64 5 >, ptr addrspace (5 ) %gep.01 ; row 1 = 3, 4, 5
340+ %gep = getelementptr inbounds [2 x [3 x i64 ]], ptr addrspace (5 ) %alloca , i64 1 , i64 %sel3 ; leading index 1 steps over one whole [2 x [3 x i64]]; %sel3 then selects a 3 x i64 row
341+ %load = load <3 x i64 >, ptr addrspace (5 ) %gep ; read three consecutive i64 starting at %gep
342+ %elem = extractelement <3 x i64 > %load , i32 2 ; keep only lane 2
343+ store i64 %elem , ptr %out ; publish the selected lane through the kernel argument
344+ ret void
345+ }
346+
297347define amdgpu_kernel void @i64_2d_load_store_subvec_4 (ptr %out ) {
298348; CHECK-LABEL: define amdgpu_kernel void @i64_2d_load_store_subvec_4(
299349; CHECK-SAME: ptr [[OUT:%.*]]) {
0 commit comments