@@ -288,5 +288,104 @@ define amdgpu_kernel void @i32_3d_load_store(ptr %out) {
288288 ret void
289289}
290290
; Flattening of a two-dimensional private array of i16.
; The body fills every element of a [2 x [3 x i16]] addrspace(5) alloca with
; the constants 0..5, then reads element [1][%sel] and writes it to %out.
; The expected output in this function contains no alloca: the six stores are
; folded into a constant <6 x i16> vector and the load becomes an
; extractelement at the flat index 3 + %sel (row 1 * 3 columns + %sel).
; NOTE(review): the RUN line naming the pass under test is outside this view.
291+ define amdgpu_kernel void @i16_2d_load_store (ptr %out , i32 %sel ) {
292+ ; CHECK-LABEL: define amdgpu_kernel void @i16_2d_load_store(
293+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
294+ ; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
295+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5>, i32 [[TMP1]]
296+ ; CHECK-NEXT: store i16 [[TMP2]], ptr [[OUT]], align 2
297+ ; CHECK-NEXT: ret void
298+ ;
299+ %alloca = alloca [2 x [3 x i16 ]], align 16 , addrspace (5 )
300+ %gep.00 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
301+ %gep.01 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
302+ %gep.02 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
303+ %gep.10 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
304+ %gep.11 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
305+ %gep.12 = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
306+ store i16 0 , ptr addrspace (5 ) %gep.00
307+ store i16 1 , ptr addrspace (5 ) %gep.01
308+ store i16 2 , ptr addrspace (5 ) %gep.02
309+ store i16 3 , ptr addrspace (5 ) %gep.10
310+ store i16 4 , ptr addrspace (5 ) %gep.11
311+ store i16 5 , ptr addrspace (5 ) %gep.12
312+ %gep = getelementptr inbounds [2 x [3 x i16 ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel ; row 1, column chosen at run time by %sel
313+ %load = load i16 , ptr addrspace (5 ) %gep
314+ store i16 %load , ptr %out
315+ ret void
316+ }
317+
; Flattening of a two-dimensional private array of float.
; The body fills every element of a [2 x [3 x float]] addrspace(5) alloca with
; the constants 0.0..5.0, then reads element [1][%sel] and writes it to %out.
; The expected output in this function contains no alloca: the six stores are
; folded into a constant <6 x float> vector and the load becomes an
; extractelement at the flat index 3 + %sel (row 1 * 3 columns + %sel).
; NOTE(review): the RUN line naming the pass under test is outside this view.
318+ define amdgpu_kernel void @float_2d_load_store (ptr %out , i32 %sel ) {
319+ ; CHECK-LABEL: define amdgpu_kernel void @float_2d_load_store(
320+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
321+ ; CHECK-NEXT: [[TMP1:%.*]] = add i32 3, [[SEL]]
322+ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <6 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00>, i32 [[TMP1]]
323+ ; CHECK-NEXT: store float [[TMP2]], ptr [[OUT]], align 4
324+ ; CHECK-NEXT: ret void
325+ ;
326+ %alloca = alloca [2 x [3 x float ]], align 16 , addrspace (5 )
327+ %gep.00 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
328+ %gep.01 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
329+ %gep.02 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
330+ %gep.10 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
331+ %gep.11 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
332+ %gep.12 = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
333+ store float 0.0 , ptr addrspace (5 ) %gep.00
334+ store float 1.0 , ptr addrspace (5 ) %gep.01
335+ store float 2.0 , ptr addrspace (5 ) %gep.02
336+ store float 3.0 , ptr addrspace (5 ) %gep.10
337+ store float 4.0 , ptr addrspace (5 ) %gep.11
338+ store float 5.0 , ptr addrspace (5 ) %gep.12
339+ %gep = getelementptr inbounds [2 x [3 x float ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel ; row 1, column chosen at run time by %sel
340+ %load = load float , ptr addrspace (5 ) %gep
341+ store float %load , ptr %out
342+ ret void
343+ }
344+
; Flattening of a two-dimensional private array of pointers.
; Six pointers, formed by indexing %out as an array of ptr with indices 0..5,
; are stored into a [2 x [3 x ptr]] addrspace(5) alloca; element [1][%sel] is
; then loaded and written back through %out.
; In the expected output the %ptr.N address computations remain, the alloca is
; gone, the six stores become an insertelement chain that builds a <6 x ptr>
; value starting from undef, and the load becomes an extractelement at the
; flat index 3 + %sel (row 1 * 3 columns + %sel).
; NOTE(review): the RUN line naming the pass under test is outside this view.
345+ define amdgpu_kernel void @ptr_2d_load_store (ptr %out , i32 %sel ) {
346+ ; CHECK-LABEL: define amdgpu_kernel void @ptr_2d_load_store(
347+ ; CHECK-SAME: ptr [[OUT:%.*]], i32 [[SEL:%.*]]) {
348+ ; CHECK-NEXT: [[PTR_0:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 0
349+ ; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 1
350+ ; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 2
351+ ; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 3
352+ ; CHECK-NEXT: [[PTR_4:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 4
353+ ; CHECK-NEXT: [[PTR_5:%.*]] = getelementptr inbounds ptr, ptr [[OUT]], i32 5
354+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <6 x ptr> undef, ptr [[PTR_0]], i32 0
355+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <6 x ptr> [[TMP1]], ptr [[PTR_1]], i32 1
356+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <6 x ptr> [[TMP2]], ptr [[PTR_2]], i32 2
357+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <6 x ptr> [[TMP3]], ptr [[PTR_3]], i32 3
358+ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <6 x ptr> [[TMP4]], ptr [[PTR_4]], i32 4
359+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <6 x ptr> [[TMP5]], ptr [[PTR_5]], i32 5
360+ ; CHECK-NEXT: [[TMP7:%.*]] = add i32 3, [[SEL]]
361+ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <6 x ptr> [[TMP6]], i32 [[TMP7]]
362+ ; CHECK-NEXT: store ptr [[TMP8]], ptr [[OUT]], align 8
363+ ; CHECK-NEXT: ret void
364+ ;
365+ %alloca = alloca [2 x [3 x ptr ]], align 16 , addrspace (5 )
366+ %gep.00 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 0
367+ %gep.01 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 1
368+ %gep.02 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 0 , i32 2
369+ %gep.10 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 0
370+ %gep.11 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 1
371+ %gep.12 = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 2
372+ %ptr.0 = getelementptr inbounds ptr , ptr %out , i32 0
373+ %ptr.1 = getelementptr inbounds ptr , ptr %out , i32 1
374+ %ptr.2 = getelementptr inbounds ptr , ptr %out , i32 2
375+ %ptr.3 = getelementptr inbounds ptr , ptr %out , i32 3
376+ %ptr.4 = getelementptr inbounds ptr , ptr %out , i32 4
377+ %ptr.5 = getelementptr inbounds ptr , ptr %out , i32 5
378+ store ptr %ptr.0 , ptr addrspace (5 ) %gep.00
379+ store ptr %ptr.1 , ptr addrspace (5 ) %gep.01
380+ store ptr %ptr.2 , ptr addrspace (5 ) %gep.02
381+ store ptr %ptr.3 , ptr addrspace (5 ) %gep.10
382+ store ptr %ptr.4 , ptr addrspace (5 ) %gep.11
383+ store ptr %ptr.5 , ptr addrspace (5 ) %gep.12
384+ %gep = getelementptr inbounds [2 x [3 x ptr ]], ptr addrspace (5 ) %alloca , i32 0 , i32 1 , i32 %sel ; row 1, column chosen at run time by %sel
385+ %load = load ptr , ptr addrspace (5 ) %gep
386+ store ptr %load , ptr %out
387+ ret void
388+ }
389+
; Declarations of the llvm.amdgcn.workitem.id.x / .y intrinsics; each takes no
; arguments and returns i32.
; NOTE(review): none of the fully visible functions in this part of the file
; call them — presumably they serve tests elsewhere in the file; confirm.
291390declare i32 @llvm.amdgcn.workitem.id.x ()
292391declare i32 @llvm.amdgcn.workitem.id.y ()
0 commit comments