11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3- target triple = "amdgcn-amd-amdhsa"
43define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep (i32 %j ) #0 {
54; CHECK-LABEL: define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(
65; CHECK-SAME: i32 [[J:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -14,28 +13,28 @@ define amdgpu_ps void @scalar_alloca_ptr_with_vector_gep_of_gep(i32 %j) #0 {
1413;
1514entry:
1615 %SortedFragments = alloca [10 x <2 x i32 >], align 8 , addrspace (5 )
17- %0 = getelementptr [10 x <2 x i32 >], ptr addrspace (5 ) %SortedFragments , i32 0 , i32 %j
18- %1 = getelementptr i8 , ptr addrspace (5 ) %0 , i32 4
19- %2 = load i32 , ptr addrspace (5 ) %1 , align 4
16+ %row = getelementptr [10 x <2 x i32 >], ptr addrspace (5 ) %SortedFragments , i32 0 , i32 %j
17+ %elt1 = getelementptr i8 , ptr addrspace (5 ) %row , i32 4
18+ %val = load i32 , ptr addrspace (5 ) %elt1 , align 4
2019 ret void
2120}
2221
2322attributes #0 = { "amdgpu-promote-alloca-to-vector-max-regs" ="32" }
2423
25- define amdgpu_cs void @scalar_alloca_ptr_with_vector_gep_of_scratch (i32 inreg , i32 inreg , i32 inreg , <3 x i32 > inreg , i32 inreg , <3 x i32 > %coord , <2 x i32 > %in , i32 %extra , i32 %idx ) # 1 {
24+ define amdgpu_cs void @scalar_alloca_ptr_with_vector_gep_of_scratch (i32 inreg %0 , i32 inreg %1 , i32 inreg %2 , <3 x i32 > inreg %coord , i32 inreg %4 , <3 x i32 > %v_in , <2 x i32 > %in , i32 %extra , i32 %idx , ptr addrspace ( 8 ) %buffer ) # 0 {
2625; CHECK-LABEL: define amdgpu_cs void @scalar_alloca_ptr_with_vector_gep_of_scratch(
27- ; CHECK-SAME: i32 inreg [[TMP0:%.*]], i32 inreg [[TMP1:%.*]], i32 inreg [[TMP2:%.*]], <3 x i32> inreg [[TMP3 :%.*]], i32 inreg [[TMP4 :%.*]], <3 x i32> [[COORD :%.*]], <2 x i32> [[IN:%.*]], i32 [[EXTRA:%.*]], i32 [[IDX:%.*]]) #[[ATTR1:[0-9]+ ]] {
26+ ; CHECK-SAME: i32 inreg [[TMP0:%.*]], i32 inreg [[TMP1:%.*]], i32 inreg [[TMP2:%.*]], <3 x i32> inreg [[COORD :%.*]], i32 inreg [[TMP3 :%.*]], <3 x i32> [[V_IN :%.*]], <2 x i32> [[IN:%.*]], i32 [[EXTRA:%.*]], i32 [[IDX:%.*]], ptr addrspace(8) [[BUFFER:%.*]]) #[[ATTR0 ]] {
2827; CHECK-NEXT: [[ENTRY:.*:]]
2928; CHECK-NEXT: [[V:%.*]] = freeze <3 x i32> poison
30- ; CHECK-NEXT: [[TMP5 :%.*]] = insertelement <3 x i32> [[V]], i32 [[EXTRA]], i32 0
31- ; CHECK-NEXT: [[TMP6 :%.*]] = extractelement <2 x i32> [[IN]], i64 0
32- ; CHECK-NEXT: [[TMP7 :%.*]] = insertelement <3 x i32> [[TMP5 ]], i32 [[TMP6 ]], i32 1
33- ; CHECK-NEXT: [[TMP8 :%.*]] = extractelement <2 x i32> [[IN]], i64 1
34- ; CHECK-NEXT: [[TMP9 :%.*]] = insertelement <3 x i32> [[TMP7 ]], i32 [[TMP8 ]], i32 2
35- ; CHECK-NEXT: [[TMP10 :%.*]] = add i32 1, [[IDX]]
36- ; CHECK-NEXT: [[TMP11 :%.*]] = extractelement <3 x i32> [[TMP9 ]], i32 [[TMP10 ]]
37- ; CHECK-NEXT: [[XF:%.*]] = bitcast i32 [[TMP11 ]] to float
38- ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer. store.f32( float [[XF]], ptr addrspace(8) poison, i32 0, i32 0, i32 0)
29+ ; CHECK-NEXT: [[TMP4 :%.*]] = insertelement <3 x i32> [[V]], i32 [[EXTRA]], i32 0
30+ ; CHECK-NEXT: [[TMP5 :%.*]] = extractelement <2 x i32> [[IN]], i64 0
31+ ; CHECK-NEXT: [[TMP6 :%.*]] = insertelement <3 x i32> [[TMP4 ]], i32 [[TMP5 ]], i32 1
32+ ; CHECK-NEXT: [[TMP7 :%.*]] = extractelement <2 x i32> [[IN]], i64 1
33+ ; CHECK-NEXT: [[TMP8 :%.*]] = insertelement <3 x i32> [[TMP6 ]], i32 [[TMP7 ]], i32 2
34+ ; CHECK-NEXT: [[TMP9 :%.*]] = add i32 1, [[IDX]]
35+ ; CHECK-NEXT: [[TMP10 :%.*]] = extractelement <3 x i32> [[TMP8 ]], i32 [[TMP9 ]]
36+ ; CHECK-NEXT: [[XF:%.*]] = bitcast i32 [[TMP10 ]] to float
37+ ; CHECK-NEXT: store float [[XF]], ptr addrspace(8) [[BUFFER]], align 4
3938; CHECK-NEXT: ret void
4039;
4140entry:
@@ -46,10 +45,6 @@ entry:
4645 %e = getelementptr [2 x i32 ], ptr addrspace (5 ) %v1 , i32 0 , i32 %idx
4746 %x = load i32 , ptr addrspace (5 ) %e
4847 %xf = bitcast i32 %x to float
49- call void @llvm.amdgcn.raw.ptr.buffer. store.f32 ( float %xf , ptr addrspace (8 ) poison, i32 0 , i32 0 , i32 0 )
48+ store float %xf , ptr addrspace (8 ) %buffer , align 4
5049 ret void
5150}
52-
53- attributes #1 = { nounwind "amdgpu-git-ptr-high" ="0x1234" }
54-
55- declare void @llvm.amdgcn.raw.ptr.buffer.store.f32 (float , ptr addrspace (8 ), i32 , i32 , i32 immarg)
0 commit comments