11; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
2- ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes =HSA,ATTRIBUTOR_HSA %s
2+ ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefix =HSA %s
33
44declare void @llvm.memcpy.p1.p4.i32 (ptr addrspace (1 ) nocapture , ptr addrspace (4 ) nocapture , i32 , i1 ) #0
55
@@ -26,30 +26,30 @@ define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
2626}
2727
2828define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast () #1 {
29- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
30- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2:[0-9]+]] {
31- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
32- ; ATTRIBUTOR_HSA -NEXT: ret void
29+ ; HSA -LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
30+ ; HSA -SAME: () #[[ATTR2:[0-9]+]] {
31+ ; HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
32+ ; HSA -NEXT: ret void
3333;
3434 store i32 7 , ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) null to ptr addrspace (4 ))
3535 ret void
3636}
3737
3838define amdgpu_kernel void @store_constant_cast_group_gv_to_flat () #1 {
39- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
40- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
41- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
42- ; ATTRIBUTOR_HSA -NEXT: ret void
39+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
40+ ; HSA -SAME: () #[[ATTR2]] {
41+ ; HSA -NEXT: store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
42+ ; HSA -NEXT: ret void
4343;
4444 store i32 7 , ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.i32 to ptr addrspace (4 ))
4545 ret void
4646}
4747
4848define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat () #1 {
49- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
50- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
51- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
52- ; ATTRIBUTOR_HSA -NEXT: ret void
49+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
50+ ; HSA -SAME: () #[[ATTR2]] {
51+ ; HSA -NEXT: store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
52+ ; HSA -NEXT: ret void
5353;
5454 store i32 7 , ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 )
5555 ret void
@@ -76,36 +76,36 @@ define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
7676}
7777
7878define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
79- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
80- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
81- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
82- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
83- ; ATTRIBUTOR_HSA -NEXT: ret void
79+ ; HSA -LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
80+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
81+ ; HSA -NEXT: [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
82+ ; HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
83+ ; HSA -NEXT: ret void
8484;
8585 %val = load i32 , ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 )
8686 store i32 %val , ptr addrspace (1 ) %out
8787 ret void
8888}
8989
9090define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
91- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
92- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
93- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
94- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
95- ; ATTRIBUTOR_HSA -NEXT: ret void
91+ ; HSA -LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
92+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
93+ ; HSA -NEXT: [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
94+ ; HSA -NEXT: store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
95+ ; HSA -NEXT: ret void
9696;
9797 %val = atomicrmw add ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 1 seq_cst
9898 store i32 %val , ptr addrspace (1 ) %out
9999 ret void
100100}
101101
102102define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
103- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
104- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
105- ; ATTRIBUTOR_HSA -NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
106- ; ATTRIBUTOR_HSA -NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
107- ; ATTRIBUTOR_HSA -NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
108- ; ATTRIBUTOR_HSA -NEXT: ret void
103+ ; HSA -LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
104+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
105+ ; HSA -NEXT: [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
106+ ; HSA -NEXT: [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
107+ ; HSA -NEXT: store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
108+ ; HSA -NEXT: ret void
109109;
110110 %val = cmpxchg ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 0 , i32 1 seq_cst seq_cst
111111 %val0 = extractvalue { i32 , i1 } %val , 0
@@ -114,52 +114,52 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrsp
114114}
115115
116116define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
117- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
118- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
119- ; ATTRIBUTOR_HSA -NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
120- ; ATTRIBUTOR_HSA -NEXT: ret void
117+ ; HSA -LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
118+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
119+ ; HSA -NEXT: call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
120+ ; HSA -NEXT: ret void
121121;
122122 call void @llvm.memcpy.p1.p4.i32 (ptr addrspace (1 ) align 4 %out , ptr addrspace (4 ) align 4 getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), i32 32 , i1 false )
123123 ret void
124124}
125125
126126; Can't just search the pointer value
127127define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
128- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
129- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
130- ; ATTRIBUTOR_HSA -NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
131- ; ATTRIBUTOR_HSA -NEXT: ret void
128+ ; HSA -LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
129+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
130+ ; HSA -NEXT: store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
131+ ; HSA -NEXT: ret void
132132;
133133 store ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ), ptr addrspace (1 ) %out
134134 ret void
135135}
136136
137137; Can't just search pointer types
138138define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat (ptr addrspace (1 ) %out ) #1 {
139- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
140- ; ATTRIBUTOR_HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
141- ; ATTRIBUTOR_HSA -NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
142- ; ATTRIBUTOR_HSA -NEXT: ret void
139+ ; HSA -LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
140+ ; HSA -SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
141+ ; HSA -NEXT: store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
142+ ; HSA -NEXT: ret void
143143;
144144 store i64 ptrtoint (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to i64 ), ptr addrspace (1 ) %out
145145 ret void
146146}
147147
148148; Cast group to flat, do GEP, cast back to group
149149define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group () #1 {
150- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
151- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
152- ; ATTRIBUTOR_HSA -NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
153- ; ATTRIBUTOR_HSA -NEXT: ret void
150+ ; HSA -LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
151+ ; HSA -SAME: () #[[ATTR2]] {
152+ ; HSA -NEXT: store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
153+ ; HSA -NEXT: ret void
154154;
155155 store i32 7 , ptr addrspace (3 ) addrspacecast (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to ptr addrspace (3 ))
156156 ret void
157157}
158158
159159define ptr addrspace (3 ) @ret_constant_cast_group_gv_gep_to_flat_to_group () #1 {
160- ; ATTRIBUTOR_HSA -LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
161- ; ATTRIBUTOR_HSA -SAME: () #[[ATTR2]] {
162- ; ATTRIBUTOR_HSA -NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
160+ ; HSA -LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
161+ ; HSA -SAME: () #[[ATTR2]] {
162+ ; HSA -NEXT: ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
163163;
164164 ret ptr addrspace (3 ) addrspacecast (ptr addrspace (4 ) getelementptr ([256 x i32 ], ptr addrspace (4 ) addrspacecast (ptr addrspace (3 ) @lds.arr to ptr addrspace (4 )), i64 0 , i64 8 ) to ptr addrspace (3 ))
165165}
@@ -170,11 +170,9 @@ attributes #1 = { nounwind }
170170!llvm.module.flags = !{!0 }
171171!0 = !{i32 1 , !"amdhsa_code_object_version" , i32 500 }
172172;.
173+ ; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
174+ ; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
175+ ; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
173176;.
174- ; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
175- ; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
176- ; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
177- ;.
178- ;.
179- ; ATTRIBUTOR_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
177+ ; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
180178;.
0 commit comments