@@ -8,17 +8,20 @@ target triple = "amdgcn-amd-amdhsa"
8
8
9
9
@G = internal addrspace (3 ) global i32 undef , align 4
10
10
@H = internal addrspace (3 ) global i32 undef , align 4
11
+ @X = internal addrspace (3 ) global i32 undef , align 4
11
12
@str = private unnamed_addr addrspace (4 ) constant [1 x i8 ] c "\00 " , align 1
12
13
13
14
; Make sure we do not delete the stores to @G without also replacing the load with `1`.
14
15
;.
15
16
; TUNIT: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
16
17
; TUNIT: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
18
+ ; TUNIT: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
17
19
; TUNIT: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
18
20
; TUNIT: @[[KERNEL_NESTED_PARALLELISM:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
19
21
;.
20
22
; CGSCC: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
21
23
; CGSCC: @[[H:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
24
+ ; CGSCC: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef, align 4
22
25
; CGSCC: @[[STR:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr addrspace(4) constant [1 x i8] zeroinitializer, align 1
23
26
;.
24
27
define void @kernel () "kernel" {
@@ -30,20 +33,17 @@ define void @kernel() "kernel" {
30
33
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CALL]], -1
31
34
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
32
35
; CHECK: if.then:
33
- ; CHECK-NEXT: store i32 1, ptr addrspace(3) @G, align 4
34
36
; CHECK-NEXT: br label [[IF_MERGE:%.*]]
35
37
; CHECK: if.else:
36
- ; CHECK-NEXT: call void @barrier() #[[ATTR5:[0-9]+]]
37
- ; CHECK-NEXT: [[L:%.*]] = load i32, ptr addrspace(3) @G, align 4
38
- ; CHECK-NEXT: call void @use1(i32 [[L]]) #[[ATTR5]]
39
- ; CHECK-NEXT: call void @barrier() #[[ATTR5]]
38
+ ; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]]
39
+ ; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]]
40
+ ; CHECK-NEXT: call void @barrier() #[[ATTR6]]
40
41
; CHECK-NEXT: br label [[IF_MERGE]]
41
42
; CHECK: if.merge:
42
- ; CHECK-NEXT: call void @use1(i32 2) #[[ATTR5 ]]
43
+ ; CHECK-NEXT: call void @use1(i32 2) #[[ATTR6 ]]
43
44
; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
44
45
; CHECK: if.then2:
45
- ; CHECK-NEXT: store i32 2, ptr addrspace(3) @G, align 4
46
- ; CHECK-NEXT: call void @barrier() #[[ATTR5]]
46
+ ; CHECK-NEXT: call void @barrier() #[[ATTR6]]
47
47
; CHECK-NEXT: br label [[IF_END]]
48
48
; CHECK: if.end:
49
49
; CHECK-NEXT: call void @__kmpc_target_deinit(ptr undef, i8 1)
@@ -87,31 +87,90 @@ define void @test_assume() {
87
87
ret void
88
88
}
89
89
90
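+ ; We can't ignore the call to @sync, so the load of @X must be kept rather than folded to the preceding store of 1; it might observe the value 2 (presumably written by another thread), hence this might store 2 into %p.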
91
+ define void @kernel2 (ptr %p ) "kernel" {
92
+ ; CHECK-LABEL: define {{[^@]+}}@kernel2
93
+ ; CHECK-SAME: (ptr [[P:%.*]]) #[[ATTR1:[0-9]+]] {
94
+ ; CHECK-NEXT: store i32 1, ptr addrspace(3) @X, align 4
95
+ ; CHECK-NEXT: call void @sync()
96
+ ; CHECK-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
97
+ ; CHECK-NEXT: store i32 2, ptr addrspace(3) @X, align 4
98
+ ; CHECK-NEXT: store i32 [[V]], ptr [[P]], align 4
99
+ ; CHECK-NEXT: ret void
100
+ ;
101
+ store i32 1 , ptr addrspace (3 ) @X
102
+ call void @sync ()
103
+ %v = load i32 , ptr addrspace (3 ) @X
104
+ store i32 2 , ptr addrspace (3 ) @X
105
+ store i32 %v , ptr %p
106
+ ret void
107
+ }
108
+
109
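+ ; We can't ignore the sync reached through @sync_def (which calls @sync), so the load of @X must be kept rather than folded to the preceding store of 1; it might observe the value 2 (presumably written by another thread), hence this might store 2 into %p.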
110
+ define void @kernel3 (ptr %p ) "kernel" {
111
+ ; TUNIT-LABEL: define {{[^@]+}}@kernel3
112
+ ; TUNIT-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
113
+ ; TUNIT-NEXT: store i32 1, ptr addrspace(3) @X, align 4
114
+ ; TUNIT-NEXT: call void @sync_def.internalized()
115
+ ; TUNIT-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
116
+ ; TUNIT-NEXT: store i32 2, ptr addrspace(3) @X, align 4
117
+ ; TUNIT-NEXT: store i32 [[V]], ptr [[P]], align 4
118
+ ; TUNIT-NEXT: ret void
119
+ ;
120
+ ; CGSCC-LABEL: define {{[^@]+}}@kernel3
121
+ ; CGSCC-SAME: (ptr [[P:%.*]]) #[[ATTR1]] {
122
+ ; CGSCC-NEXT: store i32 1, ptr addrspace(3) @X, align 4
123
+ ; CGSCC-NEXT: call void @sync_def()
124
+ ; CGSCC-NEXT: [[V:%.*]] = load i32, ptr addrspace(3) @X, align 4
125
+ ; CGSCC-NEXT: store i32 2, ptr addrspace(3) @X, align 4
126
+ ; CGSCC-NEXT: store i32 [[V]], ptr [[P]], align 4
127
+ ; CGSCC-NEXT: ret void
128
+ ;
129
+ store i32 1 , ptr addrspace (3 ) @X
130
+ call void @sync_def ()
131
+ %v = load i32 , ptr addrspace (3 ) @X
132
+ store i32 2 , ptr addrspace (3 ) @X
133
+ store i32 %v , ptr %p
134
+ ret void
135
+ }
136
+
137
+ define void @sync_def () {
138
+ ; CHECK-LABEL: define {{[^@]+}}@sync_def() {
139
+ ; CHECK-NEXT: call void @sync()
140
+ ; CHECK-NEXT: ret void
141
+ ;
142
+ call void @sync ()
143
+ ret void
144
+ }
145
+
146
+ declare void @sync ()
90
147
declare void @barrier () norecurse nounwind nocallback "llvm.assume" ="ompx_aligned_barrier"
91
148
declare void @use1 (i32 ) nosync norecurse nounwind nocallback
92
149
declare i32 @__kmpc_target_init (ptr , i8 , i1 ) nocallback
93
150
declare void @__kmpc_target_deinit (ptr , i8 ) nocallback
94
151
declare void @llvm.assume (i1 )
95
152
96
153
!llvm.module.flags = !{!0 , !1 }
97
- !nvvm.annotations = !{!2 }
154
+ !nvvm.annotations = !{!2 , !3 , !4 }
98
155
99
156
!0 = !{i32 7 , !"openmp" , i32 50 }
100
157
!1 = !{i32 7 , !"openmp-device" , i32 50 }
101
158
!2 = !{ptr @kernel , !"kernel" , i32 1 }
159
+ !3 = !{ptr @kernel2 , !"kernel" , i32 1 }
160
+ !4 = !{ptr @kernel3 , !"kernel" , i32 1 }
102
161
103
162
;.
104
163
; CHECK: attributes #[[ATTR0]] = { norecurse "kernel" }
105
- ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
106
- ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nosync nounwind }
107
- ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback }
108
- ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
109
- ; CHECK: attributes #[[ATTR5]] = { nounwind }
164
+ ; CHECK: attributes #[[ATTR1]] = { "kernel" }
165
+ ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback norecurse nounwind "llvm.assume"="ompx_aligned_barrier" }
166
+ ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback norecurse nosync nounwind }
167
+ ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback }
168
+ ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
169
+ ; CHECK: attributes #[[ATTR6]] = { nounwind }
110
170
;.
111
171
; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50}
112
172
; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
113
173
; CHECK: [[META2:![0-9]+]] = !{ptr @kernel, !"kernel", i32 1}
174
+ ; CHECK: [[META3:![0-9]+]] = !{ptr @kernel2, !"kernel", i32 1}
175
+ ; CHECK: [[META4:![0-9]+]] = !{ptr @kernel3, !"kernel", i32 1}
114
176
;.
115
- ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
116
- ; CGSCC: {{.*}}
117
- ; TUNIT: {{.*}}
0 commit comments