1616; CHECK-NEXT: remark: test.c:17:5: in artificial function '[[OFF_FUNC]]_debug__', direct call to defined function, callee is 'g'
1717; CHECK-NEXT: remark: test.c:18:3: in artificial function '[[OFF_FUNC]]_debug__', direct call, callee is '@__kmpc_target_deinit'
1818; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', ExternalNotKernel = 0
19- ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', omp_target_thread_limit = 256
2019; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[0] = 0
2120; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[1] = 0
2221; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-max-num-workgroups[2] = 0
2322; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-flat-work-group-size[0] = 1
24- ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-flat-work-group-size[1] = 256
25- ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-waves-per-eu[0] = 1
23+ ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-flat-work-group-size[1] = 1024
24+ ; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-waves-per-eu[0] = 4
2625; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', amdgpu-waves-per-eu[1] = 10
2726; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', Allocas = 3
2827; CHECK-NEXT: remark: test.c:13:0: in artificial function '[[OFF_FUNC]]_debug__', AllocasStaticSizeSum = 20
3938; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', 'load' instruction ('%[[#]]') accesses memory in flat address space
4039; CHECK-NEXT: remark: test.c:12:1: in artificial function '[[OFF_FUNC]]', direct call to defined function, callee is artificial '[[OFF_FUNC]]_debug__'
4140; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', ExternalNotKernel = 0
41+ ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', omp_target_thread_limit = 256
4242; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[0] = 0
4343; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[1] = 0
4444; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-max-num-workgroups[2] = 0
4545; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-flat-work-group-size[0] = 1
46- ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-flat-work-group-size[1] = 1024
47- ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-waves-per-eu[0] = 4
46+ ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-flat-work-group-size[1] = 256
47+ ; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-waves-per-eu[0] = 1
4848; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', amdgpu-waves-per-eu[1] = 10
4949; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', Allocas = 1
5050; CHECK-NEXT: remark: test.c:12:0: in artificial function '[[OFF_FUNC]]', AllocasStaticSizeSum = 8
@@ -95,82 +95,75 @@ target triple = "amdgcn-amd-amdhsa"
9595@__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace (1 ) constant i32 0
9696@__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace (1 ) constant i32 0
9797@__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace (1 ) constant i32 0
98- @0 = private unnamed_addr constant [57 x i8 ] c ";test.c;__omp_offloading_fd02_71f35_h_l12_debug__ ;13;3;;\00 " , align 1
98+ @0 = private unnamed_addr constant [57 x i8 ] c ";test.c;__omp_offloading_fd02_6f0c0_h_l12_debug__ ;13;3;;\00 " , align 1
9999@1 = private unnamed_addr addrspace (1 ) constant %struct.ident_t { i32 0 , i32 2 , i32 0 , i32 56 , ptr @0 }, align 8
100- @__omp_offloading_fd02_71f35_h_l12_dynamic_environment = weak_odr protected addrspace (1 ) global %struct.DynamicEnvironmentTy zeroinitializer
101- @__omp_offloading_fd02_71f35_h_l12_kernel_environment = weak_odr protected addrspace (1 ) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1 , i8 1 , i8 1 , i32 1 , i32 256 , i32 -1 , i32 -1 , i32 0 , i32 0 }, ptr addrspacecast (ptr addrspace (1 ) @1 to ptr ), ptr addrspacecast (ptr addrspace (1 ) @__omp_offloading_fd02_71f35_h_l12_dynamic_environment to ptr ) }
100+ @__omp_offloading_fd02_6f0c0_h_l12_dynamic_environment = weak_odr protected addrspace (1 ) global %struct.DynamicEnvironmentTy zeroinitializer
101+ @__omp_offloading_fd02_6f0c0_h_l12_kernel_environment = weak_odr protected addrspace (1 ) constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1 , i8 1 , i8 1 , i32 1 , i32 256 , i32 -1 , i32 -1 , i32 0 , i32 0 }, ptr addrspacecast (ptr addrspace (1 ) @1 to ptr ), ptr addrspacecast (ptr addrspace (1 ) @__omp_offloading_fd02_6f0c0_h_l12_dynamic_environment to ptr ) }
102102@__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace (4 ) constant i32 500
103103
104104; Function Attrs: convergent noinline norecurse nounwind optnone
105- define internal void @__omp_offloading_fd02_71f35_h_l12_debug__ (ptr noalias noundef %dyn_ptr ) #0 !dbg !16 {
106- entry:
107- %dyn_ptr.addr = alloca ptr , align 8 , addrspace (5 )
108- %i = alloca i32 , align 4 , addrspace (5 )
109- %a = alloca [2 x i32 ], align 4 , addrspace (5 )
110- %dyn_ptr.addr.ascast = addrspacecast ptr addrspace (5 ) %dyn_ptr.addr to ptr
111- %i.ascast = addrspacecast ptr addrspace (5 ) %i to ptr
112- %a.ascast = addrspacecast ptr addrspace (5 ) %a to ptr
113- store ptr %dyn_ptr , ptr %dyn_ptr.addr.ascast , align 8
114- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %dyn_ptr.addr , metadata !24 , metadata !DIExpression ()), !dbg !25
115- %0 = call i32 @__kmpc_target_init (ptr addrspacecast (ptr addrspace (1 ) @__omp_offloading_fd02_71f35_h_l12_kernel_environment to ptr ), ptr %dyn_ptr ), !dbg !26
116- %exec_user_code = icmp eq i32 %0 , -1 , !dbg !26
117- br i1 %exec_user_code , label %user_code.entry , label %worker.exit , !dbg !26
118-
119- user_code.entry: ; preds = %entry
120- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %i , metadata !27 , metadata !DIExpression ()), !dbg !30
121- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %a , metadata !31 , metadata !DIExpression ()), !dbg !35
122- call void @f () #5 , !dbg !36
123- call void @g () #5 , !dbg !37
105+ define internal void @__omp_offloading_fd02_6f0c0_h_l12_debug__ (ptr noalias noundef %0 ) #0 !dbg !16 {
106+ %2 = alloca ptr , align 8 , addrspace (5 )
107+ %3 = alloca i32 , align 4 , addrspace (5 )
108+ %4 = alloca [2 x i32 ], align 4 , addrspace (5 )
109+ %5 = addrspacecast ptr addrspace (5 ) %2 to ptr
110+ %6 = addrspacecast ptr addrspace (5 ) %3 to ptr
111+ %7 = addrspacecast ptr addrspace (5 ) %4 to ptr
112+ store ptr %0 , ptr %5 , align 8
113+ #dbg_declare(ptr addrspace (5 ) %2 , !24 , !DIExpression (), !25 )
114+ %8 = call i32 @__kmpc_target_init (ptr addrspacecast (ptr addrspace (1 ) @__omp_offloading_fd02_6f0c0_h_l12_kernel_environment to ptr ), ptr %0 ), !dbg !26
115+ %9 = icmp eq i32 %8 , -1 , !dbg !26
116+ br i1 %9 , label %10 , label %11 , !dbg !26
117+
118+ 10 : ; preds = %1
119+ #dbg_declare(ptr addrspace (5 ) %3 , !27 , !DIExpression (), !30 )
120+ #dbg_declare(ptr addrspace (5 ) %4 , !31 , !DIExpression (), !35 )
121+ call void @f () #4 , !dbg !36
122+ call void @g () #4 , !dbg !37
124123 call void @__kmpc_target_deinit (), !dbg !38
125124 ret void , !dbg !39
126125
127- worker.exit : ; preds = %entry
126+ 11 : ; preds = %1
128127 ret void , !dbg !26
129128}
130129
130+ ; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
131+ define weak_odr protected amdgpu_kernel void @__omp_offloading_fd02_6f0c0_h_l12 (ptr noalias noundef %0 ) #1 !dbg !40 {
132+ %2 = alloca ptr , align 8 , addrspace (5 )
133+ %3 = addrspacecast ptr addrspace (5 ) %2 to ptr
134+ store ptr %0 , ptr %3 , align 8
135+ #dbg_declare(ptr addrspace (5 ) %2 , !41 , !DIExpression (), !42 )
136+ %4 = load ptr , ptr %3 , align 8 , !dbg !43
137+ call void @__omp_offloading_fd02_6f0c0_h_l12_debug__ (ptr %4 ) #5 , !dbg !43
138+ ret void , !dbg !43
139+ }
140+
131141declare i32 @__kmpc_target_init (ptr , ptr )
132142
133143; Function Attrs: convergent
134- declare void @f (...) #1
144+ declare void @f (...) #2
135145
136146declare void @__kmpc_target_deinit ()
137147
138- ; Function Attrs: convergent mustprogress noinline norecurse nounwind optnone
139- define weak_odr protected amdgpu_kernel void @__omp_offloading_fd02_71f35_h_l12 (ptr noalias noundef %dyn_ptr ) #2 !dbg !40 {
140- entry:
141- %dyn_ptr.addr = alloca ptr , align 8 , addrspace (5 )
142- %dyn_ptr.addr.ascast = addrspacecast ptr addrspace (5 ) %dyn_ptr.addr to ptr
143- store ptr %dyn_ptr , ptr %dyn_ptr.addr.ascast , align 8
144- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %dyn_ptr.addr , metadata !41 , metadata !DIExpression ()), !dbg !42
145- %0 = load ptr , ptr %dyn_ptr.addr.ascast , align 8 , !dbg !43
146- call void @__omp_offloading_fd02_71f35_h_l12_debug__ (ptr %0 ) #6 , !dbg !43
147- ret void , !dbg !43
148- }
149-
150148; Function Attrs: convergent noinline nounwind optnone
151149define hidden void @g () #3 !dbg !44 {
152- entry:
153- %i = alloca i32 , align 4 , addrspace (5 )
154- %a = alloca [2 x i32 ], align 4 , addrspace (5 )
155- %i.ascast = addrspacecast ptr addrspace (5 ) %i to ptr
156- %a.ascast = addrspacecast ptr addrspace (5 ) %a to ptr
157- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %i , metadata !47 , metadata !DIExpression ()), !dbg !48
158- tail call void @llvm.dbg.declare (metadata ptr addrspace (5 ) %a , metadata !49 , metadata !DIExpression ()), !dbg !50
159- call void @f () #5 , !dbg !51
160- call void @g () #5 , !dbg !52
150+ %1 = alloca i32 , align 4 , addrspace (5 )
151+ %2 = alloca [2 x i32 ], align 4 , addrspace (5 )
152+ %3 = addrspacecast ptr addrspace (5 ) %1 to ptr
153+ %4 = addrspacecast ptr addrspace (5 ) %2 to ptr
154+ #dbg_declare(ptr addrspace (5 ) %1 , !47 , !DIExpression (), !48 )
155+ #dbg_declare(ptr addrspace (5 ) %2 , !49 , !DIExpression (), !50 )
156+ call void @f () #4 , !dbg !51
157+ call void @g () #4 , !dbg !52
161158 ret void , !dbg !53
162159}
163160
164- ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
165- declare void @llvm.dbg.declare (metadata , metadata , metadata ) #4
166-
167- attributes #0 = { convergent noinline norecurse nounwind optnone "amdgpu-flat-work-group-size" ="1,256" "frame-pointer" ="all" "no-trapping-math" ="true" "omp_target_thread_limit" ="256" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
168- attributes #1 = { convergent "frame-pointer" ="all" "no-trapping-math" ="true" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
169- attributes #2 = { convergent mustprogress noinline norecurse nounwind optnone "frame-pointer" ="all" "kernel" "no-trapping-math" ="true" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size" ="true" }
161+ attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer" ="all" "no-trapping-math" ="true" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
162+ attributes #1 = { convergent mustprogress noinline norecurse nounwind optnone "amdgpu-flat-work-group-size" ="1,256" "frame-pointer" ="all" "kernel" "no-trapping-math" ="true" "omp_target_thread_limit" ="256" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" "uniform-work-group-size" ="true" }
163+ attributes #2 = { convergent "frame-pointer" ="all" "no-trapping-math" ="true" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
170164attributes #3 = { convergent noinline nounwind optnone "frame-pointer" ="all" "no-trapping-math" ="true" "stack-protector-buffer-size" ="8" "target-cpu" ="gfx906" "target-features" ="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64" }
171- attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
172- attributes #5 = { convergent }
173- attributes #6 = { nounwind }
165+ attributes #4 = { convergent }
166+ attributes #5 = { nounwind }
174167
175168!llvm.dbg.cu = !{!0 }
176169!omp_offload.info = !{!2 }
@@ -179,10 +172,10 @@ attributes #6 = { nounwind }
179172!llvm.ident = !{!13 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 , !14 }
180173!opencl.ocl.version = !{!15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 , !15 }
181174
182- !0 = distinct !DICompileUnit (language: DW_LANG_C11, file: !1 , producer: "clang version 19 .0.0git (/tmp/llvm/clang 5a5e94265d423fa9eb39dc1b855511195f8dc0fe )" , isOptimized: false , runtimeVersion: 0 , emissionKind: FullDebug, splitDebugInlining: false , nameTableKind: None)
183- !1 = !DIFile (filename: "test.c" , directory: "/tmp" , checksumkind: CSK_MD5, checksum: "eff61a7cf33c8dd1bd6933250fc90157 " )
184- !2 = !{i32 0 , i32 64770 , i32 466741 , !"h" , i32 12 , i32 0 , i32 0 }
185- !3 = !{ptr @__omp_offloading_fd02_71f35_h_l12 , !"kernel" , i32 1 }
175+ !0 = distinct !DICompileUnit (language: DW_LANG_C11, file: !1 , producer: "clang version 20 .0.0git (/tmp/llvm/clang 0c30e7ceeb36294f4523da2590101314ca1c662d )" , isOptimized: false , runtimeVersion: 0 , emissionKind: FullDebug, splitDebugInlining: false , nameTableKind: None)
176+ !1 = !DIFile (filename: "test.c" , directory: "/tmp" , checksumkind: CSK_MD5, checksum: "854099697e49b3ca7d3b3c08503e6fef " )
177+ !2 = !{i32 0 , i32 64770 , i32 454848 , !"h" , i32 12 , i32 0 , i32 0 }
178+ !3 = !{ptr @__omp_offloading_fd02_6f0c0_h_l12 , !"kernel" , i32 1 }
186179!4 = !{i32 1 , !"amdhsa_code_object_version" , i32 500 }
187180!5 = !{i32 7 , !"Dwarf Version" , i32 5 }
188181!6 = !{i32 2 , !"Debug Info Version" , i32 3 }
@@ -192,10 +185,10 @@ attributes #6 = { nounwind }
192185!10 = !{i32 8 , !"PIC Level" , i32 2 }
193186!11 = !{i32 7 , !"frame-pointer" , i32 2 }
194187!12 = !{i32 4 , !"amdgpu_hostcall" , i32 1 }
195- !13 = !{!"clang version 19 .0.0git (/tmp/llvm/clang 5a5e94265d423fa9eb39dc1b855511195f8dc0fe )" }
188+ !13 = !{!"clang version 20 .0.0git (/tmp/llvm/clang 0c30e7ceeb36294f4523da2590101314ca1c662d )" }
196189!14 = !{!"AMD clang version 17.0.0 (https://github.com/RadeonOpenCompute/llvm-project roc-6.0.2 24012 af27734ed982b52a9f1be0f035ac91726fc697e4)" }
197190!15 = !{i32 2 , i32 0 }
198- !16 = distinct !DISubprogram (name: "__omp_offloading_fd02_71f35_h_l12_debug__ " , scope: !17 , file: !17 , line: 13 , type: !18 , scopeLine: 13 , flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0 , retainedNodes: !23 )
191+ !16 = distinct !DISubprogram (name: "__omp_offloading_fd02_6f0c0_h_l12_debug__ " , scope: !17 , file: !17 , line: 13 , type: !18 , scopeLine: 13 , flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0 , retainedNodes: !23 )
199192!17 = !DIFile (filename: "test.c" , directory: "/tmp" )
200193!18 = !DISubroutineType (types: !19 )
201194!19 = !{null , !20 }
@@ -219,7 +212,7 @@ attributes #6 = { nounwind }
219212!37 = !DILocation (line: 17 , column: 5 , scope: !28 )
220213!38 = !DILocation (line: 18 , column: 3 , scope: !28 )
221214!39 = !DILocation (line: 18 , column: 3 , scope: !16 )
222- !40 = distinct !DISubprogram (name: "__omp_offloading_fd02_71f35_h_l12 " , scope: !17 , file: !17 , line: 12 , type: !18 , scopeLine: 12 , flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0 , retainedNodes: !23 )
215+ !40 = distinct !DISubprogram (name: "__omp_offloading_fd02_6f0c0_h_l12 " , scope: !17 , file: !17 , line: 12 , type: !18 , scopeLine: 12 , flags: DIFlagArtificial | DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0 , retainedNodes: !23 )
223216!41 = !DILocalVariable (name: "dyn_ptr" , arg: 1 , scope: !40 , type: !20 , flags: DIFlagArtificial)
224217!42 = !DILocation (line: 0 , scope: !40 )
225218!43 = !DILocation (line: 12 , column: 1 , scope: !40 )
0 commit comments