diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index ae3570fda412d..5cc5640a5173b 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -1679,6 +1679,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] @@ -1723,6 +1726,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META104:![0-9]+]], !DIExpression(), [[META105:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) @@ -1964,6 +1970,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META172:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), [[META161:![0-9]+]]) @@ -2122,6 +2131,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META212:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) @@ -2322,6 +2334,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META262:![0-9]+]], !DIExpression(), [[META263:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK-DEBUG: omp.par.region103: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) @@ -2402,6 +2417,9 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_A_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_B_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOADGEP_R_ADDR]], [[META286:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK-DEBUG: omp.par.region44: // CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index d8fab26bf1e7f..9aefc6a739e51 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -889,6 +889,8 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META24:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_K]], [[META26:![0-9]+]], !DIExpression(), [[META25]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] @@ -1066,6 +1068,8 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_I]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_RES]], [[META93:![0-9]+]], !DIExpression(), [[META92]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]] diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index c63c6f554f4ae..e8e57aedaa164 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -893,6 +893,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_VLA]], [[META36:![0-9]+]], !DIExpression(), [[META37:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG35:![0-9]+]] @@ -960,6 +961,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOADGEP__RELOADED]], align 8 +// CHECK4-NEXT: #dbg_declare(ptr [[LOADGEP_ARGC_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG56:![0-9]+]] diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index be05f01c94603..4f3745cdd2d23 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -788,8 +788,13 @@ void OpenMPIRBuilder::finalize(Function *Fn) { Instruction &I = *It; It++; - if (I.isTerminator()) + if (I.isTerminator()) { + // Absorb any debug value that terminator may have + if (OI.EntryBB->getTerminator()) + OI.EntryBB->getTerminator()->adoptDbgRecords( + &ArtificialEntry, I.getIterator(), false); continue; + } I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); } diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir new file mode 100644 index 0000000000000..3c45f1f1c76fb --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir @@ -0,0 +1,43 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + + +#di_file = #llvm.di_file<"target.f90" in ""> +#di_null_type = #llvm.di_null_type +#cu = #llvm.di_compile_unit, sourceLanguage = DW_LANG_Fortran95, file = #di_file, producer = "flang", isOptimized = false, emissionKind = Full> +#sp_ty = #llvm.di_subroutine_type +#sp = #llvm.di_subprogram +#int_ty = #llvm.di_basic_type +#var_x = #llvm.di_local_variable +module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, "dlti.endianness" = "little", "dlti.stack_alignment" = 128 : i64, "dlti.mangling_mode" = "e">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "x86-64", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 21.0.0 (/home/haqadeer/work/src/aomp-llvm-project/flang 793f9220ab32f92fc3b253efec2e332c18090e53)", llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = false, omp.is_target_device = false, omp.requires = #omp, omp.target_triples = ["amdgcn-amd-amdhsa"], omp.version = #omp.version} { + llvm.func @_QQmain() attributes {fir.bindc_name = "test", frame_pointer = #llvm.framePointerKind, target_cpu = "x86-64"} { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr + llvm.intr.dbg.declare #var_x = %1 : !llvm.ptr loc(#loc2) + %5 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "x"} + omp.target map_entries(%5 -> %arg0 : !llvm.ptr) { + %6 = llvm.mlir.constant(1 : i32) : i32 + llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr loc(#loc2) + omp.parallel { + %7 = llvm.load %arg0 : !llvm.ptr -> i32 + %8 = llvm.add %7, %6 : i32 + llvm.store %8, %arg0 : i32, !llvm.ptr + omp.terminator + } + omp.terminator + } + llvm.return + } loc(#loc10) +} +#loc1 = loc("target.f90":1:7) +#loc2 = loc("target.f90":3:18) +#loc10 = loc(fused<#sp>[#loc1]) + + +// CHECK: define internal void @__omp_offloading{{.*}}omp_par{{.*}} !dbg ![[FN:[0-9]+]] { +// CHECK-NEXT: omp.par.entry: +// CHECK: #dbg_declare(ptr {{.*}}, ![[VAR:[0-9]+]], {{.*}}) +// CHECK-NEXT: br + +// CHECK: ![[FN]] = {{.*}}!DISubprogram(name: "__omp_offloading_{{.*}}omp_par"{{.*}}) +// CHECK: ![[VAR]] = !DILocalVariable(name: "x", scope: ![[FN]]{{.*}}) +