|
| 1 | +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s |
| 2 | + |
| 3 | +// Only check the overall shape of the code and the presence of relevant |
| 4 | +// runtime calls. Actual IR checking is done at the OpenMPIRBuilder level. |
| 5 | + |
| 6 | +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } { // Module is marked as GPU target-device code for the amdgcn-amd-amdhsa triple; allocas use memory space 5. |
| 7 | + omp.declare_reduction @add_reduction_i32 : i32 init { // i32 sum reduction; this init region supplies the starting value. |
| 8 | + ^bb0(%arg0: i32): |
| 9 | + %0 = llvm.mlir.constant(0 : i32) : i32 // 0 is the additive identity. |
| 10 | + omp.yield(%0 : i32) |
| 11 | + } combiner { // Combiner region: yields the sum of its two i32 arguments. |
| 12 | + ^bb0(%arg0: i32, %arg1: i32): |
| 13 | + %0 = llvm.add %arg0, %arg1 : i32 |
| 14 | + omp.yield(%0 : i32) |
| 15 | + } |
| 16 | + llvm.func @simple_target_teams_only_reduction_() attributes {fir.internal_name = "_QPsimple_target_teams_only_reduction", frame_pointer = #llvm.framePointerKind<all>, omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "gfx1030", target_features = #llvm.target_features<["+16-bit-insts", "+ci-insts", "+dl-insts", "+dot1-insts", "+dot10-insts", "+dot2-insts", "+dot5-insts", "+dot6-insts", "+dot7-insts", "+dpp", "+gfx10-3-insts", "+gfx10-insts", "+gfx8-insts", "+gfx9-insts", "+gws", "+image-insts", "+s-memrealtime", "+s-memtime-inst", "+wavefrontsize32"]>} { // Function under test: an omp.target region wrapping an omp.teams reduction over "sum". |
| 17 | + %0 = llvm.mlir.constant(1 : i64) : i64 |
| 18 | + %1 = llvm.alloca %0 x i32 {bindc_name = "sum"} : (i64) -> !llvm.ptr<5> // "sum" is allocated in address space 5. |
| 19 | + %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr // Cast to the default address space for the uses below. |
| 20 | + %3 = llvm.mlir.constant(1 : i64) : i64 |
| 21 | + %4 = llvm.alloca %3 x i32 {bindc_name = "index_"} : (i64) -> !llvm.ptr<5> // "index_" is allocated in address space 5 as well. |
| 22 | + %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr |
| 23 | + %6 = llvm.mlir.constant(0 : i32) : i32 |
| 24 | + %7 = llvm.mlir.constant(1 : i64) : i64 // %7 is not referenced by any later op in this function. |
| 25 | + %8 = llvm.mlir.constant(1 : i64) : i64 // %8 is not referenced by any later op in this function. |
| 26 | + llvm.store %6, %2 : i32, !llvm.ptr // sum = 0 before entering the target region. |
| 27 | + %9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"} // "sum" is mapped tofrom and captured by reference. |
| 28 | + %10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"} // "index_" is an implicit map captured by copy. |
| 29 | + omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) { // Inside the region, %arg0 is the mapped "sum" and %arg1 the mapped "index_". |
| 30 | + %11 = llvm.mlir.constant(0 : index) : i64 // Loop exit threshold for the remaining-iterations counter. |
| 31 | + %12 = llvm.mlir.constant(10000 : index) : i64 // Initial remaining-iterations count. |
| 32 | + %13 = llvm.mlir.constant(1 : index) : i64 // Step, used for both the counter decrement and (truncated) the induction increment. |
| 33 | + omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) { // Reduce "sum" across teams with @add_reduction_i32; the body accesses it through %arg2. |
| 34 | + %14 = llvm.trunc %13 : i64 to i32 // i32 copy of the step constant 1. |
| 35 | + llvm.br ^bb1(%14, %12 : i32, i64) // Enter the loop with induction value 1 and 10000 iterations remaining. |
| 36 | + ^bb1(%15: i32, %16: i64): // 2 preds: ^bb0, ^bb2 |
| 37 | + %17 = llvm.icmp "sgt" %16, %11 : i64 // Keep looping while the remaining count is greater than 0. |
| 38 | + llvm.cond_br %17, ^bb2, ^bb3 |
| 39 | + ^bb2: // pred: ^bb1 |
| 40 | + llvm.store %15, %arg1 : i32, !llvm.ptr // index_ = current induction value. |
| 41 | + %18 = llvm.load %arg2 : !llvm.ptr -> i32 |
| 42 | + %19 = llvm.load %arg1 : !llvm.ptr -> i32 |
| 43 | + %20 = llvm.add %18, %19 : i32 // Reduction value plus index_. |
| 44 | + llvm.store %20, %arg2 : i32, !llvm.ptr // Write the accumulated value back through %arg2. |
| 45 | + %21 = llvm.load %arg1 : !llvm.ptr -> i32 |
| 46 | + %22 = llvm.add %21, %14 overflow<nsw> : i32 // Next induction value: index_ + 1. |
| 47 | + %23 = llvm.sub %16, %13 : i64 // One fewer iteration remaining. |
| 48 | + llvm.br ^bb1(%22, %23 : i32, i64) |
| 49 | + ^bb3: // pred: ^bb1 |
| 50 | + llvm.store %15, %arg1 : i32, !llvm.ptr // After the loop, store the final induction value to index_. |
| 51 | + omp.terminator |
| 52 | + } |
| 53 | + omp.terminator |
| 54 | + } |
| 55 | + llvm.return |
| 56 | + } |
| 57 | +} |
| 58 | + |
| 59 | +// CHECK: call i32 @__kmpc_target_init |
| 60 | +// CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]] |
| 61 | +// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2 |
| 62 | +// CHECK: icmp eq i32 %[[MASTER]], 1 |
| 63 | +// CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]] |
| 64 | +// CHECK: [[THEN]]: |
| 65 | +// CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32 |
| 66 | +// CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32 |
| 67 | +// CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]] |
| 68 | +// CHECK-NEXT: store i32 %[[FINAL_RESULT]] |
| 69 | + |
| 70 | +// CHECK: call void @__kmpc_barrier |
| 71 | +// CHECK: call void @__kmpc_target_deinit |
| 72 | + |
| 73 | +// CHECK: define internal void @[[OUTLINED]] |
| 74 | +// Skip to the loop |
| 75 | +// CHECK: br i1 |
| 76 | +// CHECK: %[[TEAM_RHS:[A-Za-z0-9_.]*]] = load i32 |
| 77 | +// CHECK-NEXT: %[[TEAM_LHS:[A-Za-z0-9_.]*]] = load i32 |
| 78 | +// CHECK-NEXT: %[[TEAM_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[TEAM_RHS]], %[[TEAM_LHS]] |
| 79 | +// CHECK-NEXT: store i32 %[[TEAM_RESULT]] |
0 commit comments