|
| 1 | +// RUN: mlir-translate --mlir-to-llvmir %s | FileCheck %s |
| 2 | + |
// Privatizer for an i32 variable, declared with `type = private` and without
// any regions. The simd construct in @_QPsimd_reduction applies it to the
// alloca whose bindc_name is "i".
| 3 | +omp.private {type = private} @_QFsimd_reductionEi_private_i32 : i32 |
// Sum reduction over f32.
//   init:     ignores its block argument and yields the constant 0.0.
//   combiner: yields the fadd of its two arguments, carrying the `contract`
//             fast-math flag.
| 4 | +omp.declare_reduction @add_reduction_f32 : f32 init { |
| 5 | +^bb0(%arg0: f32): |
| 6 | + %0 = llvm.mlir.constant(0.000000e+00 : f32) : f32 |
| 7 | + omp.yield(%0 : f32) |
| 8 | +} combiner { |
| 9 | +^bb0(%arg0: f32, %arg1: f32): |
| 10 | + %0 = llvm.fadd %arg0, %arg1 {fastmathFlags = #llvm.fastmath<contract>} : f32 |
| 11 | + omp.yield(%0 : f32) |
| 12 | +} |
// Function under test. %arg0 is the pointer bound to "a" and %arg1 the pointer
// bound to "sum" (per their fir.bindc_name attributes; the input presumably
// comes from Flang -- confirm). It stores 0.0 through %arg1, then runs an
// omp.simd loop for i = 1 .. 1024 (inclusive, step 1) in which:
//   - the "i" alloca (%4) is privatized as %arg2, and
//   - %arg1 is reduced through @add_reduction_f32 as %arg3.
| 13 | +llvm.func @_QPsimd_reduction(%arg0: !llvm.ptr {fir.bindc_name = "a", llvm.nocapture}, %arg1: !llvm.ptr {fir.bindc_name = "sum", llvm.nocapture}) { |
| 14 | + %0 = llvm.mlir.constant(0.000000e+00 : f32) : f32 |
| 15 | + %1 = llvm.mlir.constant(1 : i32) : i32 |
| 16 | + %2 = llvm.mlir.constant(1024 : i32) : i32 |
| 17 | + %3 = llvm.mlir.constant(1 : i64) : i64 |
| 18 | + %4 = llvm.alloca %3 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr |
| 19 | + llvm.store %0, %arg1 : f32, !llvm.ptr |
| 20 | + omp.simd private(@_QFsimd_reductionEi_private_i32 %4 -> %arg2 : !llvm.ptr) reduction(@add_reduction_f32 %arg1 -> %arg3 : !llvm.ptr) { |
| 21 | + omp.loop_nest (%arg4) : i32 = (%1) to (%2) inclusive step (%1) { |
// Write the induction value to the private copy of "i", then read the current
// partial sum from the reduction variable.
| 22 | + llvm.store %arg4, %arg2 : i32, !llvm.ptr |
| 23 | + %5 = llvm.load %arg3 : !llvm.ptr -> f32 |
| 24 | + %6 = llvm.load %arg2 : !llvm.ptr -> i32 |
| 25 | + %7 = llvm.sext %6 : i32 to i64 |
// i - 1 is used as the element offset into "a" (presumably converting a
// 1-based source index to a 0-based offset -- confirm).
| 26 | + %8 = llvm.sub %7, %3 overflow<nsw> : i64 |
| 27 | + %9 = llvm.getelementptr %arg0[%8] : (!llvm.ptr, i64) -> !llvm.ptr, f32 |
| 28 | + %10 = llvm.load %9 : !llvm.ptr -> f32 |
// Accumulate the loaded element into the reduction variable.
| 29 | + %11 = llvm.fadd %5, %10 {fastmathFlags = #llvm.fastmath<contract>} : f32 |
| 30 | + llvm.store %11, %arg3 : f32, !llvm.ptr |
| 31 | + omp.yield |
| 32 | + } |
| 33 | + } |
| 34 | + llvm.return |
| 35 | +} |
| 36 | + |
// Expected LLVM IR for @_QPsimd_reduction.
// Entry: the original "i" alloca and the 0.0 store to sum, followed by two
// further allocas captured as the private i32 copy and the float reduction
// variable.
| 37 | +// CHECK-LABEL: define void @_QPsimd_reduction( |
| 38 | +// CHECK: %[[ORIG_I:.*]] = alloca i32, i64 1, align 4 |
| 39 | +// CHECK: store float 0.000000e+00, ptr %[[ORIG_SUM:.*]], align 4 |
| 40 | +// CHECK: %[[PRIV_I:.*]] = alloca i32, align 4 |
| 41 | +// CHECK: %[[RED_VAR:.*]] = alloca float, align 4 |
| 42 | +// CHECK: br label %[[VAL_4:.*]] |
| 43 | +// CHECK: omp.region.after_alloca: ; preds = %[[VAL_5:.*]] |
| 44 | +// CHECK: br label %[[VAL_6:.*]] |
| 45 | +// CHECK: entry: ; preds = %[[VAL_4]] |
| 46 | +// CHECK: br label %[[VAL_7:.*]] |
| 47 | +// CHECK: omp.private.init: ; preds = %[[VAL_6]] |
| 48 | +// CHECK: br label %[[VAL_8:.*]] |
// Reduction initialisation: the reduction variable is set to 0.0, the value
// yielded by the init region of @add_reduction_f32.
| 49 | +// CHECK: omp.reduction.init: ; preds = %[[VAL_7]] |
| 50 | +// CHECK: store float 0.000000e+00, ptr %[[RED_VAR]], align 4 |
| 51 | +// CHECK: br label %[[VAL_9:.*]] |
// Loop skeleton: a zero-based counter compared against 1024 with icmp ult; the
// body derives the source-level induction value as counter * 1 + 1.
| 52 | +// CHECK: omp.simd.region: ; preds = %[[VAL_8]] |
| 53 | +// CHECK: br label %[[VAL_10:.*]] |
| 54 | +// CHECK: omp_loop.preheader: ; preds = %[[VAL_9]] |
| 55 | +// CHECK: br label %[[VAL_11:.*]] |
| 56 | +// CHECK: omp_loop.header: ; preds = %[[VAL_12:.*]], %[[VAL_10]] |
| 57 | +// CHECK: %[[VAL_13:.*]] = phi i32 [ 0, %[[VAL_10]] ], [ %[[VAL_14:.*]], %[[VAL_12]] ] |
| 58 | +// CHECK: br label %[[VAL_15:.*]] |
| 59 | +// CHECK: omp_loop.cond: ; preds = %[[VAL_11]] |
| 60 | +// CHECK: %[[VAL_16:.*]] = icmp ult i32 %[[VAL_13]], 1024 |
| 61 | +// CHECK: br i1 %[[VAL_16]], label %[[VAL_17:.*]], label %[[VAL_18:.*]] |
| 62 | +// CHECK: omp_loop.body: ; preds = %[[VAL_15]] |
| 63 | +// CHECK: %[[VAL_19:.*]] = mul i32 %[[VAL_13]], 1 |
| 64 | +// CHECK: %[[VAL_20:.*]] = add i32 %[[VAL_19]], 1 |
| 65 | +// CHECK: br label %[[VAL_21:.*]] |
// Loop body: the loads and stores listed here all carry the same
// !llvm.access.group, and the accumulation goes through the reduction variable
// rather than the original sum pointer.
| 66 | +// CHECK: omp.loop_nest.region: ; preds = %[[VAL_17]] |
| 67 | +// CHECK: store i32 %[[VAL_20]], ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP:.*]] |
| 68 | +// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]] |
| 69 | +// CHECK: %[[VAL_23:.*]] = load i32, ptr %[[PRIV_I]], align 4, !llvm.access.group ![[ACCESS_GROUP]] |
| 70 | +// CHECK: %[[VAL_24:.*]] = sext i32 %[[VAL_23]] to i64 |
| 71 | +// CHECK: %[[VAL_25:.*]] = sub nsw i64 %[[VAL_24]], 1 |
| 72 | +// CHECK: %[[VAL_26:.*]] = getelementptr float, ptr %[[VAL_27:.*]], i64 %[[VAL_25]] |
| 73 | +// CHECK: %[[VAL_28:.*]] = load float, ptr %[[VAL_26]], align 4, !llvm.access.group ![[ACCESS_GROUP]] |
| 74 | +// CHECK: %[[VAL_29:.*]] = fadd contract float %[[RED_VAL]], %[[VAL_28]] |
| 75 | +// CHECK: store float %[[VAL_29]], ptr %[[RED_VAR]], align 4, !llvm.access.group ![[ACCESS_GROUP]] |
| 76 | +// CHECK: br label %[[VAL_30:.*]] |
| 77 | +// CHECK: omp.region.cont1: ; preds = %[[VAL_21]] |
| 78 | +// CHECK: br label %[[VAL_12]] |
| 79 | +// CHECK: omp_loop.inc: ; preds = %[[VAL_30]] |
| 80 | +// CHECK: %[[VAL_14]] = add nuw i32 %[[VAL_13]], 1 |
| 81 | +// CHECK: br label %[[VAL_11]], !llvm.loop ![[LOOP:.*]] |
| 82 | +// CHECK: omp_loop.exit: ; preds = %[[VAL_15]] |
| 83 | +// CHECK: br label %[[VAL_31:.*]] |
| 84 | +// CHECK: omp_loop.after: ; preds = %[[VAL_18]] |
| 85 | +// CHECK: br label %[[VAL_32:.*]] |
// After the loop: the original sum and the reduction variable are loaded,
// added with `fadd contract`, and the result is stored back to the original
// sum. RED_VAL is rebound here; its earlier binding was for the in-loop load.
| 86 | +// CHECK: omp.region.cont: ; preds = %[[VAL_31]] |
| 87 | +// CHECK: %[[SUM_VAL:.*]] = load float, ptr %[[ORIG_SUM]], align 4 |
| 88 | +// CHECK: %[[RED_VAL:.*]] = load float, ptr %[[RED_VAR]], align 4 |
| 89 | +// CHECK: %[[COMBINED_VAL:.*]] = fadd contract float %[[SUM_VAL]], %[[RED_VAL]] |
| 90 | +// CHECK: store float %[[COMBINED_VAL]], ptr %[[ORIG_SUM]], align 4 |
| 91 | +// CHECK: ret void |
| 92 | + |
// Metadata: the loop node references a parallel_accesses entry naming the
// access group used in the body, plus vectorize.enable set to true.
| 93 | +// CHECK: ![[ACCESS_GROUP]] = distinct !{} |
| 94 | +// CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[PARALLEL_ACCESS:.*]], ![[VECTORIZE:.*]]} |
| 95 | +// CHECK: ![[PARALLEL_ACCESS]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP]]} |
| 96 | +// CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true} |
0 commit comments