Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
.Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
.Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
.Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
.Case([](arith::XOrIOp) { return arith::AtomicRMWKind::xori; })
.Case([](arith::MaxNumFOp) { return arith::AtomicRMWKind::maxnumf; })
.Case([](arith::MinNumFOp) { return arith::AtomicRMWKind::minnumf; })
.Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
// TODO: AtomicRMW supports other kinds of reductions this is
// currently not detecting, add those when the need arises.
return std::nullopt;
});
if (!maybeKind)
Expand Down
10 changes: 9 additions & 1 deletion mlir/lib/Dialect/Vector/IR/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,15 @@ Value mlir::vector::getVectorReductionOp(arith::AtomicRMWKind op,
case arith::AtomicRMWKind::ori:
return vector::ReductionOp::create(builder, vector.getLoc(),
CombiningKind::OR, vector);
// TODO: Add remaining reduction operations.
case arith::AtomicRMWKind::minnumf:
return vector::ReductionOp::create(builder, vector.getLoc(),
CombiningKind::MINNUMF, vector);
case arith::AtomicRMWKind::maxnumf:
return vector::ReductionOp::create(builder, vector.getLoc(),
CombiningKind::MAXNUMF, vector);
case arith::AtomicRMWKind::xori:
return vector::ReductionOp::create(builder, vector.getLoc(),
CombiningKind::XOR, vector);
default:
(void)emitOptionalError(loc, "Reduction operation type not supported");
break;
Expand Down
100 changes: 100 additions & 0 deletions mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,106 @@ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>)
// CHECK: affine.store %[[final_red]], %{{.*}} : memref<256xi32>
// CHECK: }

// -----

// Integer XOR reduction over the inner loop. The expected output carries a
// vector<128xi32> accumulator that starts from an all-zero splat and is
// collapsed by a single `vector.reduction <xor>` whose result is stored
// directly. Presumably no trailing scalar combine is emitted because the scalar
// initial value (0) is already the neutral element of XOR.
func.func @vecdim_reduction_xori(%in: memref<256x512xi32>, %out: memref<256xi32>) {
  %cst = arith.constant 0 : i32
  affine.for %i = 0 to 256 {
    %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
      %ld = affine.load %in[%i, %j] : memref<256x512xi32>
      %xor = arith.xori %red_iter, %ld : i32
      affine.yield %xor : i32
    }
    affine.store %final_red, %out[%i] : memref<256xi32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_xori(
// CHECK-SAME:    %[[in:.*]]: memref<256x512xi32>,
// CHECK-SAME:    %[[out:.*]]: memref<256xi32>) {
// CHECK:         %[[cst:.*]] = arith.constant 0 : i32
// CHECK:         affine.for %[[i:.*]] = 0 to 256 {
// CHECK:           %[[vneutral:.*]] = arith.constant dense<0> : vector<128xi32>
// CHECK:           %[[vred:.*]] = affine.for %[[j:.*]] = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vneutral]]) -> (vector<128xi32>) {
// CHECK:             %[[pad:.*]] = ub.poison : i32
// CHECK:             %[[ld:.*]] = vector.transfer_read %[[in]]{{\[}}%[[i]], %[[j]]], %[[pad]] : memref<256x512xi32>, vector<128xi32>
// CHECK:             %[[xor:.*]] = arith.xori %[[red_iter]], %[[ld]] : vector<128xi32>
// CHECK:             affine.yield %[[xor]] : vector<128xi32>
// CHECK:           }
// CHECK:           %[[final_red:.*]] = vector.reduction <xor>, %[[vred]] : vector<128xi32> into i32
// CHECK:           affine.store %[[final_red]], %[[out]]{{\[}}%[[i]]] : memref<256xi32>
// CHECK:         }
// CHECK:         return
// CHECK:       }
Comment on lines 261 to 277
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use meaningful FileCheck variable names. See comprehensive guidelines here: https://mlir.llvm.org/getting_started/TestingGuide/


// -----

// Floating-point `minnumf` reduction over the inner loop. The scalar initial
// value is +inf (0x7F800000) so that the reduction result actually depends on
// the loaded data. The expected output carries a vector<128xf32> accumulator
// that starts from a NaN splat (0x7FC00000), collapses it with
// `vector.reduction <minnumf>`, and then folds the scalar initial value back in
// with one extra scalar `arith.minnumf` before the store.
func.func @vecdim_reduction_minnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %cst = arith.constant 0x7F800000 : f32
  affine.for %i = 0 to 256 {
    %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
      %ld = affine.load %in[%i, %j] : memref<256x512xf32>
      %min = arith.minnumf %red_iter, %ld : f32
      affine.yield %min : f32
    }
    affine.store %final_red, %out[%i] : memref<256xf32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_minnumf(
// CHECK-SAME:    %[[in:.*]]: memref<256x512xf32>,
// CHECK-SAME:    %[[out:.*]]: memref<256xf32>) {
// CHECK:         %[[cst:.*]] = arith.constant 0x7F800000 : f32
// CHECK:         affine.for %[[i:.*]] = 0 to 256 {
// CHECK:           %[[vneutral:.*]] = arith.constant dense<0x7FC00000> : vector<128xf32>
// CHECK:           %[[vred:.*]] = affine.for %[[j:.*]] = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vneutral]]) -> (vector<128xf32>) {
// CHECK:             %[[pad:.*]] = ub.poison : f32
// CHECK:             %[[ld:.*]] = vector.transfer_read %[[in]]{{\[}}%[[i]], %[[j]]], %[[pad]] : memref<256x512xf32>, vector<128xf32>
// CHECK:             %[[min:.*]] = arith.minnumf %[[red_iter]], %[[ld]] : vector<128xf32>
// CHECK:             affine.yield %[[min]] : vector<128xf32>
// CHECK:           }
// CHECK:           %[[vec_red:.*]] = vector.reduction <minnumf>, %[[vred]] : vector<128xf32> into f32
// CHECK:           %[[final_red:.*]] = arith.minnumf %[[vec_red]], %[[cst]] : f32
// CHECK:           affine.store %[[final_red]], %[[out]]{{\[}}%[[i]]] : memref<256xf32>
// CHECK:         }
// CHECK:         return
// CHECK:       }

// -----

// Floating-point `maxnumf` reduction over the inner loop, seeded with -inf
// (0xFF800000). The expected output carries a vector<128xf32> accumulator that
// starts from a negative-NaN splat (0xFFC00000), collapses it with
// `vector.reduction <maxnumf>`, and then folds the scalar initial value back in
// with one extra scalar `arith.maxnumf` before the store.
func.func @vecdim_reduction_maxnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
  %cst = arith.constant 0xFF800000 : f32
  affine.for %i = 0 to 256 {
    %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
      %ld = affine.load %in[%i, %j] : memref<256x512xf32>
      %max = arith.maxnumf %red_iter, %ld : f32
      affine.yield %max : f32
    }
    affine.store %final_red, %out[%i] : memref<256xf32>
  }
  return
}

// CHECK-LABEL: func.func @vecdim_reduction_maxnumf(
// CHECK-SAME:    %[[in:.*]]: memref<256x512xf32>,
// CHECK-SAME:    %[[out:.*]]: memref<256xf32>) {
// CHECK:         %[[cst:.*]] = arith.constant 0xFF800000 : f32
// CHECK:         affine.for %[[i:.*]] = 0 to 256 {
// CHECK:           %[[vneutral:.*]] = arith.constant dense<0xFFC00000> : vector<128xf32>
// CHECK:           %[[vred:.*]] = affine.for %[[j:.*]] = 0 to 512 step 128 iter_args(%[[red_iter:.*]] = %[[vneutral]]) -> (vector<128xf32>) {
// CHECK:             %[[pad:.*]] = ub.poison : f32
// CHECK:             %[[ld:.*]] = vector.transfer_read %[[in]]{{\[}}%[[i]], %[[j]]], %[[pad]] : memref<256x512xf32>, vector<128xf32>
// CHECK:             %[[max:.*]] = arith.maxnumf %[[red_iter]], %[[ld]] : vector<128xf32>
// CHECK:             affine.yield %[[max]] : vector<128xf32>
// CHECK:           }
// CHECK:           %[[vec_red:.*]] = vector.reduction <maxnumf>, %[[vred]] : vector<128xf32> into f32
// CHECK:           %[[final_red:.*]] = arith.maxnumf %[[vec_red]], %[[cst]] : f32
// CHECK:           affine.store %[[final_red]], %[[out]]{{\[}}%[[i]]] : memref<256xf32>
// CHECK:         }
// CHECK:         return
// CHECK:       }

// -----

Expand Down