llvm · leewei05 · Oct 14, 2025 · Oct 23, 2025 · Oct 23, 2025 · banach-space
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp
@@ -66,9 +66,10 @@ static Value getSupportedReduction(AffineForOp forOp, unsigned pos,
           .Case([](arith::MaxSIOp) { return arith::AtomicRMWKind::maxs; })
           .Case([](arith::MinUIOp) { return arith::AtomicRMWKind::minu; })
           .Case([](arith::MaxUIOp) { return arith::AtomicRMWKind::maxu; })
+          .Case([](arith::XOrIOp) { return arith::AtomicRMWKind::xori; })
+          .Case([](arith::MaxNumFOp) { return arith::AtomicRMWKind::maxnumf; })
+          .Case([](arith::MinNumFOp) { return arith::AtomicRMWKind::minnumf; })
           .Default([](Operation *) -> std::optional<arith::AtomicRMWKind> {
-            // TODO: AtomicRMW supports other kinds of reductions this is
-            // currently not detecting, add those when the need arises.
             return std::nullopt;
           });
   if (!maybeKind)

@@ -717,7 +717,15 @@ Value mlir::vector::getVectorReductionOp(arith::AtomicRMWKind op,
   case arith::AtomicRMWKind::ori:
     return vector::ReductionOp::create(builder, vector.getLoc(),
                                        CombiningKind::OR, vector);
-  // TODO: Add remaining reduction operations.
+  case arith::AtomicRMWKind::minnumf:
+    return vector::ReductionOp::create(builder, vector.getLoc(),
+                                       CombiningKind::MINNUMF, vector);
+  case arith::AtomicRMWKind::maxnumf:
+    return vector::ReductionOp::create(builder, vector.getLoc(),
+                                       CombiningKind::MAXNUMF, vector);
+  case arith::AtomicRMWKind::xori:
+    return vector::ReductionOp::create(builder, vector.getLoc(),
+                                       CombiningKind::XOR, vector);
   default:
     (void)emitOptionalError(loc, "Reduction operation type not supported");
     break;

diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_reduction.mlir
@@ -243,6 +243,106 @@ func.func @vecdim_reduction_ori(%in: memref<256x512xi32>, %out: memref<256xi32>)
 // CHECK:         affine.store %[[final_red]], %{{.*}} : memref<256xi32>
 // CHECK:       }
 
+// -----
+
+func.func @vecdim_reduction_xori(%in: memref<256x512xi32>, %out: memref<256xi32>) {
+ %cst = arith.constant 0 : i32
+ affine.for %i = 0 to 256 {
+   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (i32) {
+     %ld = affine.load %in[%i, %j] : memref<256x512xi32>
+     %xor = arith.xori %red_iter, %ld : i32
+     affine.yield %xor : i32
+   }
+   affine.store %final_red, %out[%i] : memref<256xi32>
+ }
+ return
+}
+
+// CHECK-LABEL:   func.func @vecdim_reduction_xori(
+// CHECK-SAME:      %[[ARG0:.*]]: memref<256x512xi32>,
+// CHECK-SAME:      %[[ARG1:.*]]: memref<256xi32>) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0 : i32
+// CHECK:           affine.for %[[VAL_1:.*]] = 0 to 256 {
+// CHECK:             %[[VAL_2:.*]] = arith.constant dense<0> : vector<128xi32>
+// CHECK:             %[[VAL_3:.*]] = affine.for %[[VAL_4:.*]] = 0 to 512 step 128 iter_args(%[[VAL_5:.*]] = %[[VAL_2]]) -> (vector<128xi32>) {
+// CHECK:               %[[VAL_6:.*]] = ub.poison : i32
+// CHECK:               %[[VAL_7:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[VAL_1]], %[[VAL_4]]], %[[VAL_6]] : memref<256x512xi32>, vector<128xi32>
+// CHECK:               %[[VAL_8:.*]] = arith.xori %[[VAL_5]], %[[VAL_7]] : vector<128xi32>
+// CHECK:               affine.yield %[[VAL_8]] : vector<128xi32>
+// CHECK:             }
+// CHECK:             %[[VAL_9:.*]] = vector.reduction <xor>, %[[VAL_3]] : vector<128xi32> into i32
+// CHECK:             affine.store %[[VAL_9]], %[[ARG1]]{{\[}}%[[VAL_1]]] : memref<256xi32>
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+func.func @vecdim_reduction_minnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0xFF800000 : f32
+ affine.for %i = 0 to 256 {
+   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+     %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+     %min = arith.minnumf %red_iter, %ld : f32
+     affine.yield %min : f32
+   }
+   affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL:   func.func @vecdim_reduction_minnumf(
+// CHECK-SAME:      %[[ARG0:.*]]: memref<256x512xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: memref<256xf32>) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0xFF800000 : f32
+// CHECK:           affine.for %[[VAL_1:.*]] = 0 to 256 {
+// CHECK:             %[[VAL_2:.*]] = arith.constant dense<0x7FC00000> : vector<128xf32>
+// CHECK:             %[[VAL_3:.*]] = affine.for %[[VAL_4:.*]] = 0 to 512 step 128 iter_args(%[[VAL_5:.*]] = %[[VAL_2]]) -> (vector<128xf32>) {
+// CHECK:               %[[VAL_6:.*]] = ub.poison : f32
+// CHECK:               %[[VAL_7:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[VAL_1]], %[[VAL_4]]], %[[VAL_6]] : memref<256x512xf32>, vector<128xf32>
+// CHECK:               %[[VAL_8:.*]] = arith.minnumf %[[VAL_5]], %[[VAL_7]] : vector<128xf32>
+// CHECK:               affine.yield %[[VAL_8]] : vector<128xf32>
+// CHECK:             }
+// CHECK:             %[[VAL_9:.*]] = vector.reduction <minnumf>, %[[VAL_3]] : vector<128xf32> into f32
+// CHECK:             %[[VAL_10:.*]] = arith.minnumf %[[VAL_9]], %[[VAL_0]] : f32
+// CHECK:             affine.store %[[VAL_10]], %[[ARG1]]{{\[}}%[[VAL_1]]] : memref<256xf32>
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
+
+// -----
+
+func.func @vecdim_reduction_maxnumf(%in: memref<256x512xf32>, %out: memref<256xf32>) {
+ %cst = arith.constant 0xFF800000 : f32
+ affine.for %i = 0 to 256 {
+   %final_red = affine.for %j = 0 to 512 iter_args(%red_iter = %cst) -> (f32) {
+     %ld = affine.load %in[%i, %j] : memref<256x512xf32>
+     %max = arith.maxnumf %red_iter, %ld : f32
+     affine.yield %max : f32
+   }
+   affine.store %final_red, %out[%i] : memref<256xf32>
+ }
+ return
+}
+
+// CHECK-LABEL:   func.func @vecdim_reduction_maxnumf(
+// CHECK-SAME:      %[[ARG0:.*]]: memref<256x512xf32>,
+// CHECK-SAME:      %[[ARG1:.*]]: memref<256xf32>) {
+// CHECK:           %[[VAL_0:.*]] = arith.constant 0xFF800000 : f32
+// CHECK:           affine.for %[[VAL_1:.*]] = 0 to 256 {
+// CHECK:             %[[VAL_2:.*]] = arith.constant dense<0xFFC00000> : vector<128xf32>
+// CHECK:             %[[VAL_3:.*]] = affine.for %[[VAL_4:.*]] = 0 to 512 step 128 iter_args(%[[VAL_5:.*]] = %[[VAL_2]]) -> (vector<128xf32>) {
+// CHECK:               %[[VAL_6:.*]] = ub.poison : f32
+// CHECK:               %[[VAL_7:.*]] = vector.transfer_read %[[ARG0]]{{\[}}%[[VAL_1]], %[[VAL_4]]], %[[VAL_6]] : memref<256x512xf32>, vector<128xf32>
+// CHECK:               %[[VAL_8:.*]] = arith.maxnumf %[[VAL_5]], %[[VAL_7]] : vector<128xf32>
+// CHECK:               affine.yield %[[VAL_8]] : vector<128xf32>
+// CHECK:             }
+// CHECK:             %[[VAL_9:.*]] = vector.reduction <maxnumf>, %[[VAL_3]] : vector<128xf32> into f32
+// CHECK:             %[[VAL_10:.*]] = arith.maxnumf %[[VAL_9]], %[[VAL_0]] : f32
+// CHECK:             affine.store %[[VAL_10]], %[[ARG1]]{{\[}}%[[VAL_1]]] : memref<256xf32>
+// CHECK:           }
+// CHECK:           return
+// CHECK:         }
 
 // -----