Address review comments: propagate fastmath flags, truncate scales to a shaped f8E8M0 type, and add tests

umangyadav · umangyadav · commit 80061d6fbd1d · 2025-06-06T23:40:15.000Z
diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td
@@ -1227,9 +1227,9 @@ def Arith_ScalingExtFOp
           OptionalAttr<Arith_FastMathAttr>:$fastmath)>,
       Results<(outs FloatLike:$out)> {
   let summary =
-      "Upcasts quantized floats using provided scales values following OCP MXFP Spec";
+      "Upcasts input floats using provided scales values following OCP MXFP Spec";
   let description = [{
-  This operation upcasts quantized floating-point values using provided scale 
+  This operation upcasts input floating-point values using provided scale 
   values. It expects both scales and the input operand to be of the same shape, 
   making the operation elementwise. Scales are usually calculated per block 
   following the OCP MXFP spec as described in https://arxiv.org/abs/2310.10537.
@@ -1253,7 +1253,6 @@ def Arith_ScalingExtFOp
     resultTy = get_type(result) 
     scaleTy  = get_type(scale)
     inputTy = get_type(input)
-    assert(scaleTy.shape() == inputTy.shape() == resultTy.shape())
     scale.exponent = arith.truncf(scale) : scaleTy to f8E8M0
     scale.extf = arith.extf(scale.exponent) : f8E8M0 to resultTy
     input.extf = arith.extf(input) : inputTy to resultTy
@@ -1350,7 +1349,7 @@ def Arith_ScalingTruncFOp
   let summary =
       "Downcasts input floating point values using provided scales values following OCP MXFP Spec";
   let description = [{
-    This operation quantizes input using the provided scale values. It expects 
+    This operation downcasts input using the provided scale values. It expects 
     both scales and the input operand to be of the same shape and, therefore, 
     makes the operation elementwise. Scales are usually calculated per block 
     following the OCP MXFP spec as described in https://arxiv.org/abs/2310.10537.
@@ -1378,7 +1377,6 @@ def Arith_ScalingTruncFOp
     scaleTy = get_type(scale)
     inputTy = get_type(input)
     resultTy = get_type(result)
-    assert(scaleTy.shape() == inputTy.shape() == resultTy.shape())
     scale.exponent = arith.truncf(scale) : scaleTy to f8E8M0
     scale.extf = arith.extf(scale.exponent) : f8E8M0 to inputTy
     result = arith.divf(input, scale.extf)
diff --git a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp
@@ -359,7 +359,7 @@ struct F8E8M0ExtFOpConverter : public OpRewritePattern<arith::ExtFOp> {
       result = b.create<arith::TruncFOp>(resultTy, result, nullptr,
                                          op.getFastmathAttr());
     } else if (resultETy.getIntOrFloatBitWidth() > 32) {
-      result = b.create<arith::ExtFOp>(resultTy, result);
+      result = b.create<arith::ExtFOp>(resultTy, result, op.getFastmathAttr());
     }
     rewriter.replaceOp(op, result);
     return success();
@@ -417,11 +417,13 @@ struct ScalingExtFOpConverter : public OpRewritePattern<arith::ScalingExtFOp> {
     ImplicitLocOpBuilder b(op.getLoc(), rewriter);
     Value inputOperand = op.getIn();
     Value scaleOperand = op.getScale();
+    Type scaleTy = scaleOperand.getType();
     Type scaleETy = getElementTypeOrSelf(scaleOperand);
     // allow implicit exponent extraction from 16/32 bits floats
     if (scaleETy.getIntOrFloatBitWidth() >= 16) {
       scaleETy = b.getF8E8M0Type();
-      scaleOperand = b.create<arith::TruncFOp>(scaleETy, scaleOperand, nullptr,
+      scaleTy = cloneToShapedType(scaleTy, scaleETy);
+      scaleOperand = b.create<arith::TruncFOp>(scaleTy, scaleOperand, nullptr,
                                                op.getFastmathAttr());
     }
     if (!llvm::isa<Float8E8M0FNUType>(scaleETy)) {
@@ -461,8 +463,9 @@ struct ScalingTruncFOpConverter
     // allow implicit exponent extraction from 16/32 bits floats
     if (scaleETy.getIntOrFloatBitWidth() >= 16) {
       scaleETy = b.getF8E8M0Type();
-      scaleOperand = b.create<arith::TruncFOp>(scaleETy, scaleOperand);
-      scaleTy = scaleOperand.getType();
+      scaleTy = cloneToShapedType(scaleTy, scaleETy);
+      scaleOperand = b.create<arith::TruncFOp>(scaleTy, scaleOperand, nullptr,
+                                               op.getFastmathAttr());
     }
     if (!llvm::isa<Float8E8M0FNUType>(scaleETy)) {
       return rewriter.notifyMatchFailure(
diff --git a/mlir/test/Dialect/Arith/expand-ops.mlir b/mlir/test/Dialect/Arith/expand-ops.mlir
@@ -336,15 +336,19 @@ func.func @scaling_truncf_vector_f16_to_f6E3M2FN(%arg0 : vector<4xf16>, %arg1: v
 
 // -----
 
-func.func @scaling_truncf_propagate_rounding_mode(%arg0 : vector<4xf16>, %arg1: vector<4xf8E8M0FNU>) -> vector<4xf6E3M2FN> {
-    %0 = arith.scaling_truncf %arg0, %arg1 to_nearest_even : vector<4xf16>, vector<4xf8E8M0FNU> to vector<4xf6E3M2FN>
+func.func @scaling_truncf_propagate_rounding_mode_fast_math(%arg0 : vector<4xf16>, %arg1: vector<4xf16>) -> vector<4xf6E3M2FN> {
+    %0 = arith.scaling_truncf %arg0, %arg1 to_nearest_even fastmath<fast> : vector<4xf16>, vector<4xf16> to vector<4xf6E3M2FN>
     return %0 : vector<4xf6E3M2FN>
 }
-// SCHECK-LABEL: @scaling_truncf_propagate_rounding_mode
-// SCHECK: %[[TRUNCF:.+]] = arith.truncf [[_:%[a-zA-Z0-9_]+]] to_nearest_even : vector<4xf16> to vector<4xf6E3M2FN>
+// SCHECK-LABEL: @scaling_truncf_propagate_rounding_mode_fast_math
+// SCHECK: %[[SCALEF8:.+]] = arith.truncf %arg1 fastmath<fast> : vector<4xf16> to vector<4xf8E8M0FNU>
+// SCHECK: %[[SCALEINTY:.+]] = arith.extf %[[SCALEF8]] fastmath<fast> : vector<4xf8E8M0FNU> to vector<4xf16>
+// SCHECK: %[[DIVF:.+]] = arith.divf %arg0, %[[SCALEINTY]] fastmath<fast> : vector<4xf16>
+// SCHECK: %[[TRUNCF:.+]] = arith.truncf [[_:%[a-zA-Z0-9_]+]] to_nearest_even fastmath<fast> : vector<4xf16> to vector<4xf6E3M2FN>
 // SCHECK: return %[[TRUNCF]] : vector<4xf6E3M2FN>
 
 // -----
+
 func.func @scaling_truncf_f16_to_f4E2M1FN_using_f16_scales(%arg0: f16, %arg1 : f16) -> f4E2M1FN {
     %0 = arith.scaling_truncf %arg0, %arg1 : f16, f16 to f4E2M1FN
     return %0 : f4E2M1FN
@@ -353,6 +357,15 @@ func.func @scaling_truncf_f16_to_f4E2M1FN_using_f16_scales(%arg0: f16, %arg1 : f
 // SCHECK: %[[SCALETRUNCF:.+]] = arith.truncf %arg1 : f16 to f8E8M0FN
 // SCHECK: return
 
+// -----
+func.func @scaling_truncf_vector_f16_to_f4E2M1FN_using_f16_scales(%arg0: vector<4xf16>, %arg1 : vector<4xf16>) -> vector<4xf4E2M1FN> {
+    %0 = arith.scaling_truncf %arg0, %arg1 : vector<4xf16>, vector<4xf16> to vector<4xf4E2M1FN>
+    return %0 : vector<4xf4E2M1FN>
+}
+// SCHECK-LABEL: @scaling_truncf_vector_f16_to_f4E2M1FN_using_f16_scales
+// SCHECK: %[[SCALETRUNCF:.+]] = arith.truncf %arg1 : vector<4xf16> to vector<4xf8E8M0FNU>
+// SCHECK: return
+
 // -----
 
 func.func @invalid_scaling_truncf_to_f4E2M1FN(%arg0: f16, %arg1 : f8E5M2FNUZ) -> f4E2M1FN {
@@ -507,6 +520,34 @@ func.func @scaling_extf_vector_to_bf16(%arg0: vector<4xf4E2M1FN>, %arg1 : vector
 
 // -----
 
+func.func @scaling_extf_vector_to_f32_using_f16_scales(%arg0: vector<4xf4E2M1FN>, %arg1 : vector<4xf16>) -> vector<4xf32> {
+    %0 = arith.scaling_extf %arg0, %arg1 : vector<4xf4E2M1FN>, vector<4xf16> to vector<4xf32>
+    return %0 : vector<4xf32>
+}
+
+// SCHECK-LABEL: @scaling_extf_vector_to_f32_using_f16_scales
+// SCHECK: %[[TRUNCF_SCALE:.+]] = arith.truncf %arg1 : vector<4xf16> to vector<4xf8E8M0FNU>
+// SCHECK: %[[EXT_SCALE:.+]] = arith.extf %[[TRUNCF_SCALE]] : vector<4xf8E8M0FNU> to vector<4xf32>
+// SCHECK: %[[EXT_INPUT:.+]] = arith.extf %arg0 : vector<4xf4E2M1FN> to vector<4xf32>
+// SCHECK: %[[RESULT:.+]] = arith.mulf %[[EXT_INPUT]], %[[EXT_SCALE]] : vector<4xf32>
+// SCHECK: return %[[RESULT]]
+
+// -----
+
+func.func @scaling_extf_vector_to_f32_using_f16_scales_fastmath(%arg0: vector<4xf4E2M1FN>, %arg1 : vector<4xf16>) -> vector<4xf32> {
+    %0 = arith.scaling_extf %arg0, %arg1 fastmath<fast> : vector<4xf4E2M1FN>, vector<4xf16> to vector<4xf32>
+    return %0 : vector<4xf32>
+}
+
+// SCHECK-LABEL: @scaling_extf_vector_to_f32_using_f16_scales_fastmath
+// SCHECK: %[[TRUNCF_SCALE:.+]] = arith.truncf %arg1 fastmath<fast> : vector<4xf16> to vector<4xf8E8M0FNU>
+// SCHECK: %[[EXT_SCALE:.+]] = arith.extf %[[TRUNCF_SCALE]] fastmath<fast> : vector<4xf8E8M0FNU> to vector<4xf32>
+// SCHECK: %[[EXT_INPUT:.+]] = arith.extf %arg0 fastmath<fast> : vector<4xf4E2M1FN> to vector<4xf32>
+// SCHECK: %[[RESULT:.+]] = arith.mulf %[[EXT_INPUT]], %[[EXT_SCALE]] fastmath<fast> : vector<4xf32>
+// SCHECK: return %[[RESULT]]
+
+// -----
+
 func.func @maxsi(%a: i32, %b: i32) -> i32 {
   %result = arith.maxsi %a, %b : i32
   return %result : i32