Skip to content

Commit 53bb79b

Browse files
authored
Support DynamicQuantizeLinear op (microsoft#25905)
1 parent af4bf43 commit 53bb79b

File tree

3 files changed

+174
-12
lines changed

3 files changed

+174
-12
lines changed

js/web/docs/webnn-operators.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ platforms. Check the [WebNN status](https://webmachinelearning.github.io/webnn-s
3232
| Div | ai.onnx(7-12, 13, 14+) | div | |
3333
| DequantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | dequantizeLinear | The shape of x_scale should be a subsample of the shape of input |
3434
| Dropout | ai.onnx(7-9, 10-11, 12, 13-21, 22+) | identity | Only supports test mode |
35+
| DynamicQuantizeLinear | ai.onnx(11+) | cast, clamp, div, max, min, quantizeLinear, reduceMax, reduceMin, reshape, roundEven, sub | |
3536
| Einsum | ai.onnx(12+) | reshape, transpose, matmul, reduceSum, mul, triangular | |
3637
| Elu | ai.onnx(7+) | elu | |
3738
| Equal | ai.onnx(7-10, 11-12, 13-18, 19+) | equal | |

onnxruntime/core/providers/webnn/builders/impl/dynamicQuantizeLinear_op_builder.cc

Lines changed: 171 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,194 @@
1414
namespace onnxruntime {
1515
namespace webnn {
1616

17-
class DynamicQuantizaLinearOpBuilder : public BaseOpBuilder {
17+
class DynamicQuantizeLinearOpBuilder : public BaseOpBuilder {
1818
// Add operator related.
1919
private:
2020
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
2121
const logging::Logger& logger) const override ORT_MUST_USE_RESULT;
22+
23+
// Operator support related.
24+
private:
25+
bool HasSupportedInputsImpl(const GraphViewer&, const Node& node,
26+
const emscripten::val& wnn_limits, const logging::Logger& logger) const override;
27+
bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits,
28+
const logging::Logger& logger) const override;
2229
};
2330

24-
Status DynamicQuantizaLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
31+
// DynamicQuantizeLinear is a function defined as follows:
32+
// DynamicQuantizeLinear (x) => (y, y_scale, y_zero_point)
33+
// {
34+
// Q_Min = Constant <value: tensor = float {0}> ()
35+
// Q_Max = Constant <value: tensor = float {255}> ()
36+
// X_Min = ReduceMin <keepdims: int = 0> (x)
37+
// X_Min_Adjusted = Min (X_Min, Q_Min)
38+
// X_Max = ReduceMax <keepdims: int = 0> (x)
39+
// X_Max_Adjusted = Max (X_Max, Q_Min)
40+
// X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted)
41+
// Scale = Div (X_Range, Q_Max)
42+
// Min_Scaled = Div (X_Min_Adjusted, Scale)
43+
// Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled)
44+
// Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max)
45+
// Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP)
46+
// Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP)
47+
// y_scale = Identity (Scale) (Skip in WebNN)
48+
// y_zero_point = Identity (Zeropoint) (Skip in WebNN)
49+
// y = QuantizeLinear (x, Scale, Zeropoint)
50+
// }
51+
Status DynamicQuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
2552
const Node& node,
2653
const logging::Logger& logger) const {
2754
const auto& input_defs = node.InputDefs();
2855
emscripten::val input = model_builder.GetOperand(input_defs[0]->Name());
29-
emscripten::val output_array;
30-
std::vector<int64_t> input_shape;
31-
ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape");
32-
emscripten::val options = emscripten::val::object();
33-
options.set("label", node.Name());
56+
emscripten::val common_options = emscripten::val::object();
57+
58+
// Q_Min = Constant <value: tensor = float {0}> ()
59+
emscripten::val q_min = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 0.0f);
60+
// Q_Max = Constant <value: tensor = float {255}> ()
61+
emscripten::val q_max = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 255.0f);
62+
63+
// X_Min = ReduceMin <keepdims: int = 0> (x)
64+
common_options.set("label", node.Name() + "_x_min");
65+
emscripten::val x_min = model_builder.GetBuilder().call<emscripten::val>("reduceMin", input, common_options);
66+
67+
// X_Min_Adjusted = Min (X_Min, Q_Min)
68+
common_options.set("label", node.Name() + "_x_min_adjusted");
69+
emscripten::val x_min_adjusted = model_builder.GetBuilder().call<emscripten::val>("min", x_min, q_min, common_options);
70+
71+
// X_Max = ReduceMax <keepdims: int = 0> (x)
72+
common_options.set("label", node.Name() + "_x_max");
73+
emscripten::val x_max = model_builder.GetBuilder().call<emscripten::val>("reduceMax", input, common_options);
74+
75+
// X_Max_Adjusted = Max (X_Max, Q_Min)
76+
common_options.set("label", node.Name() + "_x_max_adjusted");
77+
emscripten::val x_max_adjusted = model_builder.GetBuilder().call<emscripten::val>(
78+
"max", x_max, q_min, common_options);
79+
80+
// X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted)
81+
common_options.set("label", node.Name() + "_x_range");
82+
emscripten::val x_range = model_builder.GetBuilder().call<emscripten::val>(
83+
"sub", x_max_adjusted, x_min_adjusted, common_options);
3484

35-
output_array = model_builder.GetBuilder().call<emscripten::val>("dynamicQuantizeLinear", input, options);
85+
// Scale = Div (X_Range, Q_Max)
86+
common_options.set("label", node.Name() + "_scale");
87+
emscripten::val scale = model_builder.GetBuilder().call<emscripten::val>("div", x_range, q_max, common_options);
3688

37-
for (size_t i = 0, count = output_array["length"].as<size_t>(); i < count; i++) {
38-
model_builder.AddOperand(node.OutputDefs()[i]->Name(), std::move(output_array[i]));
89+
// Min_Scaled = Div (X_Min_Adjusted, Scale)
90+
common_options.set("label", node.Name() + "_min_scaled");
91+
emscripten::val min_scaled = model_builder.GetBuilder().call<emscripten::val>(
92+
"div", x_min_adjusted, scale, common_options);
93+
94+
// Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled)
95+
common_options.set("label", node.Name() + "_initial_zero_point_fp");
96+
emscripten::val initial_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
97+
"sub", q_min, min_scaled, common_options);
98+
99+
// Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max)
100+
emscripten::val clip_options = emscripten::val::object();
101+
clip_options.set("label", node.Name() + "_clipped_zero_point_fp");
102+
clip_options.set("minValue", 0);
103+
clip_options.set("maxValue", 255);
104+
emscripten::val clipped_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
105+
"clamp", initial_zero_point_fp, clip_options);
106+
107+
// Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP)
108+
common_options.set("label", node.Name() + "_rounded_zero_point_fp");
109+
emscripten::val rounded_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
110+
"roundEven", clipped_zero_point_fp, common_options);
111+
112+
// Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP)
113+
// to: int = 2 means cast to uint8
114+
common_options.set("label", node.Name() + "_zero_point");
115+
emscripten::val zero_point = model_builder.GetBuilder().call<emscripten::val>(
116+
"cast", rounded_zero_point_fp, emscripten::val("uint8"), common_options);
117+
118+
// The WebNN quantizeLinear op requires the scale and zero_point tensors to have the same rank as the input tensor.
119+
// The scale and zero_point outputs are both scalars, so we need to reshape them to match the input rank.
120+
std::vector<int64_t> input_shape;
121+
ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get input shape");
122+
const auto input_rank = input_shape.size();
123+
emscripten::val new_scale = scale;
124+
emscripten::val new_zero_point = zero_point;
125+
if (input_rank > 0) {
126+
std::vector<uint32_t> new_shape(input_rank, 1);
127+
common_options.set("label", node.Name() + "_reshape_scale");
128+
new_scale = model_builder.GetBuilder().call<emscripten::val>(
129+
"reshape", scale, emscripten::val::array(new_shape), common_options);
130+
131+
common_options.set("label", node.Name() + "_reshape_zero_point");
132+
new_zero_point = model_builder.GetBuilder().call<emscripten::val>(
133+
"reshape", zero_point, emscripten::val::array(new_shape), common_options);
39134
}
135+
136+
// y = QuantizeLinear (x, Scale, Zeropoint)
137+
common_options.set("label", node.Name() + "_quantize_linear");
138+
emscripten::val y = model_builder.GetBuilder().call<emscripten::val>(
139+
"quantizeLinear", input, new_scale, new_zero_point, common_options);
140+
141+
// Add output: y
142+
model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(y));
143+
// Add output: y_scale
144+
model_builder.AddOperand(node.OutputDefs()[1]->Name(), std::move(scale));
145+
// Add output: y_zero_point
146+
model_builder.AddOperand(node.OutputDefs()[2]->Name(), std::move(zero_point));
147+
40148
return Status::OK();
41149
}
42150

151+
// Operator support related.
152+
bool DynamicQuantizeLinearOpBuilder::HasSupportedInputsImpl(const GraphViewer&, const Node& node,
153+
const emscripten::val& wnn_limits,
154+
const logging::Logger& logger) const {
155+
const auto& input_defs = node.InputDefs();
156+
157+
int32_t input_type = 0;
158+
if (!GetType(*input_defs[0], input_type, logger)) {
159+
return false;
160+
}
161+
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
162+
LOGS(logger, VERBOSE) << "DynamicQuantizeLinear only supports input data type float.";
163+
return false;
164+
}
165+
166+
std::vector<int64_t> input_shape;
167+
if (!GetShape(*input_defs[0], input_shape, logger)) {
168+
return false;
169+
}
170+
// It's complicated to check all the decomposed ops' input rank support.
171+
// Ensure at least the first input rank is supported by the decomposed ops.
172+
// (reduceMax, reduceMin and quantizeLinear accept the first input).
173+
const std::array<std::string_view, 3> operations = {"reduceMax", "reduceMin", "quantizeLinear"};
174+
for (const auto& op : operations) {
175+
if (!IsInputRankSupported(wnn_limits, op, "input", input_shape.size(), node.Name(), logger)) {
176+
return false;
177+
}
178+
}
179+
180+
return true;
181+
}
182+
183+
bool DynamicQuantizeLinearOpBuilder::HasSupportedOutputsImpl(const Node& node,
184+
const emscripten::val& wnn_limits,
185+
const logging::Logger& logger) const {
186+
const auto& output_defs = node.OutputDefs();
187+
const std::string_view op_type = node.OpType();
188+
int32_t y_type, y_scale_type, y_zero_point_type;
189+
if (!GetType(*output_defs[0], y_type, logger) ||
190+
!GetType(*output_defs[1], y_scale_type, logger) ||
191+
!GetType(*output_defs[2], y_zero_point_type, logger)) {
192+
return false;
193+
}
194+
195+
// Only need to check the output data type of ops that produce the outputs of DynamicQuantizeLinear.
196+
// 1. QuantizeLinear -> y (uint8)
197+
// 2. Div -> y_scale (float32) (skip it as WebNN should support it by default)
198+
// 3. Cast -> y_zero_point (uint8)
199+
return IsDataTypeSupportedByWebNNOp(op_type, "quantizeLinear", y_type, wnn_limits, "output", "y", logger) &&
200+
IsDataTypeSupportedByWebNNOp(op_type, "cast", y_zero_point_type, wnn_limits, "output", "y_zero_point", logger);
201+
}
202+
43203
void CreateDynamicQuantizeLinearOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
44-
op_registrations.builders.push_back(std::make_unique<DynamicQuantizaLinearOpBuilder>());
204+
op_registrations.builders.push_back(std::make_unique<DynamicQuantizeLinearOpBuilder>());
45205
op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
46206
}
47207

onnxruntime/core/providers/webnn/builders/map_info.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ constexpr std::array<ONNX_NAMESPACE::TensorProto_DataType, 5> supported_fallback
4747
// Use ONNX-to-ONNX op mapping to improve the search complexity for WebNN ops in the op_inputs_map.
4848
const std::map<std::string_view, std::vector<std::string_view>> decomposed_op_map = {
4949
{"ConvInteger", {"Cast", "Conv", "DequantizeLinear"}},
50+
{"DynamicQuantizeLinear",
51+
{"Cast", "Clip", "Div", "Max", "Min", "QuantizeLinear", "ReduceMax", "ReduceMin", "Reshape", "Round", "Sub"}},
5052
{"Einsum", {"MatMul", "Mul", "ReduceSum", "Reshape", "Transpose", "Trilu"}},
5153
{"GroupQueryAttention",
5254
{"Add", "Cast", "Concat", "CumSum", "Div", "Expand", "Less", "MatMul", "Reshape", "ScatterND",
@@ -190,7 +192,6 @@ const std::unordered_map<std::string_view, WebnnOpInfo> op_inputs_map = {
190192
{"GatherND", {"gatherND", {{0, "input"}, {1, "indices"}}}},
191193
{"GreaterOrEqual", {"greaterOrEqual", {{0, "a"}, {1, "b"}}}},
192194
{"Conv", {"conv2d", {{0, "input"}, {1, "filter"}, {2, "bias"}}}},
193-
{"DynamicQuantizeLinear", {"dynamicQuantizeLinear", {{0, "input"}}}},
194195
{"GatherElements", {"gatherElements", {{0, "input"}, {1, "indices"}}}},
195196
{"ScatterND", {"scatterND", {{0, "input"}, {1, "indices"}, {2, "updates"}}}},
196197
{"Where", {"where", {{0, "condition"}, {1, "trueValue"}, {2, "falseValue"}}}},

0 commit comments

Comments
 (0)