Skip to content

Commit 53bb79b

Browse files
authored
Support DynamicQuantizeLinear op (microsoft#25905)
1 parent af4bf43 commit 53bb79b

File tree

3 files changed

+174
-12
lines changed

3 files changed

+174
-12
lines changed

js/web/docs/webnn-operators.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ platforms. Check the [WebNN status](https://webmachinelearning.github.io/webnn-s
3232
| Div | ai.onnx(7-12, 13, 14+) | div | |
3333
| DequantizeLinear | ai.onnx(10-12, 13-18, 19-20, 21-22, 23+) | dequantizeLinear | The shape of x_scale should be a subsample of the shape of input |
3434
| Dropout | ai.onnx(7-9, 10-11, 12, 13-21, 22+) | identity | Only supports test mode |
35+
| DynamicQuantizeLinear | ai.onnx(11+) | cast, clamp, div, max, min, quantizeLinear, reduceMax, reduceMin, reshape, roundEven, sub | |
3536
| Einsum | ai.onnx(12+) | reshape, transpose, matmul, reduceSum, mul, triangular | |
3637
| Elu | ai.onnx(7+) | elu | |
3738
| Equal | ai.onnx(7-10, 11-12, 13-18, 19+) | equal | |

onnxruntime/core/providers/webnn/builders/impl/dynamicQuantizeLinear_op_builder.cc

Lines changed: 171 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,34 +14,194 @@
1414
namespace onnxruntime {
1515
namespace webnn {
1616

17-
class DynamicQuantizaLinearOpBuilder : public BaseOpBuilder {
17+
class DynamicQuantizeLinearOpBuilder : public BaseOpBuilder {
1818
// Add operator related.
1919
private:
2020
Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
2121
const logging::Logger& logger) const override ORT_MUST_USE_RESULT;
22+
23+
// Operator support related.
24+
private:
25+
bool HasSupportedInputsImpl(const GraphViewer&, const Node& node,
26+
const emscripten::val& wnn_limits, const logging::Logger& logger) const override;
27+
bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits,
28+
const logging::Logger& logger) const override;
2229
};
2330

24-
Status DynamicQuantizaLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
31+
// DynamicQuantizeLinear is a function defined as follows:
32+
// DynamicQuantizeLinear (x) => (y, y_scale, y_zero_point)
33+
// {
34+
// Q_Min = Constant <value: tensor = float {0}> ()
35+
// Q_Max = Constant <value: tensor = float {255}> ()
36+
// X_Min = ReduceMin <keepdims: int = 0> (x)
37+
// X_Min_Adjusted = Min (X_Min, Q_Min)
38+
// X_Max = ReduceMax <keepdims: int = 0> (x)
39+
// X_Max_Adjusted = Max (X_Max, Q_Min)
40+
// X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted)
41+
// Scale = Div (X_Range, Q_Max)
42+
// Min_Scaled = Div (X_Min_Adjusted, Scale)
43+
// Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled)
44+
// Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max)
45+
// Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP)
46+
// Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP)
47+
// y_scale = Identity (Scale) (Skip in WebNN)
48+
// y_zero_point = Identity (Zeropoint) (Skip in WebNN)
49+
// y = QuantizeLinear (x, Scale, Zeropoint)
50+
// }
51+
Status DynamicQuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
2552
const Node& node,
2653
const logging::Logger& logger) const {
2754
const auto& input_defs = node.InputDefs();
2855
emscripten::val input = model_builder.GetOperand(input_defs[0]->Name());
29-
emscripten::val output_array;
30-
std::vector<int64_t> input_shape;
31-
ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape");
32-
emscripten::val options = emscripten::val::object();
33-
options.set("label", node.Name());
56+
emscripten::val common_options = emscripten::val::object();
57+
58+
// Q_Min = Constant <value: tensor = float {0}> ()
59+
emscripten::val q_min = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 0.0f);
60+
// Q_Max = Constant <value: tensor = float {255}> ()
61+
emscripten::val q_max = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 255.0f);
62+
63+
// X_Min = ReduceMin <keepdims: int = 0> (x)
64+
common_options.set("label", node.Name() + "_x_min");
65+
emscripten::val x_min = model_builder.GetBuilder().call<emscripten::val>("reduceMin", input, common_options);
66+
67+
// X_Min_Adjusted = Min (X_Min, Q_Min)
68+
common_options.set("label", node.Name() + "_x_min_adjusted");
69+
emscripten::val x_min_adjusted = model_builder.GetBuilder().call<emscripten::val>("min", x_min, q_min, common_options);
70+
71+
// X_Max = ReduceMax <keepdims: int = 0> (x)
72+
common_options.set("label", node.Name() + "_x_max");
73+
emscripten::val x_max = model_builder.GetBuilder().call<emscripten::val>("reduceMax", input, common_options);
74+
75+
// X_Max_Adjusted = Max (X_Max, Q_Min)
76+
common_options.set("label", node.Name() + "_x_max_adjusted");
77+
emscripten::val x_max_adjusted = model_builder.GetBuilder().call<emscripten::val>(
78+
"max", x_max, q_min, common_options);
79+
80+
// X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted)
81+
common_options.set("label", node.Name() + "_x_range");
82+
emscripten::val x_range = model_builder.GetBuilder().call<emscripten::val>(
83+
"sub", x_max_adjusted, x_min_adjusted, common_options);
3484

35-
output_array = model_builder.GetBuilder().call<emscripten::val>("dynamicQuantizeLinear", input, options);
85+
// Scale = Div (X_Range, Q_Max)
86+
common_options.set("label", node.Name() + "_scale");
87+
emscripten::val scale = model_builder.GetBuilder().call<emscripten::val>("div", x_range, q_max, common_options);
3688

37-
for (size_t i = 0, count = output_array["length"].as<size_t>(); i < count; i++) {
38-
model_builder.AddOperand(node.OutputDefs()[i]->Name(), std::move(output_array[i]));
89+
// Min_Scaled = Div (X_Min_Adjusted, Scale)
90+
common_options.set("label", node.Name() + "_min_scaled");
91+
emscripten::val min_scaled = model_builder.GetBuilder().call<emscripten::val>(
92+
"div", x_min_adjusted, scale, common_options);
93+
94+
// Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled)
95+
common_options.set("label", node.Name() + "_initial_zero_point_fp");
96+
emscripten::val initial_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
97+
"sub", q_min, min_scaled, common_options);
98+
99+
// Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max)
100+
emscripten::val clip_options = emscripten::val::object();
101+
clip_options.set("label", node.Name() + "_clipped_zero_point_fp");
102+
clip_options.set("minValue", 0);
103+
clip_options.set("maxValue", 255);
104+
emscripten::val clipped_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
105+
"clamp", initial_zero_point_fp, clip_options);
106+
107+
// Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP)
108+
common_options.set("label", node.Name() + "_rounded_zero_point_fp");
109+
emscripten::val rounded_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>(
110+
"roundEven", clipped_zero_point_fp, common_options);
111+
112+
// Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP)
113+
// to: int = 2 means cast to uint8
114+
common_options.set("label", node.Name() + "_zero_point");
115+
emscripten::val zero_point = model_builder.GetBuilder().call<emscripten::val>(
116+
"cast", rounded_zero_point_fp, emscripten::val("uint8"), common_options);
117+
118+
// The WebNN quantizeLinear op requires the scale and zero_point tensors to have the same rank as the input tensor.
119+
// The scale and zero_point outputs are both scalars, so we need to reshape them to match the input rank.
120+
std::vector<int64_t> input_shape;
121+
ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get input shape");
122+
const auto input_rank = input_shape.size();
123+
emscripten::val new_scale = scale;
124+
emscripten::val new_zero_point = zero_point;
125+
if (input_rank > 0) {
126+
std::vector<uint32_t> new_shape(input_rank, 1);
127+
common_options.set("label", node.Name() + "_reshape_scale");
128+
new_scale = model_builder.GetBuilder().call<emscripten::val>(
129+
"reshape", scale, emscripten::val::array(new_shape), common_options);
130+
131+
common_options.set("label", node.Name() + "_reshape_zero_point");
132+
new_zero_point = model_builder.GetBuilder().call<emscripten::val>(
133+
"reshape", zero_point, emscripten::val::array(new_shape), common_options);
39134
}
135+
136+
// y = QuantizeLinear (x, Scale, Zeropoint)
137+
common_options.set("label", node.Name() + "_quantize_linear");
138+
emscripten::val y = model_builder.GetBuilder().call<emscripten::val>(
139+
"quantizeLinear", input, new_scale, new_zero_point, common_options);
140+
141+
// Add output: y
142+
model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(y));
143+
// Add output: y_scale
144+
model_builder.AddOperand(node.OutputDefs()[1]->Name(), std::move(scale));
145+
// Add output: y_zero_point
146+
model_builder.AddOperand(node.OutputDefs()[2]->Name(), std::move(zero_point));
147+
40148
return Status::OK();
41149
}
42150

151+
// Operator support related.
152+
bool DynamicQuantizeLinearOpBuilder::HasSupportedInputsImpl(const GraphViewer&, const Node& node,
153+
const emscripten::val& wnn_limits,
154+
const logging::Logger& logger) const {
155+
const auto& input_defs = node.InputDefs();
156+
157+
int32_t input_type = 0;
158+
if (!GetType(*input_defs[0], input_type, logger)) {
159+
return false;
160+
}
161+
if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) {
162+
LOGS(logger, VERBOSE) << "DynamicQuantizeLinear only supports input data type float.";
163+
return false;
164+
}
165+
166+
std::vector<int64_t> input_shape;
167+
if (!GetShape(*input_defs[0], input_shape, logger)) {
168+
return false;
169+
}
170+
// It's complicated to check all the decomposed ops' input rank support.
171+
// Ensure at least the first input rank is supported by the decomposed ops.
172+
// (reduceMax, reduceMin and quantizeLinear accept the first input).
173+
const std::array<std::string_view, 3> operations = {"reduceMax", "reduceMin", "quantizeLinear"};
174+
for (const auto& op : operations) {
175+
if (!IsInputRankSupported(wnn_limits, op, "input", input_shape.size(), node.Name(), logger)) {
176+
return false;
177+
}
178+
}
179+
180+
return true;
181+
}
182+
183+
bool DynamicQuantizeLinearOpBuilder::HasSupportedOutputsImpl(const Node& node,
184+
const emscripten::val& wnn_limits,
185+
const logging::Logger& logger) const {
186+
const auto& output_defs = node.OutputDefs();
187+
const std::string_view op_type = node.OpType();
188+
int32_t y_type, y_scale_type, y_zero_point_type;
189+
if (!GetType(*output_defs[0], y_type, logger) ||
190+
!GetType(*output_defs[1], y_scale_type, logger) ||
191+
!GetType(*output_defs[2], y_zero_point_type, logger)) {
192+
return false;
193+
}
194+
195+
// Only need to check the output data type of ops that produce the outputs of DynamicQuantizeLinear.
196+
// 1. QuantizeLinear -> y (uint8)
197+
// 2. Div -> y_scale (float32) (skip it as WebNN should support it by default)
198+
// 3. Cast -> y_zero_point (uint8)
199+
return IsDataTypeSupportedByWebNNOp(op_type, "quantizeLinear", y_type, wnn_limits, "output", "y", logger) &&
200+
IsDataTypeSupportedByWebNNOp(op_type, "cast", y_zero_point_type, wnn_limits, "output", "y_zero_point", logger);
201+
}
202+
43203
void CreateDynamicQuantizeLinearOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
44-
op_registrations.builders.push_back(std::make_unique<DynamicQuantizaLinearOpBuilder>());
204+
op_registrations.builders.push_back(std::make_unique<DynamicQuantizeLinearOpBuilder>());
45205
op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get());
46206
}
47207

onnxruntime/core/providers/webnn/builders/map_info.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ constexpr std::array<ONNX_NAMESPACE::TensorProto_DataType, 5> supported_fallback
4747
// Use ONNX-to-ONNX op mapping to improve the search complexity for WebNN ops in the op_inputs_map.
4848
const std::map<std::string_view, std::vector<std::string_view>> decomposed_op_map = {
4949
{"ConvInteger", {"Cast", "Conv", "DequantizeLinear"}},
50+
{"DynamicQuantizeLinear",
51+
{"Cast", "Clip", "Div", "Max", "Min", "QuantizeLinear", "ReduceMax", "ReduceMin", "Reshape", "Round", "Sub"}},
5052
{"Einsum", {"MatMul", "Mul", "ReduceSum", "Reshape", "Transpose", "Trilu"}},
5153
{"GroupQueryAttention",
5254
{"Add", "Cast", "Concat", "CumSum", "Div", "Expand", "Less", "MatMul", "Reshape", "ScatterND",
@@ -190,7 +192,6 @@ const std::unordered_map<std::string_view, WebnnOpInfo> op_inputs_map = {
190192
{"GatherND", {"gatherND", {{0, "input"}, {1, "indices"}}}},
191193
{"GreaterOrEqual", {"greaterOrEqual", {{0, "a"}, {1, "b"}}}},
192194
{"Conv", {"conv2d", {{0, "input"}, {1, "filter"}, {2, "bias"}}}},
193-
{"DynamicQuantizeLinear", {"dynamicQuantizeLinear", {{0, "input"}}}},
194195
{"GatherElements", {"gatherElements", {{0, "input"}, {1, "indices"}}}},
195196
{"ScatterND", {"scatterND", {{0, "input"}, {1, "indices"}, {2, "updates"}}}},
196197
{"Where", {"where", {{0, "condition"}, {1, "trueValue"}, {2, "falseValue"}}}},

0 commit comments

Comments
 (0)