|
14 | 14 | namespace onnxruntime { |
15 | 15 | namespace webnn { |
16 | 16 |
|
// Builds the WebNN representation of the ONNX DynamicQuantizeLinear operator.
// WebNN has no single equivalent op, so AddToModelBuilderImpl decomposes it
// into primitive WebNN ops (reduceMin/reduceMax/div/clamp/cast/quantizeLinear).
class DynamicQuantizeLinearOpBuilder : public BaseOpBuilder {
  // Add operator related.
 private:
  Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                               const logging::Logger& logger) const override ORT_MUST_USE_RESULT;

  // Operator support related.
 private:
  // Accepts only float32 input whose rank is supported by the decomposed ops.
  bool HasSupportedInputsImpl(const GraphViewer&, const Node& node,
                              const emscripten::val& wnn_limits, const logging::Logger& logger) const override;
  // Checks the y and y_zero_point output data types against the WebNN limits.
  bool HasSupportedOutputsImpl(const Node& node, const emscripten::val& wnn_limits,
                               const logging::Logger& logger) const override;
};
23 | 30 |
|
24 | | -Status DynamicQuantizaLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, |
| 31 | +// DynamicQuantizeLinear is a function defined as follows: |
| 32 | +// DynamicQuantizeLinear (x) => (y, y_scale, y_zero_point) |
| 33 | +// { |
| 34 | +// Q_Min = Constant <value: tensor = float {0}> () |
| 35 | +// Q_Max = Constant <value: tensor = float {255}> () |
| 36 | +// X_Min = ReduceMin <keepdims: int = 0> (x) |
| 37 | +// X_Min_Adjusted = Min (X_Min, Q_Min) |
| 38 | +// X_Max = ReduceMax <keepdims: int = 0> (x) |
| 39 | +// X_Max_Adjusted = Max (X_Max, Q_Min) |
| 40 | +// X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted) |
| 41 | +// Scale = Div (X_Range, Q_Max) |
| 42 | +// Min_Scaled = Div (X_Min_Adjusted, Scale) |
| 43 | +// Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled) |
| 44 | +// Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max) |
| 45 | +// Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP) |
| 46 | +// Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP) |
| 47 | +// y_scale = Identity (Scale) (Skip in WebNN) |
| 48 | +// y_zero_point = Identity (Zeropoint) (Skip in WebNN) |
| 49 | +// y = QuantizeLinear (x, Scale, Zeropoint) |
| 50 | +// } |
| 51 | +Status DynamicQuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, |
25 | 52 | const Node& node, |
26 | 53 | const logging::Logger& logger) const { |
27 | 54 | const auto& input_defs = node.InputDefs(); |
28 | 55 | emscripten::val input = model_builder.GetOperand(input_defs[0]->Name()); |
29 | | - emscripten::val output_array; |
30 | | - std::vector<int64_t> input_shape; |
31 | | - ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get shape"); |
32 | | - emscripten::val options = emscripten::val::object(); |
33 | | - options.set("label", node.Name()); |
| 56 | + emscripten::val common_options = emscripten::val::object(); |
| 57 | + |
| 58 | + // Q_Min = Constant <value: tensor = float {0}> () |
| 59 | + emscripten::val q_min = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 0.0f); |
| 60 | + // Q_Max = Constant <value: tensor = float {255}> () |
| 61 | + emscripten::val q_max = model_builder.CreateOrGetConstant<float>(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 255.0f); |
| 62 | + |
| 63 | + // X_Min = ReduceMin <keepdims: int = 0> (x) |
| 64 | + common_options.set("label", node.Name() + "_x_min"); |
| 65 | + emscripten::val x_min = model_builder.GetBuilder().call<emscripten::val>("reduceMin", input, common_options); |
| 66 | + |
| 67 | + // X_Min_Adjusted = Min (X_Min, Q_Min) |
| 68 | + common_options.set("label", node.Name() + "_x_min_adjusted"); |
| 69 | + emscripten::val x_min_adjusted = model_builder.GetBuilder().call<emscripten::val>("min", x_min, q_min, common_options); |
| 70 | + |
| 71 | + // X_Max = ReduceMax <keepdims: int = 0> (x) |
| 72 | + common_options.set("label", node.Name() + "_x_max"); |
| 73 | + emscripten::val x_max = model_builder.GetBuilder().call<emscripten::val>("reduceMax", input, common_options); |
| 74 | + |
| 75 | + // X_Max_Adjusted = Max (X_Max, Q_Min) |
| 76 | + common_options.set("label", node.Name() + "_x_max_adjusted"); |
| 77 | + emscripten::val x_max_adjusted = model_builder.GetBuilder().call<emscripten::val>( |
| 78 | + "max", x_max, q_min, common_options); |
| 79 | + |
| 80 | + // X_Range = Sub (X_Max_Adjusted, X_Min_Adjusted) |
| 81 | + common_options.set("label", node.Name() + "_x_range"); |
| 82 | + emscripten::val x_range = model_builder.GetBuilder().call<emscripten::val>( |
| 83 | + "sub", x_max_adjusted, x_min_adjusted, common_options); |
34 | 84 |
|
35 | | - output_array = model_builder.GetBuilder().call<emscripten::val>("dynamicQuantizeLinear", input, options); |
| 85 | + // Scale = Div (X_Range, Q_Max) |
| 86 | + common_options.set("label", node.Name() + "_scale"); |
| 87 | + emscripten::val scale = model_builder.GetBuilder().call<emscripten::val>("div", x_range, q_max, common_options); |
36 | 88 |
|
37 | | - for (size_t i = 0, count = output_array["length"].as<size_t>(); i < count; i++) { |
38 | | - model_builder.AddOperand(node.OutputDefs()[i]->Name(), std::move(output_array[i])); |
| 89 | + // Min_Scaled = Div (X_Min_Adjusted, Scale) |
| 90 | + common_options.set("label", node.Name() + "_min_scaled"); |
| 91 | + emscripten::val min_scaled = model_builder.GetBuilder().call<emscripten::val>( |
| 92 | + "div", x_min_adjusted, scale, common_options); |
| 93 | + |
| 94 | + // Initial_ZeroPoint_FP = Sub (Q_Min, Min_Scaled) |
| 95 | + common_options.set("label", node.Name() + "_initial_zero_point_fp"); |
| 96 | + emscripten::val initial_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>( |
| 97 | + "sub", q_min, min_scaled, common_options); |
| 98 | + |
| 99 | + // Clipped_ZeroPoint_FP = Clip (Initial_ZeroPoint_FP, Q_Min, Q_Max) |
| 100 | + emscripten::val clip_options = emscripten::val::object(); |
| 101 | + clip_options.set("label", node.Name() + "_clipped_zero_point_fp"); |
| 102 | + clip_options.set("minValue", 0); |
| 103 | + clip_options.set("maxValue", 255); |
| 104 | + emscripten::val clipped_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>( |
| 105 | + "clamp", initial_zero_point_fp, clip_options); |
| 106 | + |
| 107 | + // Rounded_ZeroPoint_FP = Round (Clipped_ZeroPoint_FP) |
| 108 | + common_options.set("label", node.Name() + "_rounded_zero_point_fp"); |
| 109 | + emscripten::val rounded_zero_point_fp = model_builder.GetBuilder().call<emscripten::val>( |
| 110 | + "roundEven", clipped_zero_point_fp, common_options); |
| 111 | + |
| 112 | + // Zeropoint = Cast <to: int = 2> (Rounded_ZeroPoint_FP) |
| 113 | + // to: int = 2 means cast to uint8 |
| 114 | + common_options.set("label", node.Name() + "_zero_point"); |
| 115 | + emscripten::val zero_point = model_builder.GetBuilder().call<emscripten::val>( |
| 116 | + "cast", rounded_zero_point_fp, emscripten::val("uint8"), common_options); |
| 117 | + |
| 118 | + // The WebNN quantizeLinear op requires the scale and zero_point tensors to have the same rank as the input tensor. |
| 119 | + // The scale and zero_point outputs are both scalars, so we need to reshape them to match the input rank. |
| 120 | + std::vector<int64_t> input_shape; |
| 121 | + ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get input shape"); |
| 122 | + const auto input_rank = input_shape.size(); |
| 123 | + emscripten::val new_scale = scale; |
| 124 | + emscripten::val new_zero_point = zero_point; |
| 125 | + if (input_rank > 0) { |
| 126 | + std::vector<uint32_t> new_shape(input_rank, 1); |
| 127 | + common_options.set("label", node.Name() + "_reshape_scale"); |
| 128 | + new_scale = model_builder.GetBuilder().call<emscripten::val>( |
| 129 | + "reshape", scale, emscripten::val::array(new_shape), common_options); |
| 130 | + |
| 131 | + common_options.set("label", node.Name() + "_reshape_zero_point"); |
| 132 | + new_zero_point = model_builder.GetBuilder().call<emscripten::val>( |
| 133 | + "reshape", zero_point, emscripten::val::array(new_shape), common_options); |
39 | 134 | } |
| 135 | + |
| 136 | + // y = QuantizeLinear (x, Scale, Zeropoint) |
| 137 | + common_options.set("label", node.Name() + "_quantize_linear"); |
| 138 | + emscripten::val y = model_builder.GetBuilder().call<emscripten::val>( |
| 139 | + "quantizeLinear", input, new_scale, new_zero_point, common_options); |
| 140 | + |
| 141 | + // Add output: y |
| 142 | + model_builder.AddOperand(node.OutputDefs()[0]->Name(), std::move(y)); |
| 143 | + // Add output: y_scale |
| 144 | + model_builder.AddOperand(node.OutputDefs()[1]->Name(), std::move(scale)); |
| 145 | + // Add output: y_zero_point |
| 146 | + model_builder.AddOperand(node.OutputDefs()[2]->Name(), std::move(zero_point)); |
| 147 | + |
40 | 148 | return Status::OK(); |
41 | 149 | } |
42 | 150 |
|
| 151 | +// Operator support related. |
| 152 | +bool DynamicQuantizeLinearOpBuilder::HasSupportedInputsImpl(const GraphViewer&, const Node& node, |
| 153 | + const emscripten::val& wnn_limits, |
| 154 | + const logging::Logger& logger) const { |
| 155 | + const auto& input_defs = node.InputDefs(); |
| 156 | + |
| 157 | + int32_t input_type = 0; |
| 158 | + if (!GetType(*input_defs[0], input_type, logger)) { |
| 159 | + return false; |
| 160 | + } |
| 161 | + if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT) { |
| 162 | + LOGS(logger, VERBOSE) << "DynamicQuantizeLinear only supports input data type float."; |
| 163 | + return false; |
| 164 | + } |
| 165 | + |
| 166 | + std::vector<int64_t> input_shape; |
| 167 | + if (!GetShape(*input_defs[0], input_shape, logger)) { |
| 168 | + return false; |
| 169 | + } |
| 170 | + // It's complicated to check all the decomposed ops' input rank support. |
| 171 | + // Ensure at least the first input rank is supported by the decomposed ops. |
| 172 | + // (reduceMax, reduceMin and quantizeLinear accept the first input). |
| 173 | + const std::array<std::string_view, 3> operations = {"reduceMax", "reduceMin", "quantizeLinear"}; |
| 174 | + for (const auto& op : operations) { |
| 175 | + if (!IsInputRankSupported(wnn_limits, op, "input", input_shape.size(), node.Name(), logger)) { |
| 176 | + return false; |
| 177 | + } |
| 178 | + } |
| 179 | + |
| 180 | + return true; |
| 181 | +} |
| 182 | + |
| 183 | +bool DynamicQuantizeLinearOpBuilder::HasSupportedOutputsImpl(const Node& node, |
| 184 | + const emscripten::val& wnn_limits, |
| 185 | + const logging::Logger& logger) const { |
| 186 | + const auto& output_defs = node.OutputDefs(); |
| 187 | + const std::string_view op_type = node.OpType(); |
| 188 | + int32_t y_type, y_scale_type, y_zero_point_type; |
| 189 | + if (!GetType(*output_defs[0], y_type, logger) || |
| 190 | + !GetType(*output_defs[1], y_scale_type, logger) || |
| 191 | + !GetType(*output_defs[2], y_zero_point_type, logger)) { |
| 192 | + return false; |
| 193 | + } |
| 194 | + |
| 195 | + // Only need to check the output data type of ops that produce the outputs of DynamicQuantizeLinear. |
| 196 | + // 1. QuantizeLinear -> y (uint8) |
| 197 | + // 2. Div -> y_scale (float32) (skip it as WebNN should support it by default) |
| 198 | + // 3. Cast -> y_zero_point (uint8) |
| 199 | + return IsDataTypeSupportedByWebNNOp(op_type, "quantizeLinear", y_type, wnn_limits, "output", "y", logger) && |
| 200 | + IsDataTypeSupportedByWebNNOp(op_type, "cast", y_zero_point_type, wnn_limits, "output", "y_zero_point", logger); |
| 201 | +} |
| 202 | + |
43 | 203 | void CreateDynamicQuantizeLinearOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) { |
44 | | - op_registrations.builders.push_back(std::make_unique<DynamicQuantizaLinearOpBuilder>()); |
| 204 | + op_registrations.builders.push_back(std::make_unique<DynamicQuantizeLinearOpBuilder>()); |
45 | 205 | op_registrations.op_builder_map.emplace(op_type, op_registrations.builders.back().get()); |
46 | 206 | } |
47 | 207 |
|
|
0 commit comments