|
| 1 | +// Copyright (c) Microsoft Corporation. All rights reserved. |
| 2 | +// Licensed under the MIT License. |
| 3 | + |
| 4 | +#include <vector> |
| 5 | + |
| 6 | +#include "core/util/math.h" |
| 7 | +#include "core/providers/webgpu/quantization/quantize_linear.h" |
| 8 | +#include "core/providers/webgpu/shader_helper.h" |
| 9 | +#include "core/providers/webgpu/webgpu_supported_types.h" |
| 10 | +#include "core/providers/webgpu/webgpu_utils.h" |
| 11 | + |
| 12 | +namespace onnxruntime { |
| 13 | +namespace webgpu { |
| 14 | + |
| 15 | +Status DequantizeLinearProgram::GenerateShaderCode(ShaderHelper& shader) const { |
| 16 | + const auto& x = shader.AddInput("input", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseElementTypeAlias); |
| 17 | + const auto& scale = shader.AddInput("scale", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias | ShaderUsage::UseValueTypeAlias); |
| 18 | + const auto& output = shader.AddOutput("output", ShaderUsage::UseUniform | ShaderUsage::UseShapeAndStride | ShaderUsage::UseValueTypeAlias); |
| 19 | + |
| 20 | + shader.MainFunctionBody() |
| 21 | + << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.output_size") |
| 22 | + << "let output_indices = " << output.OffsetToIndices("global_idx") << ";\n"; |
| 23 | + |
| 24 | + // Get x input |
| 25 | + if (packed_) { |
| 26 | + std::string unpack = (signed_) ? "unpack4xI8(x)" : "unpack4xU8(x)"; |
| 27 | + if (output.NumComponents() == 1) { |
| 28 | + shader.MainFunctionBody() |
| 29 | + << "let x = " << x.GetByOffset("global_idx / 4") << ";\n" |
| 30 | + << "let x_vec = " << unpack << ";\n" |
| 31 | + << "let x_value = x_vec[global_idx % 4];\n"; |
| 32 | + } else { |
| 33 | + shader.MainFunctionBody() |
| 34 | + << "let x = " << x.GetByOffset("global_idx") << ";\n" |
| 35 | + << "let x_vec = " << unpack << ";\n" |
| 36 | + << "let x_value = x_vec;\n"; |
| 37 | + } |
| 38 | + } else { |
| 39 | + shader.MainFunctionBody() |
| 40 | + << "let x_value = " << x.GetByOffset("global_idx") << ";\n"; |
| 41 | + } |
| 42 | + |
| 43 | + // Get scaler |
| 44 | + if (per_layer_) { |
| 45 | + // scale input is a scalar () |
| 46 | + shader.MainFunctionBody() |
| 47 | + << "let scale_value = " << scale.GetByOffset("0") << ";\n"; |
| 48 | + } else if (per_axis_) { |
| 49 | + shader.MainFunctionBody() |
| 50 | + << "let scale_index = " << output.IndicesGet("output_indices", "uniforms.axis") << ";\n" |
| 51 | + << "let scale_value = " << scale.GetByOffset("scale_index") << ";\n"; |
| 52 | + } else { |
| 53 | + // Block quantization. Scale input rank is same as input/output rank. |
| 54 | + shader.MainFunctionBody() |
| 55 | + << "var scale_indices: scale_indices_t = output_indices;\n" |
| 56 | + << "let index = " << scale.IndicesGet("scale_indices", "uniforms.axis") << "/ uniforms.block_size;\n" |
| 57 | + << scale.IndicesSet("scale_indices", "uniforms.axis", "index") << ";\n" |
| 58 | + << "let scale_value = " << scale.GetByIndices("scale_indices") << ";\n"; |
| 59 | + } |
| 60 | + |
| 61 | + // Get zero-point |
| 62 | + if (has_zeropoint_) { |
| 63 | + const auto& zero_point = shader.AddInput("zero_point", ShaderUsage::UseUniform | ShaderUsage::UseIndicesTypeAlias); |
| 64 | + |
| 65 | + std::string unpack = (signed_) ? "unpack4xI8(zero_point_input)" : "unpack4xU8(zero_point_input)"; |
| 66 | + if (per_layer_) { |
| 67 | + // zero-point input is a scalar |
| 68 | + if (packed_) { |
| 69 | + shader.MainFunctionBody() |
| 70 | + << "let zero_point_input = " << zero_point.GetByOffset("0") << ";\n" |
| 71 | + << "let zero_point_vec = " << unpack << ";\n" |
| 72 | + << "let zero_point_value = zero_point_vec[0];\n"; |
| 73 | + } else { |
| 74 | + shader.MainFunctionBody() |
| 75 | + << "let zero_point_value = " << zero_point.GetByOffset("0") << ";\n"; |
| 76 | + } |
| 77 | + } else if (per_axis_) { |
| 78 | + // zero-point input is a 1D tensor |
| 79 | + if (packed_) { |
| 80 | + shader.MainFunctionBody() |
| 81 | + << "let zero_point_index = " << output.IndicesGet("output_indices", "uniforms.axis") << ";\n" |
| 82 | + << "let zero_point_input = " << zero_point.GetByOffset("zero_point_index / 4") << ";\n" |
| 83 | + << "let zero_point_vec = " << unpack << ";\n" |
| 84 | + << "let zero_point_value = zero_point_vec[zero_point_index % 4];\n"; |
| 85 | + } else { |
| 86 | + shader.MainFunctionBody() |
| 87 | + << "let zero_point_index = " << output.IndicesGet("output_indices", "uniforms.axis") << ";\n" |
| 88 | + << "let zero_point_value = " << zero_point.GetByOffset("zero_point_index") << ";\n"; |
| 89 | + } |
| 90 | + } else { |
| 91 | + // BlockedQuantization. The zero-point input shape is same as the input shape except along axis. |
| 92 | + if (packed_) { |
| 93 | + shader.MainFunctionBody() |
| 94 | + << "let zero_point_offset = " << scale.GetByIndices("scale_indices") << ";\n" |
| 95 | + << "let zero_point_input = " << zero_point.GetByOffset("zero_point_offset / 4") << ";\n" |
| 96 | + << "let zero_point_vec = " << unpack << ";\n" |
| 97 | + << "let zero_point_value = zero_point_vec[zero_point_offset % 4];\n"; |
| 98 | + } else { |
| 99 | + shader.MainFunctionBody() |
| 100 | + << "let zero_point_value = " << zero_point.GetByIndices("scale_indices") << ";\n"; |
| 101 | + } |
| 102 | + } |
| 103 | + } else { |
| 104 | + shader.MainFunctionBody() |
| 105 | + << "let zero_point_value = input_element_t(0);\n"; |
| 106 | + } |
| 107 | + |
| 108 | + // compute and write output |
| 109 | + shader.MainFunctionBody() |
| 110 | + << output.SetByOffset("global_idx", "(output_value_t(x_value) - scale_value_t(zero_point_value)) * scale_value"); |
| 111 | + |
| 112 | + return Status::OK(); |
| 113 | +} |
| 114 | + |
| 115 | +Status DequantizeLinear::ComputeInternal(ComputeContext& context) const { |
| 116 | + const auto* x = context.Input(0); |
| 117 | + const auto* x_scale = context.Input(1); |
| 118 | + const auto* x_zeropoint = context.Input(2); |
| 119 | + const auto x_shape = x->Shape(); |
| 120 | + int64_t x_size = x_shape.Size(); |
| 121 | + auto* output_tensor = context.Output(0, x_shape); |
| 122 | + int64_t x_scale_rank = x_scale->Shape().NumDimensions(); |
| 123 | + |
| 124 | + bool packed = x->GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8 || x->GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8; |
| 125 | + bool is_signed = x->GetElementType() == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8; |
| 126 | + int64_t axis = (axis_ >= 0) ? axis_ : axis_ + x_shape.NumDimensions(); |
| 127 | + |
| 128 | + int max_components = GetMaxComponents(x_size); |
| 129 | + if (max_components != 4) { |
| 130 | + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "DequantizeLinear: components must be 4, but got ", max_components); |
| 131 | + } |
| 132 | + |
| 133 | + // scaler - single scaler for all elements |
| 134 | + bool per_layer = x_scale_rank == 0 || (x_scale_rank == 1 && x_scale->Shape()[0] == 1); |
| 135 | + |
| 136 | + // 1D tensor - 1 scaler for per axis |
| 137 | + bool per_axis = per_layer == false && x_scale_rank == 1; |
| 138 | + |
| 139 | + bool use_components = per_layer && (!packed || max_components == 4); |
| 140 | + int components = use_components ? max_components : 1; |
| 141 | + int input_component = use_components && !packed ? max_components : 1; |
| 142 | + |
| 143 | + DequantizeLinearProgram program{packed, is_signed, per_layer, per_axis, x_zeropoint != nullptr}; |
| 144 | + |
| 145 | + program |
| 146 | + .AddInputs({{x, ProgramTensorMetadataDependency::TypeAndRank, input_component}}) |
| 147 | + .AddInputs({{x_scale, ProgramTensorMetadataDependency::TypeAndRank}}) |
| 148 | + .AddOutput({output_tensor, ProgramTensorMetadataDependency::None, components}) |
| 149 | + .SetDispatchGroupSize((x_size / components + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE) |
| 150 | + .AddUniformVariables({{static_cast<uint32_t>(axis)}}) |
| 151 | + .AddUniformVariables({{static_cast<uint32_t>(block_size_)}}) |
| 152 | + .AddUniformVariables({{static_cast<uint32_t>(x_size / components)}}) |
| 153 | + .CacheHint(std::to_string(axis), std::to_string(is_signed), std::to_string(per_layer), std::to_string(per_axis), std::to_string(block_size_)); |
| 154 | + |
| 155 | + if (x_zeropoint != nullptr) { |
| 156 | + program.AddInputs({{x_zeropoint, ProgramTensorMetadataDependency::TypeAndRank}}); |
| 157 | + } |
| 158 | + |
| 159 | + return context.RunProgram(program); |
| 160 | +} |
| 161 | + |
| 162 | +namespace { |
| 163 | +const std::vector<MLDataType>& DequantizeLinearConstraints() { |
| 164 | + static std::vector<MLDataType> types{ |
| 165 | + DataTypeImpl::GetTensorType<int8_t>(), |
| 166 | + DataTypeImpl::GetTensorType<uint8_t>(), |
| 167 | + DataTypeImpl::GetTensorType<int32_t>()}; |
| 168 | + return types; |
| 169 | +} |
| 170 | +} // namespace |
| 171 | + |
| 172 | +ONNX_OPERATOR_VERSIONED_KERNEL_EX( |
| 173 | + DequantizeLinear, |
| 174 | + kOnnxDomain, |
| 175 | + 10, 12, |
| 176 | + kWebGpuExecutionProvider, |
| 177 | + (*KernelDefBuilder::Create()) |
| 178 | + .TypeConstraint("T", DequantizeLinearConstraints()), |
| 179 | + DequantizeLinear); |
| 180 | + |
| 181 | +ONNX_OPERATOR_VERSIONED_KERNEL_EX( |
| 182 | + DequantizeLinear, |
| 183 | + kOnnxDomain, |
| 184 | + 13, 18, |
| 185 | + kWebGpuExecutionProvider, |
| 186 | + (*KernelDefBuilder::Create()) |
| 187 | + .TypeConstraint("T", DequantizeLinearConstraints()), |
| 188 | + DequantizeLinear); |
| 189 | + |
| 190 | +ONNX_OPERATOR_VERSIONED_KERNEL_EX( |
| 191 | + DequantizeLinear, |
| 192 | + kOnnxDomain, |
| 193 | + 19, 20, |
| 194 | + kWebGpuExecutionProvider, |
| 195 | + (*KernelDefBuilder::Create()) |
| 196 | + .TypeConstraint("T1", DequantizeLinearConstraints()) |
| 197 | + .TypeConstraint("T2", WebGpuSupportedFloatTypes()), |
| 198 | + DequantizeLinear); |
| 199 | + |
| 200 | +ONNX_OPERATOR_VERSIONED_KERNEL_EX( |
| 201 | + DequantizeLinear, |
| 202 | + kOnnxDomain, |
| 203 | + 21, 22, |
| 204 | + kWebGpuExecutionProvider, |
| 205 | + (*KernelDefBuilder::Create()) |
| 206 | + .TypeConstraint("T1", DequantizeLinearConstraints()) |
| 207 | + .TypeConstraint("T2", WebGpuSupportedFloatTypes()), |
| 208 | + DequantizeLinear); |
| 209 | + |
| 210 | +ONNX_OPERATOR_KERNEL_EX( |
| 211 | + DequantizeLinear, |
| 212 | + kOnnxDomain, |
| 213 | + 23, |
| 214 | + kWebGpuExecutionProvider, |
| 215 | + (*KernelDefBuilder::Create()) |
| 216 | + .TypeConstraint("T1", DequantizeLinearConstraints()) |
| 217 | + .TypeConstraint("T2", WebGpuSupportedFloatTypes()), |
| 218 | + DequantizeLinear); |
| 219 | + |
| 220 | +} // namespace webgpu |
| 221 | +} // namespace onnxruntime |
0 commit comments