Skip to content

Commit ffa8bc3

Browse files
qti-kromeroGitHub Enterprise
authored and committed
[MERGE-UPSTREAM] Automated upstream merge for 20251117
* From PR
2 parents fd4e7f1 + 67da8aa commit ffa8bc3

File tree

73 files changed

+2100
-347
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

73 files changed

+2100
-347
lines changed

.github/workflows/windows_x86.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
working-directory: ${{ github.workspace }}
6262

6363
- name: Use .NET 8.x
64-
uses: actions/setup-dotnet@v5
64+
uses: actions/setup-dotnet@v3
6565
with:
6666
dotnet-version: '8.x'
6767
env:

cmake/CMakeLists.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1456,7 +1456,12 @@ if (onnxruntime_USE_CUDA)
14561456
message(STATUS "CUDA Toolkit version is greater or equal than 12.8, enable -DENABLE_FP4 flag")
14571457
endif()
14581458

1459-
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
1459+
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0")
1460+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all -compress-mode=size")
1461+
else()
1462+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
1463+
endif()
1464+
14601465
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
14611466
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch")
14621467

cmake/deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0
5656
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
5757
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.12.0.zip;7e733cfdc410d777b76122d64232499205589a96
5858
dawn;https://github.com/google/dawn/archive/13c1635a14574ebb7116b56a69f5519301417fda.zip;0aadd28fc385cf7d657d5fc70a352372d2d3c76a
59-
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.10.0.tar.gz;11b62149cb2514b3b9069cc435c3aa7a4e82b97a
59+
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.15.0.tar.gz;62ccd24ab60bcef68766440fb42d79071ac2a5d2
6060
duktape;https://github.com/svaarala/duktape/releases/download/v2.7.0/duktape-2.7.0.tar.xz;8200c8e417dbab7adcc12c4dbdef7651cfc55794

js/common/lib/inference-session.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,20 @@ export declare namespace InferenceSession {
258258
*/
259259
forceCpuNodeNames?: readonly string[];
260260

261+
/**
262+
* Specify the validation mode for WebGPU execution provider.
263+
* - 'disabled': Disable all validation.
264+
* When used in Node.js, disable validation may cause process crash if WebGPU errors occur. Be cautious when using
265+
* this mode.
266+
* When used in web, this mode is equivalent to 'wgpuOnly'.
267+
* - 'wgpuOnly': Perform WebGPU internal validation only.
268+
* - 'basic': Perform basic validation including WebGPU internal validation. This is the default mode.
269+
* - 'full': Perform full validation. This mode may have performance impact. Use it for debugging purpose.
270+
*
271+
* @default 'basic'
272+
*/
273+
validationMode?: 'disabled' | 'wgpuOnly' | 'basic' | 'full';
274+
261275
/**
262276
* Specify an optional WebGPU device to be used by the WebGPU execution provider.
263277
*/

js/package-lock.json

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

js/web/lib/wasm/session-options.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ const setExecutionProviders = async (
129129

130130
appendEpOption(epOptions, 'forceCpuNodeNames', names.join('\n'), allocs);
131131
}
132+
133+
// set validation mode
134+
if (webgpuOptions.validationMode) {
135+
appendEpOption(epOptions, 'validationMode', webgpuOptions.validationMode, allocs);
136+
}
132137
}
133138

134139
const info = getInstance().webgpuRegisterDevice!(customDevice);

onnxruntime/contrib_ops/cpu/quantization/dynamic_quantize_matmul.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4-
#include "core/common/cpuid_info.h" // for CPUIDInfo::GetCPUIDInfo().HasArm_SME()
4+
#include "core/common/cpuid_info.h" // for CPUIDInfo::GetCPUIDInfo().HasArm_SME2()
55
#include "core/common/narrow.h"
66
#include "core/common/safeint.h"
77
#include "core/mlas/inc/mlas.h"
@@ -213,9 +213,9 @@ class DynamicQuantizeMatMul final : public MatMulIntegerToFloatBase {
213213
}
214214
}
215215

216-
// Currently, MlasDynamicQGemmBatch() and associated functions require SME or else they are no-ops.
216+
// Currently, MlasDynamicQGemmBatch() and associated functions require SME2 or else they are no-ops.
217217
// We check that here too before attempting to use them.
218-
if (!CPUIDInfo::GetCPUIDInfo().HasArm_SME()) {
218+
if (!CPUIDInfo::GetCPUIDInfo().HasArm_SME2()) {
219219
can_use_dynamic_quant_mlas_ = false;
220220
}
221221

onnxruntime/contrib_ops/webgpu/bert/attention.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ Status InPlaceSoftmaxProgram::GenerateShaderCode(ShaderHelper& shader) const {
284284

285285
if (has_head_sink_) {
286286
// Handle head sink
287-
shader.MainFunctionBody() << "let sink_value: f32 = head_sink[head_idx];\n"
287+
shader.MainFunctionBody() << "let sink_value: f32 = f32(head_sink[head_idx]);\n"
288288
<< "var max_value = sink_value;\n";
289289
} else if (use_smooth_softmax_) {
290290
shader.MainFunctionBody() << "var max_value: f32 = 0.0;\n";
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include "core/providers/webgpu/shader_helper.h"
#include "core/providers/webgpu/webgpu_supported_types.h"
#include "core/providers/webgpu/math/unary_elementwise_ops.h"
#include "contrib_ops/webgpu/bert/bias_gelu.h"
#include "contrib_ops/webgpu/webgpu_contrib_kernels.h"

namespace onnxruntime {
namespace contrib {
namespace webgpu {

ONNX_OPERATOR_KERNEL_EX(
    BiasGelu,
    kMSDomain,
    1,
    kWebGpuExecutionProvider,
    (*KernelDefBuilder::Create())
        .TypeConstraint("T", WebGpuSupportedFloatTypes()),
    BiasGelu);

Status BiasGeluProgram::GenerateShaderCode(ShaderHelper& shader) const {
  // Bind the shader variables: vectorized input/output plus the 1-D bias.
  const auto& input_var = shader.AddInput("x", ShaderUsage::UseUniform | ShaderUsage::UseValueTypeAlias);
  const auto& bias_var = shader.AddInput("bias", ShaderUsage::UseUniform | ShaderUsage::UseShapeAndStride);
  const auto& output_var = shader.AddOutput("y", ShaderUsage::UseUniform);

  // Emit the erf() helper required by the GELU expression below.
  shader.AdditionalImplementation() << onnxruntime::webgpu::ErfImpl;
  shader.MainFunctionBody() << shader.GuardAgainstOutOfBoundsWorkgroupSizes("uniforms.vec_size")
                            << " var a = " << input_var.GetByOffset("global_idx") << ";\n";

  // Add the bias to the input. When the bias could not be vectorized
  // (bias_components_ == 1), gather its four scalar lanes individually;
  // otherwise a single vectorized load matches the input's vec4 layout.
  if (bias_components_ == 1) {
    shader.MainFunctionBody() << " let bias_offset = global_idx * 4;\n"
                                 " a += x_value_t("
                              << bias_var.GetByOffset("bias_offset % uniforms.bias_shape") << ", "
                              << bias_var.GetByOffset("(bias_offset + 1) % uniforms.bias_shape") << ", "
                              << bias_var.GetByOffset("(bias_offset + 2) % uniforms.bias_shape") << ", "
                              << bias_var.GetByOffset("(bias_offset + 3) % uniforms.bias_shape") << ");\n";
  } else {
    shader.MainFunctionBody() << " a += " << bias_var.GetByOffset("global_idx % uniforms.bias_shape") + ";\n";
  }

  // GELU activation: 0.5 * a * (1.0 + erf(a * 0.7071067811865475))
  shader.MainFunctionBody() << output_var.SetByOffset("global_idx", onnxruntime::webgpu::GeluExpr);

  return Status::OK();
}

Status BiasGelu::ComputeInternal(onnxruntime::webgpu::ComputeContext& context) const {
  const auto* x_tensor = context.Input(0);
  const auto* bias_tensor = context.Input(1);
  auto* y_tensor = context.Output(0, x_tensor->Shape());

  const uint32_t total_size = onnxruntime::narrow<uint32_t>(y_tensor->Shape().Size());
  // Nothing to dispatch for an empty tensor.
  if (total_size == 0) {
    return Status::OK();
  }

  const auto& x_shape = x_tensor->Shape();
  const auto& b_shape = bias_tensor->Shape();

  // Shape checks: bias is 1-D and spans the innermost dimension of the input.
  if (x_shape.NumDimensions() < 1 || b_shape.NumDimensions() != 1) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                           "BiasGelu: input must have at least 1 dimension and bias must be 1-dimensional.");
  }

  if (x_shape.GetDims().back() != b_shape.GetDims().back()) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                           "BiasGelu: bias must match the last dimension of input.");
  }

  // The shader walks the flattened input as vec4 elements.
  const uint32_t vec4_count = (total_size + 3) / 4;

  // Vectorize the bias as well whenever its length is a multiple of 4.
  uint32_t bias_length = onnxruntime::narrow<uint32_t>(bias_tensor->Shape().Size());
  const bool bias_is_vec4 = (bias_length % 4 == 0);
  const int bias_components = bias_is_vec4 ? 4 : 1;
  if (bias_is_vec4) {
    bias_length /= 4;
  }

  BiasGeluProgram program{bias_components};
  program.AddInput({x_tensor, ProgramTensorMetadataDependency::Type, {vec4_count}, 4})
      .AddInput({bias_tensor, ProgramTensorMetadataDependency::TypeAndRank, {bias_length}, bias_components})
      .AddOutput({y_tensor, ProgramTensorMetadataDependency::None, {vec4_count}, 4})
      .SetDispatchGroupSize((vec4_count + WORKGROUP_SIZE - 1) / WORKGROUP_SIZE)
      .AddUniformVariable({vec4_count});

  return context.RunProgram(program);
}

}  // namespace webgpu
}  // namespace contrib
}  // namespace onnxruntime
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once

#include "core/providers/webgpu/program.h"
#include "core/providers/webgpu/webgpu_kernel.h"

namespace onnxruntime {
namespace contrib {
namespace webgpu {

// Explicit using-declarations instead of a blanket `using namespace`:
// a using-directive at header scope injects every onnxruntime::webgpu name
// into all translation units that include this header (C++ Core Guidelines
// SF.7). Only the names used by this header and its companion .cc are pulled in.
using onnxruntime::webgpu::ComputeContext;
using onnxruntime::webgpu::Program;
using onnxruntime::webgpu::ProgramTensorMetadataDependency;
using onnxruntime::webgpu::ProgramUniformVariableDataType;
using onnxruntime::webgpu::ShaderHelper;
using onnxruntime::webgpu::ShaderUsage;
using onnxruntime::webgpu::WebGpuKernel;
using onnxruntime::webgpu::WORKGROUP_SIZE;

// Shader program that adds a 1-D bias to the input and applies the GELU
// activation in a single fused pass.
class BiasGeluProgram final : public Program<BiasGeluProgram> {
 public:
  // `bias_components` is the vectorization width of the bias binding (1 or 4);
  // it selects which bias-load path the generated WGSL uses.
  explicit BiasGeluProgram(int bias_components) : Program{"BiasGelu"}, bias_components_{bias_components} {
  }

  Status GenerateShaderCode(ShaderHelper& sh) const override;

  // `vec_size`: number of vec4 elements the shader iterates over.
  WEBGPU_PROGRAM_DEFINE_UNIFORM_VARIABLES({"vec_size", ProgramUniformVariableDataType::Uint32});

 private:
  int bias_components_;
};

// com.microsoft.BiasGelu WebGPU kernel: y = GELU(x + bias).
class BiasGelu final : public WebGpuKernel {
 public:
  explicit BiasGelu(const OpKernelInfo& info) : WebGpuKernel(info) {}

  Status ComputeInternal(ComputeContext& context) const override;
};

}  // namespace webgpu
}  // namespace contrib
}  // namespace onnxruntime

0 commit comments

Comments (0)