Skip to content

Commit 30115cf

Browse files
authored
upgrade dawn version to 4cb1f9be152a4fa6bb695c08cd707ab078a1e2fb (microsoft#24247)
### Description Bump version of Dawn to 4cb1f9be152a4fa6bb695c08cd707ab078a1e2fb. ### Changes to the patches to Dawn: Removed patches because they are already merged into upstream or resolved in a different way: - (public) CMake fix to support Emscripten v4.0.3+ - (private) Fix external ref count for "external" device in emwgpu C++ implementation - (private) Allow "external" buffer in emwgpu C++ implementation Keep unchanged patches: - (private) Remove hard-coded CMAKE_OSX_DEPLOYMENT_TARGET in Dawn's CMake files Rewritten patches: - (public) Fix emwgpu C++ implementation for buffer destroy ### Corresponding changes in ORT - Dawn API changes - follow changes to `wgpu::Limits` - remove the usage of `DAWN_EMSCRIPTEN_TOOLCHAIN` - use `wgpu::InstanceDescriptor` in `wgpu::Instance` creation in WASM since it is supported now.
1 parent 4eeefd7 commit 30115cf

File tree

5 files changed

+29
-159
lines changed

5 files changed

+29
-159
lines changed

cmake/deps.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/c24b7bab0
5757
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
5858
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
5959
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1
60-
dawn;https://github.com/google/dawn/archive/40a9fa79f76e6c76cca9e2fa69ea07f202f1d2e6.zip;e224563d5ab4a8e53a517b06f721242533bce722
60+
dawn;https://github.com/google/dawn/archive/4cb1f9be152a4fa6bb695c08cd707ab078a1e2fb.zip;de39336b7715f53c14eec61072293b85cc73b691
6161
kleidiai;https://github.com/ARM-software/kleidiai/archive/refs/tags/v1.4.0.tar.gz;22d3b57b54a61c194ab256ff11b0353a3b220244

cmake/external/onnxruntime_external_deps.cmake

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -623,9 +623,7 @@ if (onnxruntime_USE_WEBGPU)
623623
set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
624624
set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)
625625
set(DAWN_BUILD_TESTS OFF CACHE BOOL "" FORCE)
626-
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
627-
set(DAWN_EMSCRIPTEN_TOOLCHAIN "${REPO_ROOT}/cmake/external/emsdk/upstream/emscripten" CACHE STRING "" FORCE)
628-
else()
626+
if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
629627
if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
630628
set(DAWN_BUILD_MONOLITHIC_LIBRARY ON CACHE BOOL "" FORCE)
631629
set(DAWN_ENABLE_INSTALL ON CACHE BOOL "" FORCE)
@@ -713,27 +711,14 @@ if (onnxruntime_USE_WEBGPU)
713711
#
714712
# The dawn.patch contains the following changes:
715713
#
716-
# - (public) CMake fix to support Emscripten v4.0.3+
717-
# This change allows Dawn to find the file "gen_struct_info.py" in the correct location.
718-
# https://dawn-review.googlesource.com/c/dawn/+/225514
719-
#
720-
# - (public) Fix emwgpu C++ implementation for buffer destroy
714+
# - (private) Allow WGPUBufferImpl class to destroy the buffer in the destructor
721715
# In native implementation, wgpuBufferRelease will trigger the buffer destroy (if refcount decreased to 0). But
722-
# in emwgpu implementation, the buffer destroy won't happen. This change fixes the bug.
723-
# https://dawn-review.googlesource.com/c/dawn/+/226315
724-
#
725-
# - (private) Allow "external" buffer in emwgpu C++ implementation
726-
# This change allows WGPUBufferImpl to destroy the buffer when the refcount decreased to 0 only for non-external
727-
# buffer.
728-
# "external buffer" means the GPUBuffer instance created in JavaScript and imported to C++ by `importJsBuffer`.
716+
# in emwgpu implementation, the buffer destroy won't happen. This change adds a destructor to the buffer class
717+
# to destroy the buffer when the refcount is 0 for non-external buffers.
729718
#
730719
# - (private) Remove hard-coded CMAKE_OSX_DEPLOYMENT_TARGET in Dawn's CMake files
731720
# https://github.com/microsoft/onnxruntime/pull/23729
732721
#
733-
# - (private) Fix external ref count for "external" device in emwgpu C++ implementation
734-
# This change fixes the incorrect external ref count for class WGPUDeviceImpl when used with "external" device.
735-
# "external device" means the GPUDevice instance created in JavaScript and imported to C++ by `importJsDevice`.
736-
#
737722
#
738723
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
739724
EXCLUDE_FROM_ALL

cmake/patches/dawn/dawn.patch

Lines changed: 9 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -11,137 +11,28 @@ index 50638e2456..efa42711e6 100644
1111
- set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum macOS version" FORCE)
1212
-endif ()
1313
\ No newline at end of file
14-
diff --git a/src/emdawnwebgpu/CMakeLists.txt b/src/emdawnwebgpu/CMakeLists.txt
15-
index 6e8ae37593..633af91eef 100644
16-
--- a/src/emdawnwebgpu/CMakeLists.txt
17-
+++ b/src/emdawnwebgpu/CMakeLists.txt
18-
@@ -77,9 +77,17 @@ if (${DAWN_ENABLE_EMSCRIPTEN})
19-
"${arg_UNPARSED_ARGUMENTS}")
20-
endif()
21-
22-
+ # since Emscripten 4.0.3, file gen_struct_info.py is moved to outside of directory maint.
23-
+ if (EXISTS "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/gen_struct_info.py")
24-
+ set(EM_GEN_STRUCT_INFO_SCRIPT "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/gen_struct_info.py")
25-
+ elseif (EXISTS "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/maint/gen_struct_info.py")
26-
+ set(EM_GEN_STRUCT_INFO_SCRIPT "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/maint/gen_struct_info.py")
27-
+ else()
28-
+ message(FATAL_ERROR "Dawn: Failed to locate file gen_struct_info.py from Emscripten.")
29-
+ endif()
30-
set(ARGS
31-
${Python3_EXECUTABLE}
32-
- "${DAWN_EMSCRIPTEN_TOOLCHAIN}/tools/maint/gen_struct_info.py"
33-
+ "${EM_GEN_STRUCT_INFO_SCRIPT}"
34-
-q
35-
"${EM_BUILD_GEN_DIR}/struct_info_webgpu.json"
36-
"-I=${EM_BUILD_GEN_DIR}/include"
37-
diff --git a/src/emdawnwebgpu/README.md b/src/emdawnwebgpu/README.md
38-
index efd6491cd6..8ebc5d28b6 100644
39-
--- a/src/emdawnwebgpu/README.md
40-
+++ b/src/emdawnwebgpu/README.md
41-
@@ -56,7 +56,7 @@ Set up the build directory using emcmake
42-
mkdir out/cmake-wasm
43-
cd out/cmake-wasm
44-
45-
-# Make sure the path is to the source checkout of Emscripten, not emsdk's release.
46-
+# If using Emscripten v4.0.2 or lower, make sure the path is to the source checkout of Emscripten, not emsdk's release.
47-
emcmake cmake -GNinja -DDAWN_EMSCRIPTEN_TOOLCHAIN="path/to/emscripten" ../..
48-
49-
ninja
5014
diff --git a/third_party/emdawnwebgpu/webgpu.cpp b/third_party/emdawnwebgpu/webgpu.cpp
51-
index f1c5a7d50e..16f2495712 100644
15+
index 5bfac41dcc..71a153daaa 100644
5216
--- a/third_party/emdawnwebgpu/webgpu.cpp
5317
+++ b/third_party/emdawnwebgpu/webgpu.cpp
54-
@@ -131,7 +131,6 @@ class RefCounted : NonMovable {
55-
bool Release() {
56-
if (mRefCount.fetch_sub(1u, std::memory_order_release) == 1u) {
57-
std::atomic_thread_fence(std::memory_order_acquire);
58-
- emwgpuDelete(this);
59-
return true;
60-
}
61-
return false;
62-
@@ -234,6 +233,7 @@ class Ref {
63-
static void Release(T value) {
64-
if (value != nullptr && value->RefCounted::Release()) {
65-
delete value;
66-
+ emwgpuDelete(value);
67-
}
68-
}
69-
70-
@@ -641,7 +641,8 @@ struct WGPUAdapterImpl final : public EventSource, public RefCounted {
71-
struct WGPUBufferImpl final : public EventSource,
72-
public RefCountedWithExternalCount {
73-
public:
74-
- WGPUBufferImpl(const EventSource* source, bool mappedAtCreation);
75-
+ WGPUBufferImpl(const EventSource* source, bool mappedAtCreation, bool isExternal);
18+
@@ -692,6 +692,7 @@ struct WGPUBufferImpl final : public EventSource,
19+
WGPUBufferImpl(const EventSource* source, bool mappedAtCreation);
20+
// Injection constructor used when we already have a backing Buffer.
21+
WGPUBufferImpl(const EventSource* source, WGPUBufferMapState mapState);
7622
+ ~WGPUBufferImpl();
7723

7824
void Destroy();
7925
const void* GetConstMappedRange(size_t offset, size_t size);
80-
@@ -671,6 +672,7 @@ struct WGPUBufferImpl final : public EventSource,
81-
};
82-
MapRequest mPendingMapRequest;
83-
WGPUBufferMapState mMapState;
84-
+ bool mIsExternal;
85-
};
86-
87-
struct WGPUQueueImpl final : public EventSource, public RefCounted {
88-
@@ -1164,11 +1166,15 @@ WGPUAdapter emwgpuCreateAdapter(const EventSource* source) {
89-
90-
WGPUBuffer emwgpuCreateBuffer(const EventSource* source,
91-
bool mappedAtCreation = false) {
92-
- return new WGPUBufferImpl(source, mappedAtCreation);
93-
+ return new WGPUBufferImpl(source, mappedAtCreation, true);
94-
}
95-
96-
WGPUDevice emwgpuCreateDevice(const EventSource* source, WGPUQueue queue) {
97-
- return new WGPUDeviceImpl(source, queue);
98-
+ // This function is only called from JS via `importJsDevice()`, which
99-
+ // needs to increment the external ref count to fix the behavior.
100-
+ WGPUDeviceImpl* device = new WGPUDeviceImpl(source, queue);
101-
+ device->AddExternalRef();
102-
+ return device;
103-
}
104-
105-
WGPUQueue emwgpuCreateQueue(const EventSource* source) {
106-
@@ -1275,15 +1281,22 @@ WGPUAdapterImpl::WGPUAdapterImpl(const EventSource* source)
107-
// WGPUBuffer implementations.
108-
// ----------------------------------------------------------------------------
109-
110-
-WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation)
111-
+WGPUBufferImpl::WGPUBufferImpl(const EventSource* source, bool mappedAtCreation, bool isExternal)
112-
: EventSource(source),
113-
mMapState(mappedAtCreation ? WGPUBufferMapState_Mapped
114-
- : WGPUBufferMapState_Unmapped) {
115-
+ : WGPUBufferMapState_Unmapped),
116-
+ mIsExternal(isExternal) {
117-
if (mappedAtCreation) {
118-
mPendingMapRequest = {kNullFutureId, WGPUMapMode_Write};
119-
}
120-
}
26+
@@ -1361,6 +1362,12 @@ WGPUBufferImpl::WGPUBufferImpl(const EventSource* source,
27+
RefCountedWithExternalCount(kImportedFromJS),
28+
mMapState(mapState) {}
12129

12230
+WGPUBufferImpl::~WGPUBufferImpl() {
123-
+ if (!mIsExternal) {
31+
+ if (!IsImported()) {
12432
+ Destroy();
12533
+ }
12634
+}
12735
+
12836
void WGPUBufferImpl::Destroy() {
12937
emwgpuBufferDestroy(this);
13038
AbortPendingMap("Buffer was destroyed before mapping was resolved.");
131-
@@ -1504,6 +1517,7 @@ WGPUFuture WGPUShaderModuleImpl::GetCompilationInfo(
132-
void wgpu##Name##Release(WGPU##Name o) { \
133-
if (o->Release()) { \
134-
delete o; \
135-
+ emwgpuDelete(o); \
136-
} \
137-
}
138-
WGPU_OBJECTS(DEFINE_WGPU_DEFAULT_ADDREF_RELEASE)
139-
@@ -1638,7 +1652,7 @@ void wgpuBufferUnmap(WGPUBuffer buffer) {
140-
141-
WGPUBuffer wgpuDeviceCreateBuffer(WGPUDevice device,
142-
const WGPUBufferDescriptor* descriptor) {
143-
- WGPUBuffer buffer = new WGPUBufferImpl(device, descriptor->mappedAtCreation);
144-
+ WGPUBuffer buffer = new WGPUBufferImpl(device, descriptor->mappedAtCreation, false);
145-
emwgpuDeviceCreateBuffer(device, descriptor, buffer);
146-
return buffer;
147-
}

onnxruntime/core/providers/webgpu/webgpu_context.cc

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi
110110
device_desc.requiredFeatures = required_features.data();
111111
device_desc.requiredFeatureCount = required_features.size();
112112
}
113-
wgpu::RequiredLimits required_limits = GetRequiredLimits(adapter);
113+
wgpu::Limits required_limits = GetRequiredLimits(adapter);
114114
device_desc.requiredLimits = &required_limits;
115115

116116
// TODO: revise temporary error handling
@@ -139,9 +139,7 @@ void WebGpuContext::Initialize(const WebGpuBufferCacheConfig& buffer_cache_confi
139139
// cache adapter info
140140
ORT_ENFORCE(Device().GetAdapterInfo(&adapter_info_));
141141
// cache device limits
142-
wgpu::SupportedLimits device_supported_limits;
143-
ORT_ENFORCE(Device().GetLimits(&device_supported_limits));
144-
device_limits_ = device_supported_limits.limits;
142+
ORT_ENFORCE(Device().GetLimits(&device_limits_));
145143

146144
#if !defined(__wasm__)
147145
supports_buffer_map_extended_usages_ = device_.HasFeature(wgpu::FeatureName::BufferMapExtendedUsages);
@@ -508,20 +506,20 @@ std::vector<wgpu::FeatureName> WebGpuContext::GetAvailableRequiredFeatures(const
508506
return required_features;
509507
}
510508

511-
wgpu::RequiredLimits WebGpuContext::GetRequiredLimits(const wgpu::Adapter& adapter) const {
512-
wgpu::RequiredLimits required_limits{};
513-
wgpu::SupportedLimits adapter_limits;
509+
wgpu::Limits WebGpuContext::GetRequiredLimits(const wgpu::Adapter& adapter) const {
510+
wgpu::Limits required_limits{};
511+
wgpu::Limits adapter_limits;
514512
ORT_ENFORCE(adapter.GetLimits(&adapter_limits));
515513

516-
required_limits.limits.maxBindGroups = adapter_limits.limits.maxBindGroups;
517-
required_limits.limits.maxComputeWorkgroupStorageSize = adapter_limits.limits.maxComputeWorkgroupStorageSize;
518-
required_limits.limits.maxComputeWorkgroupsPerDimension = adapter_limits.limits.maxComputeWorkgroupsPerDimension;
519-
required_limits.limits.maxStorageBufferBindingSize = adapter_limits.limits.maxStorageBufferBindingSize;
520-
required_limits.limits.maxBufferSize = adapter_limits.limits.maxBufferSize;
521-
required_limits.limits.maxComputeInvocationsPerWorkgroup = adapter_limits.limits.maxComputeInvocationsPerWorkgroup;
522-
required_limits.limits.maxComputeWorkgroupSizeX = adapter_limits.limits.maxComputeWorkgroupSizeX;
523-
required_limits.limits.maxComputeWorkgroupSizeY = adapter_limits.limits.maxComputeWorkgroupSizeY;
524-
required_limits.limits.maxComputeWorkgroupSizeZ = adapter_limits.limits.maxComputeWorkgroupSizeZ;
514+
required_limits.maxBindGroups = adapter_limits.maxBindGroups;
515+
required_limits.maxComputeWorkgroupStorageSize = adapter_limits.maxComputeWorkgroupStorageSize;
516+
required_limits.maxComputeWorkgroupsPerDimension = adapter_limits.maxComputeWorkgroupsPerDimension;
517+
required_limits.maxStorageBufferBindingSize = adapter_limits.maxStorageBufferBindingSize;
518+
required_limits.maxBufferSize = adapter_limits.maxBufferSize;
519+
required_limits.maxComputeInvocationsPerWorkgroup = adapter_limits.maxComputeInvocationsPerWorkgroup;
520+
required_limits.maxComputeWorkgroupSizeX = adapter_limits.maxComputeWorkgroupSizeX;
521+
required_limits.maxComputeWorkgroupSizeY = adapter_limits.maxComputeWorkgroupSizeY;
522+
required_limits.maxComputeWorkgroupSizeZ = adapter_limits.maxComputeWorkgroupSizeZ;
525523

526524
return required_limits;
527525
}
@@ -740,13 +738,9 @@ WebGpuContext& WebGpuContextFactory::CreateContext(const WebGpuContextConfig& co
740738
#endif
741739

742740
// Step.2 - Create wgpu::Instance
743-
#if !defined(__wasm__)
744741
wgpu::InstanceDescriptor instance_desc{};
745742
instance_desc.capabilities.timedWaitAnyEnable = true;
746743
default_instance_ = wgpu::CreateInstance(&instance_desc);
747-
#else
748-
default_instance_ = wgpu::CreateInstance(nullptr);
749-
#endif
750744

751745
ORT_ENFORCE(default_instance_ != nullptr, "Failed to create wgpu::Instance.");
752746
});

onnxruntime/core/providers/webgpu/webgpu_context.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@ class WebGpuContext final {
161161
std::vector<const char*> GetEnabledDeviceToggles() const;
162162
std::vector<const char*> GetDisabledDeviceToggles() const;
163163
std::vector<wgpu::FeatureName> GetAvailableRequiredFeatures(const wgpu::Adapter& adapter) const;
164-
wgpu::RequiredLimits GetRequiredLimits(const wgpu::Adapter& adapter) const;
164+
wgpu::Limits GetRequiredLimits(const wgpu::Adapter& adapter) const;
165165
void WriteTimestamp(uint32_t query_index);
166166

167167
struct PendingKernelInfo {

0 commit comments

Comments
 (0)