
Commit 40dd1f0

Fix formatting and add GitHub Actions workflows for the Vulkan and Metal (M-series) WebGPU backends
1 parent e0d8a71 commit 40dd1f0

File tree: 6 files changed, +163 −27 lines

.github/workflows/build.yml

Lines changed: 130 additions & 0 deletions

@@ -135,6 +135,70 @@ jobs:
           cd build
           ctest -L main --verbose --timeout 900
 
+  macOS-latest-cmake-arm64-webgpu:
+    runs-on: macos-14
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/[email protected]
+        with:
+          key: macOS-latest-cmake-arm64-webgpu
+          evict-old-files: 1d
+
+      - name: Dependencies
+        id: depends
+        continue-on-error: true
+        run: |
+          brew update
+          brew install curl
+
+      - name: Dawn Dependency
+        id: dawn-depends
+        run: |
+          ARTIFACTS_JSON=$(curl -s -L \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "https://api.github.com/repos/google/dawn/actions/artifacts")
+          echo "Finding latest macos-latest-Release artifact..."
+          DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
+            | sort_by(.created_at)
+            | reverse
+            | map(select(.name | test("macos-latest-Release$")))
+            | .[0].archive_download_url')
+          if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
+            echo "No suitable Dawn artifact found!"
+            exit 1
+          fi
+          echo "Downloading from: $DOWNLOAD_URL"
+          curl -L \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            -o artifact.zip "$DOWNLOAD_URL"
+          unzip artifact.zip
+          mkdir dawn
+          tar_file=$(find . -name '*.tar.gz' | head -n 1)
+          echo "Extracting: $tar_file"
+          tar -xvf "$tar_file" -C dawn --strip-components=1
+
+      - name: Build
+        id: cmake_build
+        run: |
+          sysctl -a
+          export Dawn_DIR=dawn/lib64/cmake/Dawn
+          cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF
+          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          ctest -L main --verbose --timeout 900
+
   ubuntu-cpu-cmake:
     strategy:
       matrix:
@@ -344,6 +408,72 @@ jobs:
           # This is using llvmpipe and runs slower than other backends
           ctest -L main --verbose --timeout 3600
 
+  ubuntu-22-cmake-webgpu:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+
+      - name: ccache
+        uses: hendrikmuhs/[email protected]
+        with:
+          key: ubuntu-22-cmake-webgpu
+          evict-old-files: 1d
+
+      - name: Vulkan SDK Dependencies
+        id: vulkan-depends
+        run: |
+          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt-get update -y
+          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev
+
+      - name: Dawn Dependency
+        id: dawn-depends
+        run: |
+          sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev
+          ARTIFACTS_JSON=$(curl -s -L \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "https://api.github.com/repos/google/dawn/actions/artifacts")
+          echo "Finding latest ubuntu-latest-Release artifact..."
+          DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts
+            | sort_by(.created_at)
+            | reverse
+            | map(select(.name | test("ubuntu-latest-Release$")))
+            | .[0].archive_download_url')
+          if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then
+            echo "No suitable Dawn artifact found!"
+            exit 1
+          fi
+          echo "Downloading from: $DOWNLOAD_URL"
+          curl -L \
+            -H "Accept: application/vnd.github+json" \
+            -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
+            -o artifact.zip "$DOWNLOAD_URL"
+          unzip artifact.zip
+          mkdir dawn
+          tar_file=$(find . -name '*.tar.gz' | head -n 1)
+          echo "Extracting: $tar_file"
+          tar -xvf "$tar_file" -C dawn --strip-components=1
+
+      - name: Build
+        id: cmake_build
+        run: |
+          export Dawn_DIR=dawn/lib64/cmake/Dawn
+          cmake -B build -DGGML_WEBGPU=ON
+          cmake --build build --config Release -j $(nproc)
+
+      - name: Test
+        id: cmake_test
+        run: |
+          cd build
+          # This is using llvmpipe and runs slower than other backends
+          ctest -L main --verbose --timeout 3600
+
   ubuntu-22-cmake-hip:
     runs-on: ubuntu-22.04
     container: rocm/dev-ubuntu-22.04:6.0.2
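
Both new jobs select the Dawn build to link against purely by artifact-name suffix, so they always pull the newest CI upload. The selection logic can be sanity-checked outside CI; a minimal sketch using the `gh` CLI (a local-use assumption — the workflow itself authenticates with raw `curl` and `secrets.GITHUB_TOKEN`):

```sh
# Pick the newest macOS Release artifact from Dawn's CI, mirroring the
# jq filter used in the workflow above. Requires an authenticated `gh`.
gh api repos/google/dawn/actions/artifacts | jq -r '.artifacts
  | sort_by(.created_at)
  | reverse
  | map(select(.name | test("macos-latest-Release$")))
  | .[0].archive_download_url'
```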

docs/build.md

Lines changed: 2 additions & 2 deletions

@@ -568,9 +568,9 @@ cmake -B build -DGGML_WEBGPU=ON
 cmake --build build --config Release
 ```
 
-### Browser Support
+### Browser Support
 
-WebGPU allows cross-platform access to the GPU from supported browsers. We utilize [Emscripten](https://emscripten.org/) to compile ggml's WebGPU backend to WebAssembly. Emscripten does not officially support WebGPU bindings yet, but Dawn currently maintains its own WebGPU bindings called emdawnwebgpu.
+WebGPU allows cross-platform access to the GPU from supported browsers. We utilize [Emscripten](https://emscripten.org/) to compile ggml's WebGPU backend to WebAssembly. Emscripten does not officially support WebGPU bindings yet, but Dawn currently maintains its own WebGPU bindings called emdawnwebgpu.
 
 Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/src/emdawnwebgpu/) to download or build the emdawnwebgpu package (Note that it might be safer to build the emdawnwebgpu package locally, so that it stays in sync with the version of Dawn you have installed above). When building using CMake, the path to the emdawnwebgpu port file needs to be set with the flag `EMDAWNWEBGPU_DIR`.
 
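
The paragraph above names the `EMDAWNWEBGPU_DIR` CMake flag; as a sketch only, an Emscripten configure step might look like this (the port-file path is hypothetical and depends on where the emdawnwebgpu package was unpacked):

```sh
# Hypothetical sketch: configure ggml's WebGPU backend for WebAssembly.
# /path/to/emdawnwebgpu stands in for wherever the package was extracted.
emcmake cmake -B build-web -DGGML_WEBGPU=ON \
    -DEMDAWNWEBGPU_DIR=/path/to/emdawnwebgpu
cmake --build build-web --config Release
```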

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -51,4 +51,4 @@ if (GGML_WEBGPU_DEBUG)
 endif()
 
 target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
-target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})
+target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET})

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 24 additions & 22 deletions

@@ -7,7 +7,9 @@
 
 #include "ggml-wgsl-shaders.hpp"
 
+#include <cstring>
 #include <iostream>
+#include <mutex>
 #include <vector>
 
 #ifdef GGML_WEBGPU_DEBUG
@@ -131,7 +133,7 @@ static void ggml_webgpu_create_buffer(wgpu::Device &device, wgpu::Buffer &buffer
     buffer_desc.size = size;
     buffer_desc.usage = usage;
     buffer_desc.label = label;
-    buffer_desc.mappedAtCreation = false;
+    buffer_desc.mappedAtCreation = false;
     // TODO: error handling
     buffer = device.CreateBuffer(&buffer_desc);
 }
@@ -161,7 +163,7 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b
     uint32_t * params = (uint32_t *) ctx->memset_params_host_buf.GetMappedRange();
 
     params[0] = (uint32_t)offset;
-    params[1] = (uint32_t)size;
+    params[1] = (uint32_t)size;
     params[2] = value;
     ctx->memset_params_host_buf.Unmap();
 
@@ -184,8 +186,8 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b
 
     wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
     encoder.CopyBufferToBuffer(
-        ctx->memset_params_host_buf, 0,
-        ctx->memset_params_dev_buf, 0,
+        ctx->memset_params_host_buf, 0,
+        ctx->memset_params_dev_buf, 0,
         ctx->memset_params_dev_buf.GetSize()
     );
     wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -206,7 +208,7 @@ static void ggml_backend_webgpu_wait_on_submission(webgpu_context ctx) {
             if (status != wgpu::QueueWorkDoneStatus::Success) {
                 GGML_LOG_ERROR("ggml_webgpu: Failed to wait on queue: %s\n", message.data);
             }
-        }),
+        }),
         UINT64_MAX
     );
 }
@@ -243,7 +245,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
         case GGML_OP_VIEW:
         case GGML_OP_PERMUTE:
             return false;
-
+
         case GGML_OP_CPY: {
             std::lock_guard<std::mutex> lock(ctx->mutex);
             const ggml_tensor * src = node->src[0];
@@ -259,7 +261,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1);
 
             wgpu::Device device = ctx->device;
-            ggml_backend_webgpu_map_buffer(ctx, ctx->cpy_params_host_buf,
+            ggml_backend_webgpu_map_buffer(ctx, ctx->cpy_params_host_buf,
                 wgpu::MapMode::Write, 0, ctx->cpy_params_host_buf.GetSize());
             uint32_t * params = (uint32_t *) ctx->cpy_params_host_buf.GetMappedRange();
             uint32_t ne = (uint32_t)ggml_nelements(node);
@@ -309,8 +311,8 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
 
             wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
             encoder.CopyBufferToBuffer(
-                ctx->cpy_params_host_buf, 0,
-                ctx->cpy_params_dev_buf, 0,
+                ctx->cpy_params_host_buf, 0,
+                ctx->cpy_params_dev_buf, 0,
                 ctx->cpy_params_dev_buf.GetSize()
             );
             wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -343,7 +345,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             wgpu::Device device = ctx->device;
 
             // map the host parameters buffer
-            ggml_backend_webgpu_map_buffer(ctx, ctx->mul_mat_params_host_buf,
+            ggml_backend_webgpu_map_buffer(ctx, ctx->mul_mat_params_host_buf,
                 wgpu::MapMode::Write, 0, ctx->mul_mat_params_host_buf.GetSize());
             uint32_t * params = (uint32_t *) ctx->mul_mat_params_host_buf.GetMappedRange();
 
@@ -371,7 +373,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             entries[0].offset = src0_offset;
             entries[0].size = ggml_nbytes(src0);
 
-            entries[1].binding = 1;
+            entries[1].binding = 1;
             entries[1].buffer = src1_ctx->buffer;
             entries[1].offset = src1_offset;
             entries[1].size = ggml_nbytes(src1);
@@ -395,8 +397,8 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
 
             wgpu::CommandEncoder encoder = device.CreateCommandEncoder();
             encoder.CopyBufferToBuffer(
-                ctx->mul_mat_params_host_buf, 0,
-                ctx->mul_mat_params_dev_buf, 0,
+                ctx->mul_mat_params_host_buf, 0,
+                ctx->mul_mat_params_dev_buf, 0,
                 ctx->mul_mat_params_dev_buf.GetSize()
             );
             wgpu::ComputePassEncoder pass = encoder.BeginComputePass();
@@ -417,7 +419,7 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){
             return false;
     }
 }
-
+
 static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
     WEBGPU_LOG_DEBUG("ggml_backend_webgpu_graph_compute(" << cgraph->n_nodes << " nodes)");
 
@@ -517,13 +519,13 @@ static void ggml_backend_webgpu_buffer_get_tensor(ggml_backend_buffer_t buffer,
 
     std::lock_guard<std::mutex> lock(webgpu_ctx->mutex);
 
-    if (webgpu_ctx->get_tensor_staging_buf == nullptr ||
+    if (webgpu_ctx->get_tensor_staging_buf == nullptr ||
         webgpu_ctx->get_tensor_staging_buf.GetSize() < final_size) {
         // Create a new staging buffer if it doesn't exist or is too small
         if (webgpu_ctx->get_tensor_staging_buf) {
             webgpu_ctx->get_tensor_staging_buf.Destroy();
         }
-        ggml_webgpu_create_buffer(device, webgpu_ctx->get_tensor_staging_buf, final_size,
+        ggml_webgpu_create_buffer(device, webgpu_ctx->get_tensor_staging_buf, final_size,
            wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead, "get_tensor_staging_buf");
     }
 
@@ -577,7 +579,7 @@ static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_b
     ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(buft->device->context);
 
     wgpu::Buffer buf;
-    ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf, size,
+    ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf, size,
         wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst, "allocated_buffer");
 
     ggml_backend_webgpu_buffer_context * buf_ctx = new ggml_backend_webgpu_buffer_context(ctx->webgpu_ctx, buf);
@@ -652,7 +654,7 @@ static void ggml_webgpu_init_memset_pipeline(webgpu_context webgpu_ctx) {
     constants[1].key = "bytes_per_thread";
     constants[1].value = webgpu_ctx->memset_bytes_per_thread;
     ggml_webgpu_create_pipeline(webgpu_ctx->device, webgpu_ctx->memset_pipeline, wgsl_memset, "memset", constants);
-    ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_dev_buf,
+    ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_dev_buf,
         3 * sizeof(uint32_t), // 3 parameters: buffer size, offset, value
         wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, "memset_params_dev_buf");
     ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_host_buf,
@@ -679,7 +681,7 @@ static void ggml_webgpu_init_cpy_pipeline(webgpu_context webgpu_ctx) {
         wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc, "cpy_params_host_buf");
 }
 
-// TODO: Does this need to be thread safe? Is it only called once?
+// TODO: Make thread safe if multiple devices are used
 static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, const char * params) {
     GGML_UNUSED(params);
 
@@ -696,7 +698,7 @@ static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, co
     dev_desc.requiredLimits = &webgpu_ctx->limits;
     dev_desc.requiredFeatures = webgpu_ctx->features.features;
     dev_desc.requiredFeatureCount = webgpu_ctx->features.featureCount;
-    dev_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
+    dev_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous,
         [](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) {
             GGML_UNUSED(device);
             GGML_LOG_ERROR("ggml_webgpu: Device lost! Reason: %d, Message: %s\n", static_cast<int>(reason), message.data);
@@ -847,7 +849,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
     device_ctx.device_name = std::string(info.device.data);
     device_ctx.device_desc = std::string(info.description.data);
 
-    GGML_LOG_INFO("ggml_webgpu: adapter_info: vendor_id: %u | vendor: %s | architecture: %s | device_id: %u | name: %s | device_desc: %s\n",
+    GGML_LOG_INFO("ggml_webgpu: adapter_info: vendor_id: %u | vendor: %s | architecture: %s | device_id: %u | name: %s | device_desc: %s\n",
         info.vendorID, info.vendor.data, info.architecture.data, info.deviceID, info.device.data, info.description.data);
 
     // See GGML Backend Device Interface section
@@ -902,4 +904,4 @@ ggml_backend_t ggml_backend_webgpu_init(void) {
     return ggml_backend_webgpu_device_init(dev, nullptr);
 }
 
-GGML_BACKEND_DL_IMPL(ggml_backend_webgpu_reg)
+GGML_BACKEND_DL_IMPL(ggml_backend_webgpu_reg)

ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py

Lines changed: 4 additions & 0 deletions

@@ -1,13 +1,16 @@
 import os
 import argparse
 
+
 def escape_triple_quotes(wgsl):
     # Simple defense in case of embedded """
     return wgsl.replace('"""', '\\"""')
 
+
 def to_cpp_string_literal(varname, content):
     return f'const char* wgsl_{varname} = R"({content})";\n'
 
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('--input', required=True)
@@ -27,5 +30,6 @@ def main():
         out.write(to_cpp_string_literal(varname, content))
         out.write('\n')
 
+
 if __name__ == '__main__':
     main()
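
This script generates the `ggml-wgsl-shaders.hpp` header included by `ggml-webgpu.cpp` above, turning each WGSL source into a `const char* wgsl_<name>` raw string literal. It is normally driven by the CMake build, but a hand-run sketch might look like this — note that only `--input` is visible in the hunk above, so the `--output` flag and both paths are assumptions for illustration:

```sh
# Assumed invocation: embed the WGSL sources as C++ raw string literals.
# --output is inferred by symmetry with --input and may differ in reality.
python3 ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py \
    --input ggml/src/ggml-webgpu/wgsl-shaders \
    --output build/ggml-wgsl-shaders.hpp
```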

ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl

Lines changed: 2 additions & 2 deletions

@@ -41,7 +41,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
     let src02_idx = dst2_idx / params.broadcast2; // src0 may also be broadcast along the second dimension
     let src12_idx = dst2_idx; // src1 is not broadcast
 
-    let dst2_rem = dst3_rem % dst2_stride;
+    let dst2_rem = dst3_rem % dst2_stride;
 
     let row = dst2_rem / params.n; // output row
     let col = dst2_rem % params.n; // output column
@@ -53,4 +53,4 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
         sum = sum + src0[src0_idx] * src1[src1_idx];
     }
     dst[dst3_idx * dst3_stride + dst2_idx * dst2_stride + row * params.n + col] = sum;
-}
+}
