Skip to content

Commit a5e26c2

Browse files
committed
Update CODEOWNERS and remove serialize submit option
1 parent d3c7ddd commit a5e26c2

File tree

4 files changed

+8
-22
lines changed

4 files changed

+8
-22
lines changed

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,11 +70,11 @@
7070
/ggml/src/ggml-rpc/ @rgerganov
7171
/ggml/src/ggml-threading.* @ggerganov @slaren
7272
/ggml/src/ggml-vulkan/ @0cc4m
73+
/ggml/src/ggml-webgpu/ @reeselevine
7374
/ggml/src/ggml-zdnn/ @taronaeo @Andreas-Krebbel @AlekseiNikiforovIBM
7475
/ggml/src/ggml.c @ggerganov @slaren
7576
/ggml/src/ggml.cpp @ggerganov @slaren
7677
/ggml/src/gguf.cpp @JohannesGaessler @Green-Sky
77-
/ggml/src/ggml-webgpu/ @reeselevine
7878
/gguf-py/ @CISC
7979
/media/ @ggerganov
8080
/scripts/gen* @ggerganov

ggml/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,6 @@ option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug in
221221
option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF)
222222
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
223223
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
224-
option(GGML_WEBGPU_SERIALIZE_SUBMIT "ggml: enable WebGPU command serialization" OFF)
225224
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
226225
option(GGML_WEBGPU_CPU_PROFILE "ggml: enable WebGPU profiling (CPU)" OFF)
227226
option(GGML_WEBGPU_GPU_PROFILE "ggml: enable WebGPU profiling (GPU)" OFF)

ggml/src/ggml-webgpu/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,6 @@ else()
4646
set(DawnWebGPU_TARGET dawn::webgpu_dawn)
4747
endif()
4848

49-
if (GGML_WEBGPU_SERIALIZE_SUBMIT)
50-
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_SERIALIZE_SUBMIT=1)
51-
endif()
52-
5349
if (GGML_WEBGPU_DEBUG)
5450
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
5551
endif()

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,22 +57,12 @@
5757
# define WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES 16 // e.g. enough for two timestamps
5858
#endif
5959

60-
// TODO: The WebGPU backend can deadlock in multi-threaded scenarios if the parameter buffer pool
61-
// is exhausted and the command submit batch size is too high, or in cases where the underlying
62-
// WebGPU implementation has bugs in handling concurrent operations. Serializing command submission
63-
// is a workaround, but we should also investigate better solutions.
64-
#ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
65-
# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 1u
66-
# define WEBGPU_WAIT_ANY_TIMEOUT_MS UINT64_MAX
67-
#else
68-
# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8u
69-
# define WEBGPU_WAIT_ANY_TIMEOUT_MS 0
70-
#endif
71-
7260
/* Constants */
7361

7462
#define WEBGPU_MUL_MAT_WG_SIZE 256
7563
#define WEBGPU_NUM_PARAM_BUFS 32u
64+
#define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8u
65+
#define WEBGPU_WAIT_ANY_TIMEOUT_MS 0
7666
// Maximum number of in-flight submissions per-thread, to avoid exhausting the parameter buffer pool
7767
// Parenthesized so the quotient stays a single operand wherever the macro is expanded
// (e.g. `x / WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD` must not parse as `x / NUM / BATCH`).
#define WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD (WEBGPU_NUM_PARAM_BUFS / WEBGPU_COMMAND_SUBMIT_BATCH_SIZE)
7868
#define WEBGPU_PARAMS_BUF_SIZE_BYTES 128 // enough for 32 parameters
@@ -376,11 +366,12 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
376366
// Wait for the queue to finish processing all submitted work
377367
static void ggml_backend_webgpu_wait(webgpu_context & ctx,
378368
std::vector<webgpu_submission_futures> & futures,
379-
uint64_t timeout_ms = UINT64_MAX) {
369+
bool block = true) {
380370
// If we have too many in-flight submissions, wait on the oldest one first. If there are many threads,
381371
// inflight_max may be 0, meaning that we must wait on all futures.
382-
uint inflight_threads = ctx->inflight_threads;
383-
uint inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::max(inflight_threads, 1u);
372+
uint64_t timeout_ms = block ? UINT64_MAX : 0;
373+
uint inflight_threads = ctx->inflight_threads;
374+
uint inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::max(inflight_threads, 1u);
384375
while (futures.size() >= inflight_max && futures.size() > 0) {
385376
ctx->instance.WaitAny(futures[0].futures.size(), futures[0].futures.data(), UINT64_MAX);
386377
futures.erase(futures.begin());
@@ -1287,7 +1278,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
12871278
futures.push_back(ggml_backend_webgpu_submit(ctx, commands));
12881279
// Process events and check for completed submissions
12891280
ctx->instance.ProcessEvents();
1290-
ggml_backend_webgpu_wait(ctx, futures, WEBGPU_WAIT_ANY_TIMEOUT_MS);
1281+
ggml_backend_webgpu_wait(ctx, futures, false);
12911282
commands.clear();
12921283
}
12931284
}

0 commit comments

Comments
 (0)