 # define WEBGPU_TIMESTAMP_QUERY_BUF_SIZE_BYTES 16 // e.g. enough for two timestamps
 #endif

-// TODO: The WebGPU backend can deadlock in multi-threaded scenarios if the parameter buffer pool
-// is exhausted and the command submit batch size is too high, or in cases where the underlying
-// WebGPU implementation has bugs in handling concurrent operations. Serializing command submission
-// is a workaround, but we should also investigate better solutions.
-#ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
-# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 1u
-# define WEBGPU_WAIT_ANY_TIMEOUT_MS UINT64_MAX
-#else
-# define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8u
-# define WEBGPU_WAIT_ANY_TIMEOUT_MS 0
-#endif
-
 /* Constants */

 #define WEBGPU_MUL_MAT_WG_SIZE 256
 #define WEBGPU_NUM_PARAM_BUFS 32u
+#define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8u
+#define WEBGPU_WAIT_ANY_TIMEOUT_MS 0
 // Maximum number of in-flight submissions per-thread, to avoid exhausting the parameter buffer pool
 #define WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD WEBGPU_NUM_PARAM_BUFS / WEBGPU_COMMAND_SUBMIT_BATCH_SIZE
 #define WEBGPU_PARAMS_BUF_SIZE_BYTES 128 // enough for 32 parameters
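
Worked through with the values above (a quick illustrative sketch, not backend code): WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD comes out to 32u / 8u = 4, and ggml_backend_webgpu_wait further divides that cap by the number of threads currently recording work, so under heavy concurrency the per-thread cap can drop to 0 and force a full wait. The thread counts below are hypothetical inputs:

// Illustrative arithmetic only; mirrors the macro values above, not the backend itself.
#include <algorithm>
#include <cstdio>
#include <initializer_list>

int main() {
    const unsigned num_param_bufs = 32; // WEBGPU_NUM_PARAM_BUFS
    const unsigned batch_size     = 8;  // WEBGPU_COMMAND_SUBMIT_BATCH_SIZE
    const unsigned max_inflight   = num_param_bufs / batch_size; // WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD == 4

    // Hypothetical counts of threads recording graphs at the same time.
    for (unsigned inflight_threads : { 1u, 2u, 4u, 8u }) {
        const unsigned inflight_max = max_inflight / std::max(inflight_threads, 1u);
        // At 8 threads the cap is 0, so a thread must wait on all of its futures
        // before queuing another batch.
        std::printf("threads=%u -> per-thread in-flight cap=%u\n", inflight_threads, inflight_max);
    }
    return 0;
}
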
@@ -376,11 +366,12 @@ static void ggml_webgpu_create_buffer(wgpu::Device & device,
 // Wait for the queue to finish processing all submitted work
 static void ggml_backend_webgpu_wait(webgpu_context & ctx,
                                      std::vector<webgpu_submission_futures> & futures,
-                                     uint64_t timeout_ms = UINT64_MAX) {
+                                     bool block = true) {
     // If we have too many in-flight submissions, wait on the oldest one first. If there are many threads,
     // inflight_max may be 0, meaning that we must wait on all futures.
-    uint inflight_threads = ctx->inflight_threads;
-    uint inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::max(inflight_threads, 1u);
+    uint64_t timeout_ms = block ? UINT64_MAX : 0;
+    uint inflight_threads = ctx->inflight_threads;
+    uint inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::max(inflight_threads, 1u);
     while (futures.size() >= inflight_max && futures.size() > 0) {
         ctx->instance.WaitAny(futures[0].futures.size(), futures[0].futures.data(), UINT64_MAX);
         futures.erase(futures.begin());
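
The new block flag folds the old timeout parameter into two modes: block = true waits with an unbounded timeout, while block = false (timeout 0) presumably only reaps submissions that have already completed; either way, the loop above still blocks on the oldest submission whenever the per-thread in-flight cap is exceeded. A self-contained toy of that throttling pattern, with plain flags standing in for wgpu futures (fake_wait and fake_future are made-up names, and the drain/poll split for the remaining futures is an assumption about code not shown in this hunk):

// Toy model of the throttled wait: a deque of "futures" (plain flags here),
// a per-thread cap, and a block/poll mode. Purely illustrative; the real
// backend waits on wgpu futures via ctx->instance.WaitAny().
#include <cstdio>
#include <deque>

struct fake_future {
    bool done = false;
};

static void fake_wait(std::deque<fake_future> & futures, unsigned inflight_max, bool block) {
    // Over the cap (or cap == 0): always block on the oldest submission first.
    while (futures.size() >= inflight_max && !futures.empty()) {
        futures.front().done = true; // stands in for a blocking WaitAny on futures[0]
        futures.pop_front();
    }
    // Remaining futures: drain everything when blocking, otherwise reap only the
    // ones that already completed (a timeout of 0 behaves like a poll).
    for (auto it = futures.begin(); it != futures.end();) {
        if (block || it->done) {
            it = futures.erase(it);
        } else {
            ++it;
        }
    }
}

int main() {
    std::deque<fake_future> futures(3); // three batches in flight, cap of 4
    futures[0].done = true;             // pretend the oldest batch already finished
    fake_wait(futures, 4, /*block=*/false);
    std::printf("in flight after poll: %zu\n", futures.size());          // prints 2
    fake_wait(futures, 4, /*block=*/true);
    std::printf("in flight after blocking wait: %zu\n", futures.size()); // prints 0
    return 0;
}
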
@@ -1287,7 +1278,7 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str
             futures.push_back(ggml_backend_webgpu_submit(ctx, commands));
             // Process events and check for completed submissions
             ctx->instance.ProcessEvents();
-            ggml_backend_webgpu_wait(ctx, futures, WEBGPU_WAIT_ANY_TIMEOUT_MS);
+            ggml_backend_webgpu_wait(ctx, futures, false);
             commands.clear();
         }
     }
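
For context at the call site: commands are accumulated per graph node and submitted in batches, and the per-batch wait is the non-blocking form, so it enforces the in-flight cap and reaps finished work without stalling the whole graph. A rough, self-contained sketch of that flow, assuming a per-node loop and a final blocking wait after all nodes are encoded (neither is shown in this excerpt):

// Rough shape of the batched submit loop (illustrative; the real code encodes
// WebGPU commands per graph node and tracks webgpu_submission_futures).
#include <cstdio>
#include <vector>

constexpr unsigned BATCH_SIZE = 8; // WEBGPU_COMMAND_SUBMIT_BATCH_SIZE

int main() {
    std::vector<int> commands; // stand-in for encoded command buffers
    std::vector<int> futures;  // stand-in for in-flight submission futures

    for (int node = 0; node < 20; node++) {
        commands.push_back(node); // "encode" one graph node
        if (commands.size() >= BATCH_SIZE) {
            futures.push_back(node); // "submit" the batch and keep its future
            // Here the real backend calls ProcessEvents() and the non-blocking
            // wait: reap finished submissions and enforce the in-flight cap
            // without stalling the rest of the graph.
            commands.clear();
        }
    }
    // Assumption (not shown in this excerpt): any leftover commands are submitted
    // after the loop, followed by a blocking wait before graph_compute returns.
    std::printf("batches submitted: %zu, leftover commands: %zu\n", futures.size(), commands.size());
    return 0;
}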