|
// The WebGPU implementation has bugs in handling concurrent operations. Serializing command submission
// is a workaround, but we should also investigate better solutions.
// NOTE: the batch-size constants carry a `u` suffix deliberately — they participate in unsigned
// arithmetic (e.g. `std::max(inflight_threads, 1u)` and comparisons against `futures.size()`),
// and signed literals here would trigger signed/unsigned mixing.
#ifdef GGML_WEBGPU_SERIALIZE_SUBMIT
// Submit one command buffer at a time; a UINT64_MAX timeout means WaitAny effectively blocks.
#    define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 1u
#    define WEBGPU_WAIT_ANY_TIMEOUT_MS       UINT64_MAX
#else
// Batch up to 8 command buffers per submission; a 0 timeout means WaitAny polls without blocking.
#    define WEBGPU_COMMAND_SUBMIT_BATCH_SIZE 8u
#    define WEBGPU_WAIT_ANY_TIMEOUT_MS       0
#endif
71 | 71 |
|
/* Constants */

#define WEBGPU_MUL_MAT_WG_SIZE 256
// Size of the parameter-buffer pool; unsigned (`u`) so expressions derived from it stay unsigned.
#define WEBGPU_NUM_PARAM_BUFS  32u
// Maximum number of in-flight submissions per-thread, to avoid exhausting the parameter buffer pool.
// The expansion is parenthesized: without parentheses, a use such as
// `X / WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD` would expand to `X / BUFS / BATCH` and mis-group.
#define WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD (WEBGPU_NUM_PARAM_BUFS / WEBGPU_COMMAND_SUBMIT_BATCH_SIZE)
#define WEBGPU_PARAMS_BUF_SIZE_BYTES 128  // enough for 32 parameters
@@ -251,7 +251,7 @@ struct webgpu_context_struct { |
251 | 251 | uint32_t max_wg_size_x; |
252 | 252 |
|
253 | 253 | std::recursive_mutex mutex; |
254 | | - std::atomic_int inflight_threads = 0; |
| 254 | + std::atomic_uint inflight_threads = 0; |
255 | 255 |
|
256 | 256 | webgpu_buf_pool param_buf_pool; |
257 | 257 | webgpu_buf_pool set_rows_error_buf_pool; |
@@ -379,7 +379,8 @@ static void ggml_backend_webgpu_wait(webgpu_context & ct |
379 | 379 | uint64_t timeout_ms = UINT64_MAX) { |
380 | 380 | // If we have too many in-flight submissions, wait on the oldest one first. If there are many threads, |
381 | 381 | // inflight_max may be 0, meaning that we must wait on all futures. |
382 | | - int inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::min(ctx->inflight_threads, 1); |
| 382 | + uint inflight_threads = ctx->inflight_threads; |
| 383 | + uint inflight_max = WEBGPU_MAX_INFLIGHT_SUBS_PER_THREAD / std::max(inflight_threads, 1u); |
383 | 384 | while (futures.size() >= inflight_max && futures.size() > 0) { |
384 | 385 | ctx->instance.WaitAny(futures[0].futures.size(), futures[0].futures.data(), UINT64_MAX); |
385 | 386 | futures.erase(futures.begin()); |
@@ -1279,8 +1280,9 @@ static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, str |
1279 | 1280 | commands.push_back(*cmd); |
1280 | 1281 | } |
1281 | 1282 | // compute the batch size based on the number of inflight threads |
1282 | | - int batch_size = std::min(std::max(1, WEBGPU_NUM_PARAM_BUFS / ctx->inflight_threads), |
1283 | | - WEBGPU_COMMAND_SUBMIT_BATCH_SIZE); |
| 1283 | + uint inflight_threads = ctx->inflight_threads; |
| 1284 | + uint batch_size = std::min(std::max(1u, WEBGPU_NUM_PARAM_BUFS / std::max(inflight_threads, 1u)), |
| 1285 | + WEBGPU_COMMAND_SUBMIT_BATCH_SIZE); |
1284 | 1286 | if (commands.size() >= batch_size) { |
1285 | 1287 | futures.push_back(ggml_backend_webgpu_submit(ctx, commands)); |
1286 | 1288 | // Process events and check for completed submissions |
|
0 commit comments