|
18 | 18 |
|
19 | 19 | /* Constants */ |
20 | 20 |
|
21 | | -// TODO: find a better way to get the memory available |
22 | | -#define WEBGPU_MAX_BUFFERS 32 |
23 | | - |
24 | 21 | #define WEBGPU_MUL_MAT_WG_SIZE 64 |
25 | 22 | #define WEBGPU_MUL_MAT_PARAMS_SIZE (13 * sizeof(uint32_t)) // M, N, K, batch sizes, broadcasts |
26 | 23 | #define WEBGPU_CPY_PARAMS_SIZE (15 * sizeof(uint32_t)) // strides and offsets |
@@ -119,7 +116,7 @@ static void ggml_webgpu_create_pipeline(wgpu::Device &device, wgpu::ComputePipel |
119 | 116 | pipeline_desc.label = label; |
120 | 117 | pipeline_desc.compute.module = shader_module; |
121 | 118 | pipeline_desc.compute.entryPoint = "main"; // Entry point in the WGSL code |
122 | | - pipeline_desc.layout = nullptr; // Guessing that nullptr means auto layout |
| 119 | + pipeline_desc.layout = nullptr; // nullptr means auto layout |
123 | 120 | if (constants.size() > 0) { |
124 | 121 | pipeline_desc.compute.constants = constants.data(); |
125 | 122 | pipeline_desc.compute.constantCount = constants.size(); |
@@ -199,7 +196,6 @@ static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer b |
199 | 196 | pass.End(); |
200 | 197 | wgpu::CommandBuffer commands = encoder.Finish(); |
201 | 198 |
|
202 | | - // TODO, async, do we need to wait on this? |
203 | 199 | ctx->queue.Submit(1, &commands); |
204 | 200 | } |
205 | 201 |
|
@@ -489,7 +485,6 @@ static void ggml_backend_webgpu_buffer_set_tensor(ggml_backend_buffer_t buffer, |
489 | 485 |
|
490 | 486 | size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset; |
491 | 487 |
|
492 | | - // TODO: wait on this? |
493 | 488 | webgpu_ctx->queue.WriteBuffer(buf_ctx->buffer, total_offset, data, (size/4)*4); |
494 | 489 |
|
495 | 490 | if (size % 4 != 0) { |
@@ -617,9 +612,9 @@ static const char * ggml_backend_webgpu_device_get_description(ggml_backend_dev_ |
617 | 612 |
|
618 | 613 | static void ggml_backend_webgpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { |
619 | 614 | ggml_backend_webgpu_device_context * ctx = static_cast<ggml_backend_webgpu_device_context *>(dev->context); |
620 | | - // TODO: what do we actually want to return here? |
621 | | - *free = ctx->webgpu_ctx->limits.maxBufferSize * WEBGPU_MAX_BUFFERS; |
622 | | - *total = ctx->webgpu_ctx->limits.maxBufferSize * WEBGPU_MAX_BUFFERS; |
| 615 | + // TODO: what do we actually want to return here? maxBufferSize might not be the full available memory. |
| 616 | + *free = ctx->webgpu_ctx->limits.maxBufferSize; |
| 617 | + *total = ctx->webgpu_ctx->limits.maxBufferSize; |
623 | 618 | } |
624 | 619 |
|
625 | 620 | static enum ggml_backend_dev_type ggml_backend_webgpu_device_get_type(ggml_backend_dev_t dev) { |
|
0 commit comments