Skip to content

Commit d1b0ffe

Browse files
authored
Merge pull request #3 from reeselevine/fixes
debug
2 parents 6552e2e + 7a323b0 commit d1b0ffe

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -823,7 +823,7 @@ static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_b
823823
wgpu::Buffer buf;
824824
ggml_webgpu_create_buffer(ctx->webgpu_ctx->device,
825825
buf,
826-
size,
826+
(size + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1),
827827
wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst,
828828
"allocated_buffer");
829829

ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -19,20 +19,20 @@ fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
1919
let start = params.offset;
2020
let end = params.offset + params.size;
2121

22-
for (var j: u32 = 0u; j < bytes_per_thread; j = j + 1u) {
22+
for (var j: u32 = 0u; j < bytes_per_thread; j += 4) {
2323
let byte_index = start + i + j;
24-
if (byte_index + 4u <= end) {
25-
output_buffer[(byte_index >> 2u)] = params.value;
24+
if (byte_index + 4 <= end) {
25+
output_buffer[byte_index >> 2] = params.value;
2626
} else {
2727
// Handle tail (unaligned)
28-
for (var k: u32 = 0u; k < 4u; k = k + 1u) {
28+
for (var k: u32 = 0; k < 4; k++) {
2929
let idx = byte_index + k;
3030
if (idx < end) {
31-
let word_idx = idx >> 2u;
32-
let byte_offset = (idx & 3u) * 8u;
33-
let mask = ~(0xffu << byte_offset);
31+
let word_idx = idx >> 2;
32+
let bit_offset = (idx & 3) * 8u;
33+
let mask = ~(0xffu << bit_offset);
3434
let existing = output_buffer[word_idx];
35-
output_buffer[word_idx] = (existing & mask) | ((params.value & 0xffu) << byte_offset);
35+
output_buffer[word_idx] = (existing & mask) | (params.value & (0xffu << bit_offset));
3636
}
3737
}
3838
}

0 commit comments

Comments
 (0)