Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
30ba139
Add parameter buffer pool, batching of submissions, refactor command …
reeselevine Jul 30, 2025
04d7b27
Add header for linux builds
reeselevine Jul 30, 2025
01c8ced
Free staged parameter buffers at once
reeselevine Jul 30, 2025
bfff27f
Format with clang-format
reeselevine Jul 30, 2025
b8012ec
Fix thread-safe implementation
reeselevine Jul 31, 2025
cddda7e
Use device implicit synchronization
reeselevine Jul 31, 2025
1d5726a
Merge remote-tracking branch 'upstream/master' into fixes
reeselevine Jul 31, 2025
6a20e39
Update workflow to use custom release
reeselevine Aug 1, 2025
ea39068
Remove testing branch workflow
reeselevine Aug 1, 2025
96d107e
some f32 tests passing
Aug 1, 2025
4c58742
Merge branch 'ggml-org:master' into master
reeselevine Aug 4, 2025
ae8edbf
Disable set_rows until it's implemented
reeselevine Aug 4, 2025
39aa11d
f32 add all tests passing
Aug 4, 2025
2c57726
Merge branch 'master' of https://github.com/reeselevine/llama.cpp int…
Aug 4, 2025
6a6135c
Begin work on set_rows
reeselevine Aug 5, 2025
b2dbfcd
Work on set rows
reeselevine Aug 5, 2025
248f7a5
Add error buffers for reporting unsupported SET_ROWS indices
reeselevine Aug 6, 2025
4ad0986
Remove extra comments
reeselevine Aug 6, 2025
ac52243
most recent merge
Aug 6, 2025
1b16a91
Merge remote-tracking branch 'origin/master' into addition
reeselevine Sep 4, 2025
7f9ee10
Add templated addition, clean up code
reeselevine Sep 4, 2025
c102197
Get addition and multiplication working
reeselevine Sep 8, 2025
efc0cb0
Merge pull request #1 from reeselevine/addition
reeselevine Sep 9, 2025
7fbe84c
Implement rms_norm
reeselevine Sep 10, 2025
dc7bc4a
Add get_rows implementation
reeselevine Sep 11, 2025
b7635c4
Add new get_rows files
reeselevine Sep 11, 2025
4293531
Refactor use of wg size entry
reeselevine Sep 11, 2025
ff41205
Fix compilation
reeselevine Sep 12, 2025
a5da437
Merge remote-tracking branch 'upstream/master'
reeselevine Sep 12, 2025
77f8b96
Try manually unrolled q4_0 quant
reeselevine Sep 12, 2025
102f225
Revert "Try manually unrolled q4_0 quant"
reeselevine Sep 12, 2025
b0bd49f
Move to constant max wg size
reeselevine Sep 13, 2025
fc91520
Check for tensor size in supports_op
reeselevine Sep 13, 2025
4561784
Vectorize f32 and change default workgroup size
reeselevine Sep 15, 2025
26742e2
Merge remote-tracking branch 'upstream/master'
reeselevine Sep 15, 2025
cfa4fc1
Move f32 get_rows from < 4 to % 4 != 0
reeselevine Sep 15, 2025
9422879
fix linter errors
reeselevine Sep 15, 2025
b877e07
Add in-place tests
reeselevine Sep 17, 2025
be35439
Merge remote-tracking branch 'upstream/master'
reeselevine Sep 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
705 changes: 479 additions & 226 deletions ggml/src/ggml-webgpu/ggml-webgpu.cpp

Large diffs are not rendered by default.

44 changes: 44 additions & 0 deletions ggml/src/ggml-webgpu/wgsl-shaders/add.tmpl.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#define(VARIANTS)

[
  {
    "REPLS": {
      "TYPE" : "f32",
    }
  },
  {
    "REPLS": {
      "TYPE" : "f16",
    }
  }
]

#end(VARIANTS)

#define(SHADER)

enable f16;

#include "binary_head.tmpl"

// Element-wise addition: dst = src0 + src1, with src1 broadcast via
// src1_index() (see binary_head.tmpl). One invocation per dst element.

@group(0) @binding(0)
var<storage, read_write> src0: array<{{TYPE}}>;

@group(0) @binding(1)
var<storage, read_write> src1: array<{{TYPE}}>;

@group(0) @binding(2)
var<storage, read_write> dst: array<{{TYPE}}>;

@group(0) @binding(3)
var<uniform> params: Params;

override wg_size: u32;
@compute @workgroup_size(wg_size)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    // The dispatch is rounded up to a whole number of workgroups, so the
    // last group may contain invocations past the end of the tensor.
    if (gid.x >= params.ne) {
        return;
    }
    dst[params.offset_dst + gid.x] = src0[params.offset_src0 + gid.x] + src1[params.offset_src1 + src1_index(gid.x)];
}

#end(SHADER)
41 changes: 41 additions & 0 deletions ggml/src/ggml-webgpu/wgsl-shaders/add_in_place.tmpl.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#define(VARIANTS)

[
  {
    "REPLS": {
      "TYPE" : "f32",
    }
  },
  {
    "REPLS": {
      "TYPE" : "f16",
    }
  }
]

#end(VARIANTS)

#define(SHADER)

enable f16;

#include "binary_head.tmpl"

// In-place element-wise addition: src0 doubles as the destination buffer,
// so offset_dst is an offset into src0. src1 is broadcast via src1_index()
// (see binary_head.tmpl). One invocation per element.

@group(0) @binding(0)
var<storage, read_write> src0: array<{{TYPE}}>;

@group(0) @binding(1)
var<storage, read_write> src1: array<{{TYPE}}>;

@group(0) @binding(2)
var<uniform> params: Params;

override wg_size: u32;
@compute @workgroup_size(wg_size)
fn main(@builtin(global_invocation_id) gid: vec3<u32>) {
    // The dispatch is rounded up to a whole number of workgroups, so the
    // last group may contain invocations past the end of the tensor.
    if (gid.x >= params.ne) {
        return;
    }
    src0[params.offset_dst + gid.x] = src0[params.offset_src0 + gid.x] + src1[params.offset_src1 + src1_index(gid.x)];
}

#end(SHADER)
45 changes: 45 additions & 0 deletions ggml/src/ggml-webgpu/wgsl-shaders/binary_head.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Uniform parameters for the element-wise binary shaders.
// Layout must match the parameter buffer written by the host
// (ggml-webgpu.cpp) — do not reorder fields.
struct Params {
    ne: u32,             // total number of elements to process

    // offsets in elements
    offset_src0: u32,
    offset_src1: u32,
    offset_dst: u32,

    // strides of src1, in elements, used by src1_index() to address a
    // (possibly non-contiguous) src1 buffer
    stride_src1_0: u32,
    stride_src1_1: u32,
    stride_src1_2: u32,
    stride_src1_3: u32,

    // extents of src0/dst ("a"); a_ne3 is not needed, because the
    // outermost extent is never used when decomposing a flat index
    a_ne0: u32,
    a_ne1: u32,
    a_ne2: u32,

    // extents of src1 ("b"); dimensions smaller than src0's repeat
    // (broadcast) via modulo in src1_index()
    b_ne0: u32,
    b_ne1: u32,
    b_ne2: u32,
    b_ne3: u32,
};

// Map a flat element index of src0/dst onto the corresponding element of
// src1, repeating (broadcasting) src1 along any dimension whose extent is
// smaller than src0's, and honoring src1's strides.
fn src1_index(_i: u32) -> u32 {
    // Hoist the products reused by both the divisions and the remainders.
    let plane  = params.a_ne1 * params.a_ne0;
    let volume = params.a_ne2 * plane;

    // Decompose the flat index into 4-D coordinates of src0.
    let i3  = _i / volume;
    var rem = _i % volume;
    let i2  = rem / plane;
    rem     = rem % plane;
    let i1  = rem / params.a_ne0;
    let i0  = rem % params.a_ne0;

    // Broadcast: each coordinate wraps around src1's extent, then the
    // strides turn the coordinates into a flat position in src1's buffer.
    return (i0 % params.b_ne0) * params.stride_src1_0 +
           (i1 % params.b_ne1) * params.stride_src1_1 +
           (i2 % params.b_ne2) * params.stride_src1_2 +
           (i3 % params.b_ne3) * params.stride_src1_3;
}
Loading
Loading