Skip to content

Commit e2380e2

Browse files
committed
Get first i-quant working
1 parent d76e562 commit e2380e2

File tree

4 files changed

+238
-71
lines changed

4 files changed

+238
-71
lines changed

ggml/src/ggml-webgpu/ggml-webgpu.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
#ifdef GGML_WEBGPU_DEBUG
2222
# define WEBGPU_LOG_DEBUG(msg) std::cout << msg << std::endl
23-
# define WEBGPU_DEBUG_BUF_ELEMS 32
23+
# define WEBGPU_DEBUG_BUF_ELEMS 33
2424
#else
2525
# define WEBGPU_LOG_DEBUG(msg) ((void) 0)
2626
#endif // GGML_WEBGPU_DEBUG
@@ -129,7 +129,7 @@ struct webgpu_context_struct {
129129
webgpu_buf_pool set_rows_error_buf_pool;
130130

131131
wgpu::ComputePipeline memset_pipeline;
132-
wgpu::ComputePipeline mul_mat_pipeline[15][2];
132+
wgpu::ComputePipeline mul_mat_pipeline[17][2];
133133
wgpu::ComputePipeline set_rows_pipeline;
134134
wgpu::ComputePipeline cpy_pipeline;
135135

@@ -595,12 +595,17 @@ static void ggml_webgpu_mul_mat(webgpu_context & ctx, ggml_tensor * src0, ggml_t
595595
{ .binding = 2,
596596
.buffer = ggml_webgpu_tensor_buf(dst),
597597
.offset = ggml_webgpu_tensor_align_offset(ctx, dst),
598-
.size = ggml_webgpu_tensor_binding_size(ctx, dst) }
598+
.size = ggml_webgpu_tensor_binding_size(ctx, dst) },
599+
// { .binding = 3,
600+
// .buffer = ctx->debug_dev_buf,
601+
// .offset = 0,
602+
// .size = ctx->debug_dev_buf.GetSize() }
599603
};
600604

601605
uint32_t wg_x =
602606
(dst->ne[0] * dst->ne[1] * dst->ne[2] * dst->ne[3] + WEBGPU_MUL_MAT_WG_SIZE - 1) / WEBGPU_MUL_MAT_WG_SIZE;
603607
ggml_backend_webgpu_build_and_enqueue(ctx, ctx->mul_mat_pipeline[src0->type][src1->type], params, entries, wg_x);
608+
//ggml_backend_webgpu_debug(ctx);
604609
}
605610

606611
// Returns true if node has enqueued work into the queue, false otherwise
@@ -910,7 +915,7 @@ static void ggml_webgpu_init_memset_pipeline(webgpu_context & webgpu_ctx) {
910915
}
911916

912917
static void ggml_webgpu_init_mul_mat_pipeline(webgpu_context & webgpu_ctx) {
913-
webgpu_pipeline_info pipeline_infos[13] = {
918+
webgpu_pipeline_info pipeline_infos[14] = {
914919
{ .name = "mul_mat_f32_f32",
915920
.shader_code = wgsl_mul_mat_f32_f32,
916921
.src0_type = GGML_TYPE_F32,
@@ -962,7 +967,11 @@ static void ggml_webgpu_init_mul_mat_pipeline(webgpu_context & webgpu_ctx) {
962967
{ .name = "mul_mat_q6_k_f32",
963968
.shader_code = wgsl_mul_mat_q6_k_f32,
964969
.src0_type = GGML_TYPE_Q6_K,
965-
.src1_type = GGML_TYPE_F32 }
970+
.src1_type = GGML_TYPE_F32 },
971+
{ .name = "mul_mat_iq2_xxs_f32",
972+
.shader_code = wgsl_mul_mat_iq2_xxs_f32,
973+
.src0_type = GGML_TYPE_IQ2_XXS,
974+
.src1_type = GGML_TYPE_F32 }
966975
};
967976

968977
for (auto & pipeline_info : pipeline_infos) {
@@ -1064,6 +1073,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
10641073
case GGML_TYPE_Q4_K:
10651074
case GGML_TYPE_Q5_K:
10661075
case GGML_TYPE_Q6_K:
1076+
case GGML_TYPE_IQ2_XXS:
10671077
return true;
10681078
default:
10691079
return false;

ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,12 @@ def generate_variants(shader_path, output_dir, outfile):
4444
shader_template = extract_block(text, "SHADER")
4545

4646
for variant in variants:
47-
decls_key = variant["DECLS"]
48-
if decls_key not in decls_map:
49-
raise ValueError(f"DECLS key '{decls_key}' not found.")
50-
decls_code = decls_map[decls_key] + "\n\n"
47+
decls = variant["DECLS"]
48+
decls_code = ""
49+
for key in decls:
50+
if key not in decls_map:
51+
raise ValueError(f"DECLS key '{key}' not found.")
52+
decls_code += decls_map[key] + "\n\n"
5153

5254
shader_variant = replace_placeholders(shader_template, variant["REPLS"])
5355
final_shader = re.sub(rf'\bDECLS\b', decls_code, shader_variant)

0 commit comments

Comments
 (0)