@@ -20,7 +20,7 @@
 
 #ifdef GGML_WEBGPU_DEBUG
 #    define WEBGPU_LOG_DEBUG(msg) std::cout << msg << std::endl
-#    define WEBGPU_DEBUG_BUF_ELEMS 32
+#    define WEBGPU_DEBUG_BUF_ELEMS 33
 #else
 #    define WEBGPU_LOG_DEBUG(msg) ((void) 0)
 #endif  // GGML_WEBGPU_DEBUG
@@ -129,7 +129,7 @@ struct webgpu_context_struct {
     webgpu_buf_pool set_rows_error_buf_pool;
 
     wgpu::ComputePipeline memset_pipeline;
-    wgpu::ComputePipeline mul_mat_pipeline[15][2];
+    wgpu::ComputePipeline mul_mat_pipeline[17][2];
     wgpu::ComputePipeline set_rows_pipeline;
     wgpu::ComputePipeline cpy_pipeline;
 
@@ -595,12 +595,17 @@ static void ggml_webgpu_mul_mat(webgpu_context & ctx, ggml_tensor * src0, ggml_t
         { .binding = 2,
           .buffer = ggml_webgpu_tensor_buf(dst),
           .offset = ggml_webgpu_tensor_align_offset(ctx, dst),
-          .size = ggml_webgpu_tensor_binding_size(ctx, dst) }
+          .size = ggml_webgpu_tensor_binding_size(ctx, dst) },
+        // { .binding = 3,
+        //   .buffer = ctx->debug_dev_buf,
+        //   .offset = 0,
+        //   .size = ctx->debug_dev_buf.GetSize() }
     };
 
     uint32_t wg_x =
         (dst->ne[0] * dst->ne[1] * dst->ne[2] * dst->ne[3] + WEBGPU_MUL_MAT_WG_SIZE - 1) / WEBGPU_MUL_MAT_WG_SIZE;
     ggml_backend_webgpu_build_and_enqueue(ctx, ctx->mul_mat_pipeline[src0->type][src1->type], params, entries, wg_x);
+    //ggml_backend_webgpu_debug(ctx);
 }
 
 // Returns true if node has enqueued work into the queue, false otherwise
@@ -910,7 +915,7 @@ static void ggml_webgpu_init_memset_pipeline(webgpu_context & webgpu_ctx) {
 }
 
 static void ggml_webgpu_init_mul_mat_pipeline(webgpu_context & webgpu_ctx) {
-    webgpu_pipeline_info pipeline_infos[13] = {
+    webgpu_pipeline_info pipeline_infos[14] = {
         { .name = "mul_mat_f32_f32",
           .shader_code = wgsl_mul_mat_f32_f32,
           .src0_type = GGML_TYPE_F32,
@@ -962,7 +967,11 @@ static void ggml_webgpu_init_mul_mat_pipeline(webgpu_context & webgpu_ctx) {
         { .name = "mul_mat_q6_k_f32",
          .shader_code = wgsl_mul_mat_q6_k_f32,
          .src0_type = GGML_TYPE_Q6_K,
-          .src1_type = GGML_TYPE_F32 }
+          .src1_type = GGML_TYPE_F32 },
+        { .name = "mul_mat_iq2_xxs_f32",
+          .shader_code = wgsl_mul_mat_iq2_xxs_f32,
+          .src0_type = GGML_TYPE_IQ2_XXS,
+          .src1_type = GGML_TYPE_F32 }
     };
 
     for (auto & pipeline_info : pipeline_infos) {
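
For reference, the block format the new mul_mat_iq2_xxs_f32 pipeline has to decode packs 256 weights into one fp16 super-block scale plus 32 packed 16-bit words (about 2.06 bits per weight). A minimal C++ restatement of that layout, assuming it matches block_iq2_xxs in ggml-common.h (the ggml_half typedef here is only a stand-in for illustration):

// Sketch only, not part of this change: assumed IQ2_XXS block layout.
#include <cstdint>

typedef uint16_t ggml_half;   // fp16 stored as raw 16 bits (stand-in)
#define QK_K 256              // weights per super-block

typedef struct {
    ggml_half d;              // one fp16 super-block scale
    uint16_t  qs[QK_K / 8];   // 32 packed words: grid indices, signs, scales
} block_iq2_xxs;

static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_half) + QK_K / 8 * sizeof(uint16_t),
              "66 bytes per 256 weights, i.e. 2.0625 bits per weight");
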
@@ -1064,6 +1073,7 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
             case GGML_TYPE_Q4_K:
             case GGML_TYPE_Q5_K:
             case GGML_TYPE_Q6_K:
+            case GGML_TYPE_IQ2_XXS:
                 return true;
             default:
                 return false;
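
A note on the two different sizes touched above: mul_mat_pipeline is indexed directly by ggml_type, and GGML_TYPE_IQ2_XXS has enum value 16 in ggml.h, so the table's first dimension grows from 15 (enough for Q6_K = 14) to 17, while pipeline_infos only grows from 13 to 14 for the one shader actually registered. A standalone sketch of that indexing pattern follows; the enum values are assumed to match ggml.h, everything else is illustrative:

// Sketch of the type-indexed pipeline lookup, not backend code.
#include <cstdio>

enum ggml_type_subset {            // trimmed copy of ggml_type values
    GGML_TYPE_F32     = 0,
    GGML_TYPE_F16     = 1,
    GGML_TYPE_Q6_K    = 14,
    GGML_TYPE_IQ2_XXS = 16,        // largest index the table must now hold
};

int main() {
    // Mirrors mul_mat_pipeline[17][2]: first index is src0->type, second is
    // src1->type (F32 = 0 or F16 = 1). Strings stand in for compute pipelines.
    const char * pipelines[17][2] = {};
    pipelines[GGML_TYPE_IQ2_XXS][GGML_TYPE_F32] = "mul_mat_iq2_xxs_f32";

    std::printf("%s\n", pipelines[GGML_TYPE_IQ2_XXS][GGML_TYPE_F32]);
    return 0;
}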