@@ -229,6 +229,7 @@ struct vk_device_struct {
229229 vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16;
230230 vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16;
231231 vk_pipeline pipeline_cpy_f32_quant[GGML_TYPE_COUNT];
232+ vk_pipeline pipeline_cpy_quant_f32[GGML_TYPE_COUNT];
232233 vk_pipeline pipeline_norm_f32;
233234 vk_pipeline pipeline_group_norm_f32;
234235 vk_pipeline pipeline_rms_norm_f32;
@@ -1973,6 +1974,13 @@ static void ggml_vk_load_shaders(vk_device& device) {
19731974 ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_Q8_0], " cpy_f32_q8_0" , cpy_f32_q8_0_len, cpy_f32_q8_0_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
19741975 ggml_vk_create_pipeline (device, device->pipeline_cpy_f32_quant [GGML_TYPE_IQ4_NL], " cpy_f32_iq4_nl" , cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
19751976
1977+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_0], " cpy_q4_0_f32" , cpy_q4_0_f32_len, cpy_q4_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_0), 1 , 1 }, {}, 1 );
1978+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q4_1], " cpy_q4_1_f32" , cpy_q4_1_f32_len, cpy_q4_1_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q4_1), 1 , 1 }, {}, 1 );
1979+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q5_0], " cpy_q5_0_f32" , cpy_q5_0_f32_len, cpy_q5_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_0), 1 , 1 }, {}, 1 );
1980+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q5_1], " cpy_q5_1_f32" , cpy_q5_1_f32_len, cpy_q5_1_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q5_1), 1 , 1 }, {}, 1 );
1981+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_Q8_0], " cpy_q8_0_f32" , cpy_q8_0_f32_len, cpy_q8_0_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_Q8_0), 1 , 1 }, {}, 1 );
1982+ ggml_vk_create_pipeline (device, device->pipeline_cpy_quant_f32 [GGML_TYPE_IQ4_NL], " cpy_iq4_nl_f32" , cpy_iq4_nl_f32_len, cpy_iq4_nl_f32_data, " main" , 2 , sizeof (vk_op_unary_push_constants), {(uint32_t )ggml_blck_size (GGML_TYPE_IQ4_NL), 1 , 1 }, {}, 1 );
1983+
19761984 ggml_vk_create_pipeline (device, device->pipeline_add_f32 , " add_f32" , add_f32_len, add_f32_data, " main" , 3 , sizeof (vk_op_binary_push_constants), {512 , 1 , 1 }, {0 }, 1 );
19771985 ggml_vk_create_pipeline (device, device->pipeline_add_f32_norepeat , " add_f32_norepeat" , add_f32_len, add_f32_data, " main" , 3 , sizeof (vk_op_binary_push_constants), {512 , 1 , 1 }, {1 }, 1 );
19781986 ggml_vk_create_pipeline (device, device->pipeline_add_f16_f32_f16 , " add_f16_f32_f16" , add_f16_f32_f16_len, add_f16_f32_f16_data, " main" , 3 , sizeof (vk_op_binary_push_constants), {512 , 1 , 1 }, {0 }, 1 );
@@ -3711,6 +3719,20 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const
37113719 }
37123720 }
37133721
3722+ if (to == GGML_TYPE_F32) {
3723+ switch (src->type ) {
3724+ case GGML_TYPE_Q4_0:
3725+ case GGML_TYPE_Q4_1:
3726+ case GGML_TYPE_Q5_0:
3727+ case GGML_TYPE_Q5_1:
3728+ case GGML_TYPE_Q8_0:
3729+ case GGML_TYPE_IQ4_NL:
3730+ return ctx->device ->pipeline_cpy_quant_f32 [src->type ];
3731+ default :
3732+ break ;
3733+ }
3734+ }
3735+
37143736 std::cerr << " Missing CPY op for types: " << ggml_type_name (src->type ) << " " << ggml_type_name (to) << std::endl;
37153737 GGML_ABORT (" fatal error" );
37163738}
@@ -5181,7 +5203,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
51815203 }
51825204 std::cerr << " ), (" << dst << " , name=" << dst->name << " , type=" << dst->type << " , ne0=" << dst->ne [0 ] << " , ne1=" << dst->ne [1 ] << " , ne2=" << dst->ne [2 ] << " , ne3=" << dst->ne [3 ] << " , nb0=" << dst->nb [0 ] << " , nb1=" << dst->nb [1 ] << " , nb2=" << dst->nb [2 ] << " , nb3=" << dst->nb [3 ];
51835205 std::cerr << " ), " << ggml_op_name (op) << " , " << (dryrun ? " dryrun" : " " ) << " )" );
5184- GGML_ASSERT (op == GGML_OP_GET_ROWS || (!ggml_is_quantized (src0->type ) && (src1 == nullptr || !ggml_is_quantized (src1->type )))); // NOLINT
5206+ GGML_ASSERT (op == GGML_OP_GET_ROWS || op == GGML_OP_CPY || (!ggml_is_quantized (src0->type ) && (src1 == nullptr || !ggml_is_quantized (src1->type )))); // NOLINT
51855207 GGML_ASSERT (ggml_vk_op_supports_incontiguous (op) || ggml_vk_dim01_contiguous (src0)); // NOLINT
51865208 GGML_ASSERT (dst->buffer != nullptr );
51875209 const uint64_t ne00 = src0->ne [0 ];
@@ -7942,6 +7964,20 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
79427964 break ;
79437965 }
79447966 }
7967+ if (src1_type == GGML_TYPE_F32) {
7968+ switch (src0_type) {
7969+ case GGML_TYPE_Q4_0:
7970+ case GGML_TYPE_Q4_1:
7971+ case GGML_TYPE_Q5_0:
7972+ case GGML_TYPE_Q5_1:
7973+ case GGML_TYPE_Q8_0:
7974+ case GGML_TYPE_IQ4_NL:
7975+ return true ;
7976+ default :
7977+ break ;
7978+ }
7979+ }
7980+
79457981 if (src0_type == GGML_TYPE_F16 && src1_type == GGML_TYPE_F16) {
79467982 return true ;
79477983 }
0 commit comments