Skip to content

Commit 4e642bc

Browse files
committed
vulkan: don't use std::string in load_shaders, to improve compile time
1 parent: d4d8dbe — commit: 4e642bc

File tree

1 file changed

+7
-15
lines changed

1 file changed

+7
-15
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 7 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2340,7 +2340,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
23402340
}
23412341

23422342
std::vector<std::future<void>> compiles;
2343-
auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint,
2343+
auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const char *name, size_t spv_size, const void* spv_data, const char *entrypoint,
23442344
uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants,
23452345
uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {
23462346

@@ -3112,9 +3112,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
31123112

31133113
for (uint32_t i = 0; i < p021_max_gqa_ratio; ++i) {
31143114
if (device->subgroup_arithmetic && device->subgroup_require_full_support) {
3115-
ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_p021_f16_f32[i], "mul_mat_vec_p021_f16_f32"+std::to_string(i+1), mul_mat_vec_p021_f16_f32_subgroup_add_len, mul_mat_vec_p021_f16_f32_subgroup_add_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {device->subgroup_size, i + 1}, 1, true, true);
3115+
ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_p021_f16_f32[i], "mul_mat_vec_p021_f16_f32", mul_mat_vec_p021_f16_f32_subgroup_add_len, mul_mat_vec_p021_f16_f32_subgroup_add_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {device->subgroup_size, i + 1}, 1, true, true);
31163116
} else {
3117-
ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_p021_f16_f32[i], "mul_mat_vec_p021_f16_f32"+std::to_string(i+1), mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {device->subgroup_size, i + 1}, 1, true);
3117+
ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_p021_f16_f32[i], "mul_mat_vec_p021_f16_f32", mul_mat_vec_p021_f16_f32_len, mul_mat_vec_p021_f16_f32_data, "main", 3, 6 * sizeof(uint32_t), {1, 1, 1}, {device->subgroup_size, i + 1}, 1, true);
31183118
}
31193119
}
31203120
ggml_vk_create_pipeline(device, device->pipeline_mul_mat_vec_nc_f16_f32, "mul_mat_vec_nc_f16_f32", mul_mat_vec_nc_f16_f32_len, mul_mat_vec_nc_f16_f32_data, "main", 3, 12 * sizeof(uint32_t), {1, 1, 1}, {}, 1);
@@ -3187,19 +3187,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
31873187
ggml_vk_create_pipeline(device, device->pipeline_cpy_quant_f32[GGML_TYPE_Q8_0], "cpy_q8_0_f32", cpy_q8_0_f32_len, cpy_q8_0_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q8_0), 1, 1}, {}, 1);
31883188
ggml_vk_create_pipeline(device, device->pipeline_cpy_quant_f32[GGML_TYPE_IQ4_NL], "cpy_iq4_nl_f32", cpy_iq4_nl_f32_len, cpy_iq4_nl_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_IQ4_NL), 1, 1}, {}, 1);
31893189

3190-
auto get_suffix = [](bool src0_f16, bool src1_f16, bool dst_f16) {
3191-
std::string s;
3192-
s += std::string(src0_f16 ? "_f16" : "_f32");
3193-
s += std::string(src1_f16 ? "_f16" : "_f32");
3194-
s += std::string(dst_f16 ? "_f16" : "_f32");
3195-
return s;
3196-
};
3197-
31983190
bool rte = device->float_controls_rte_fp16;
31993191
#define CREATE_BINARY(name, namemod, spec, bindings) \
32003192
for (int s0 : {0,1}) for (int s1 : {0,1}) for (int d : {0,1}) \
32013193
ggml_vk_create_pipeline(device, device->pipeline_ ## name ## namemod[s0][s1][d], \
3202-
#name + get_suffix(s0, s1, d) + #namemod, name ## _len[s0][s1][d][rte], name ## _data[s0][s1][d][rte], \
3194+
#name #namemod, name ## _len[s0][s1][d][rte], name ## _data[s0][s1][d][rte], \
32033195
"main", (bindings), sizeof(vk_op_binary_push_constants), {512, 1, 1}, spec, 1);
32043196

32053197
CREATE_BINARY(add, , {0}, 4)
@@ -3216,8 +3208,8 @@ static void ggml_vk_load_shaders(vk_device& device) {
32163208

32173209
if (device->multi_add) {
32183210
for (uint32_t i = 0; i < MAX_FUSED_ADDS; ++i) {
3219-
ggml_vk_create_pipeline(device, device->pipeline_multi_add[i], "multi_add_f32_" + std::to_string(i+1), multi_add_f32_len, multi_add_f32_data, "main", MAX_PARAMETER_COUNT, sizeof(vk_op_multi_add_push_constants), {512, 1, 1}, {i+2}, 1);
3220-
ggml_vk_create_pipeline(device, device->pipeline_multi_add_rms[i], "multi_add_rms_f32_" + std::to_string(i+1), multi_add_rms_f32_len, multi_add_rms_f32_data, "main", MAX_PARAMETER_COUNT, sizeof(vk_op_multi_add_push_constants), {512, 1, 1}, {i+2}, 1);
3211+
ggml_vk_create_pipeline(device, device->pipeline_multi_add[i], "multi_add_f32_" , multi_add_f32_len, multi_add_f32_data, "main", MAX_PARAMETER_COUNT, sizeof(vk_op_multi_add_push_constants), {512, 1, 1}, {i+2}, 1);
3212+
ggml_vk_create_pipeline(device, device->pipeline_multi_add_rms[i], "multi_add_rms_f32_" , multi_add_rms_f32_len, multi_add_rms_f32_data, "main", MAX_PARAMETER_COUNT, sizeof(vk_op_multi_add_push_constants), {512, 1, 1}, {i+2}, 1);
32213213
}
32223214
}
32233215

@@ -3309,7 +3301,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
33093301
}
33103302

33113303
for (uint32_t i = 0; i < num_argsort_pipelines; ++i) {
3312-
ggml_vk_create_pipeline(device, device->pipeline_argsort_f32[i], "argsort_f32_"+std::to_string(i), argsort_f32_len, argsort_f32_data, "main", 2, sizeof(vk_op_argsort_push_constants), {1u<<i, 1, 1}, {1u<<i, i}, 1, true);
3304+
ggml_vk_create_pipeline(device, device->pipeline_argsort_f32[i], "argsort_f32_", argsort_f32_len, argsort_f32_data, "main", 2, sizeof(vk_op_argsort_push_constants), {1u<<i, 1, 1}, {1u<<i, i}, 1, true);
33133305
}
33143306

33153307
ggml_vk_create_pipeline(device, device->pipeline_argmax_f32, "argmax_f32", argmax_f32_len, argmax_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, { device->subgroup_size }, 1);

0 commit comments

Comments (0)