@@ -1521,26 +1521,32 @@ struct GpuPipelineConfig {
15211521};
15221522
// Diff hunk: the table `rdna_pipelines` (lines marked `-`) is replaced by
// `rdna_common_pipelines` (lines marked `+`). Both map a pipeline-name string
// to a uint32_t; get_subgroup_size() returns the mapped value for a matching name.
15231523// Common pipeline configuration for RDNA GPUs.
// Removed version: one entry per full pipeline-name variant, every value 64.
1524- static const std::unordered_map<std::string, uint32_t > rdna_pipelines = {
1525- {" soft_max_f32" , 64 }, {" soft_max_f32_wg512" , 64 },
1526- {" soft_max_f32_f16" , 64 }, {" soft_max_f32_f16_wg512" , 64 },
1527- {" im2col_f32" , 64 }, {" im2col_f32_f16" , 64 },
// Added version: two shorter keys, every value 64. get_subgroup_size() falls back
// to a substring search (pipeline_name.find(key)) when there is no exact match,
// so each short key also applies to longer names that contain it.
// NOTE(review): the string literals as rendered here begin with a space; this
// looks like a rendering artifact -- confirm against the real file, because a
// leading space in the key would prevent both the exact and substring match.
1524+ static const std::unordered_map<std::string, uint32_t > rdna_common_pipelines = {
1525+ {" soft_max" , 64 }, {" im2col" , 64 },
15281526};
1527+
// RDNA1 table = copy of rdna_common_pipelines plus the four entries inserted below.
1528+ // RDNA1 pipeline configuration.
// Non-const on purpose: it is mutated once by the initializer of rdna1_initialized.
1529+ static std::unordered_map<std::string, uint32_t > rdna1_pipelines = rdna_common_pipelines;
// Comma expression: insert(...) runs as a side effect while this flag is being
// initialized, then the expression yields `true`. The flag's value is not read
// anywhere in the visible code; it exists only to trigger the insert.
// "mul_mat_vec" (64) is a prefix of the two longer keys (32); get_subgroup_size()
// tries keys longest-first in its substring fallback, so the longer keys take
// precedence for names that contain them.
// NOTE(review): an immediately-invoked lambda returning the finished map would
// allow rdna1_pipelines to be const and remove the helper flag -- consider.
1530+ static const bool rdna1_initialized = (rdna1_pipelines.insert({
1531+ {" argmax" , 64 }, {" mul_mat_vec" , 64 },
1532+ {" mul_mat_vec_f16" , 32 }, {" mul_mat_vec_f32_f16" , 32 }
1533+ }), true );
1534+
// Passed as the last initializer of both the AMD_RDNA1 and AMD_RDNA2 entries of
// gpu_pipeline_configs. Presumably this populates default_subgroup_size, which
// get_subgroup_size() returns when no table key matches -- confirm against the
// GpuPipelineConfig member order.
15291535static constexpr uint32_t RDNA_DEFAULT_SUBGROUP_SIZE = 32 ;
15301536
15311537// Define configurations for different GPUs.
15321538static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
15331539 {
15341540 vk_device_architecture::AMD_RDNA1,
15351541 {
1536- rdna_pipelines ,
1542+ rdna1_pipelines ,
15371543 },
15381544 RDNA_DEFAULT_SUBGROUP_SIZE
15391545 },
15401546 {
15411547 vk_device_architecture::AMD_RDNA2,
15421548 {
1543- rdna_pipelines ,
1549+ rdna_common_pipelines ,
15441550 },
15451551 RDNA_DEFAULT_SUBGROUP_SIZE
15461552 },
@@ -1550,14 +1556,21 @@ static uint32_t get_subgroup_size(const std::string &pipeline_name, const vk_dev
15501556 for (const auto &config : gpu_pipeline_configs) {
15511557 if (config.arch == arch) {
15521558 auto pipIt = config.pipelines .find (pipeline_name);
1553- if (pipIt != config.pipelines .end () && pipIt-> second != 0 ) {
1559+ if (pipIt != config.pipelines .end ()) {
15541560 return pipIt->second ;
15551561 }
1562+ std::vector<std::pair<std::string, uint32_t >> sorted_pipelines (config.pipelines .begin (), config.pipelines .end ());
1563+ std::sort (sorted_pipelines.begin (), sorted_pipelines.end (),
1564+ [](const auto &a, const auto &b) { return a.first .size () > b.first .size (); });
1565+ for (const auto &entry : sorted_pipelines) {
1566+ if (pipeline_name.find (entry.first ) != std::string::npos) {
1567+ return entry.second ;
1568+ }
1569+ }
15561570 return config.default_subgroup_size ;
15571571 }
15581572 }
1559- // If no matching configuration is found, return 0.
1560- return 0 ;
1573+ return 0 ; // If no matching configuration is found
15611574}
15621575
15631576static void ggml_vk_load_shaders (vk_device& device) {
0 commit comments