@@ -1485,12 +1485,12 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
14851485}
14861486
14871487struct  GpuPipelineConfig  {
1488-     //  List of all aliases for a given GPU .
1489-     //  For example, this can include names like "NAVI10", "RX 5700", etc. 
1490-     std::vector<std::string> device_names ;
1488+     //  GPU architecture identifier .
1489+     //  Example: vk_device_architecture::AMD_GCN 
1490+     vk_device_architecture arch ;
14911491
14921492    //  Mapping of pipeline names to their specific subgroup sizes.
1493-     //  Example: {"soft_max_f32", 64}. 
1493+     //  Example: {"soft_max_f32", 64}
14941494    std::unordered_map<std::string, uint32_t > pipelines;
14951495
14961496    //  Default subgroup size for this GPU.
@@ -1501,7 +1501,7 @@ struct GpuPipelineConfig {
15011501//  Define configurations for different GPUs.
15021502static  std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
15031503    {
1504-         { " NAVI10 " ,  " NAVI14 " ,  " RX 5700 " ,  " RX 5600 " ,  " RX 5500 " } ,
1504+         vk_device_architecture::AMD_RDNA1 ,
15051505        {
15061506            {" soft_max_f32"  , 64 }, {" soft_max_f32_wg512"  , 64 },
15071507            {" soft_max_f32_f16"  , 64 }, {" soft_max_f32_f16_wg512"  , 64 },
@@ -1511,16 +1511,14 @@ static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
15111511    },
15121512};
15131513
1514- static  uint32_t  get_subgroup_size (const  std::string &pipeline_name, const  std::string &device_name ) {
1514+ static  uint32_t  get_subgroup_size (const  std::string &pipeline_name, const  vk_device_architecture &arch ) {
15151515    for  (const  auto  &config : gpu_pipeline_configs) {
1516-         for  (const  auto  &alias : config.device_names ) {
1517-             if  (device_name.find (alias) != std::string::npos) {
1518-                 auto  pipIt = config.pipelines .find (pipeline_name);
1519-                 if  (pipIt != config.pipelines .end () && pipIt->second  != 0 ) {
1520-                     return  pipIt->second ;
1521-                 }
1522-                 return  config.default_subgroup_size ;
1516+         if  (config.arch  == arch) {
1517+             auto  pipIt = config.pipelines .find (pipeline_name);
1518+             if  (pipIt != config.pipelines .end () && pipIt->second  != 0 ) {
1519+                 return  pipIt->second ;
15231520            }
1521+             return  config.default_subgroup_size ;
15241522        }
15251523    }
15261524    //  If no matching configuration is found, return 0.
@@ -1647,16 +1645,12 @@ static void ggml_vk_load_shaders(vk_device& device) {
16471645        device->pipeline_matmul_id_f32  = std::make_shared<vk_matmul_pipeline_struct>();
16481646    }
16491647
1650-     vk::PhysicalDeviceProperties2 props2;
1651-     device->physical_device .getProperties2 (&props2);
1652-     std::string device_name = props2.properties .deviceName .data ();
1653- 
16541648    std::vector<std::future<void >> compiles;
16551649    auto  const  &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const  std::string &name, size_t  spv_size, const  void * spv_data, const  std::string &entrypoint,
16561650                                              uint32_t  parameter_count, uint32_t  push_constant_size, std::array<uint32_t , 3 > wg_denoms, const  std::vector<uint32_t >& specialization_constants,
16571651                                              uint32_t  align, bool  disable_robustness = false , bool  require_full_subgroups = false , uint32_t  required_subgroup_size = 0 ) {
16581652
1659-         required_subgroup_size = get_subgroup_size (name, device_name );
1653+         required_subgroup_size = get_subgroup_size (name, device-> architecture );
16601654
16611655        if  (!pipeline) {
16621656            pipeline = std::make_shared<vk_pipeline_struct>();
@@ -2810,7 +2804,8 @@ static void ggml_vk_print_gpu_info(size_t idx) {
28102804    subgroup_props.pNext  = &driver_props;
28112805    physical_device.getProperties2 (&props2);
28122806
2813-     uint32_t  default_subgroup_size = get_subgroup_size (" "  , props2.properties .deviceName .data ());
2807+     vk_device_architecture arch = get_device_architecture (physical_device);
2808+     uint32_t  default_subgroup_size = get_subgroup_size (" "  , arch);
28142809    const  size_t  subgroup_size = (default_subgroup_size != 0 ) ? default_subgroup_size : subgroup_props.subgroupSize ;
28152810
28162811    const  bool  uma = props2.properties .deviceType  == vk::PhysicalDeviceType::eIntegratedGpu;
0 commit comments