@@ -1423,49 +1423,6 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
14231423 return supported;
14241424}
14251425
// Per-GPU subgroup-size overrides, selected by matching the device name
// against a list of known aliases.
struct GpuPipelineConfig {
    // List of all aliases for a given GPU.
    // For example, this can include names like "NAVI10", "RX 5700", etc.
    std::vector<std::string> device_names;

    // Mapping of pipeline names to their specific subgroup sizes.
    // Example: {"soft_max_f32", 64}.
    std::unordered_map<std::string, uint32_t> pipelines;

    // Default subgroup size for this GPU.
    // Defaults to 0 if not explicitly provided.
    uint32_t default_subgroup_size = 0;
};

// Define configurations for different GPUs.
// Aliases and pipeline names are written without surrounding whitespace so
// that they match exactly what the comments above describe.
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        {
            {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
            {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
            {"im2col_f32", 64}, {"im2col_f32_f16", 64},
        },
        32
    },
};

// Look up the subgroup size to use for `pipeline_name` on `device_name`.
//
// The first configuration with an alias that occurs as a substring of
// `device_name` decides the result: a non-zero per-pipeline entry wins,
// otherwise that configuration's default is returned.
// Returns 0 when no configuration matches the device.
static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
    for (const auto &config : gpu_pipeline_configs) {
        for (const auto &alias : config.device_names) {
            if (device_name.find(alias) == std::string::npos) {
                continue;
            }
            const auto override_it = config.pipelines.find(pipeline_name);
            if (override_it != config.pipelines.end() && override_it->second != 0) {
                return override_it->second;
            }
            // A zero or missing pipeline entry falls back to the GPU default.
            return config.default_subgroup_size;
        }
    }
    // If no matching configuration is found, return 0.
    return 0;
}
1468-
14691426static void ggml_vk_load_shaders (vk_device& device) {
14701427 VK_LOG_DEBUG (" ggml_vk_load_shaders(" << device->name << " )" );
14711428
@@ -1586,17 +1543,11 @@ static void ggml_vk_load_shaders(vk_device& device) {
15861543 device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
15871544 }
15881545
1589- vk::PhysicalDeviceProperties2 props2;
1590- device->physical_device .getProperties2 (&props2);
1591- std::string device_name = props2.properties .deviceName .data ();
1592-
15931546 std::vector<std::future<void >> compiles;
15941547 auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void * spv_data, const std::string &entrypoint,
15951548 uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t , 3 > wg_denoms, const std::vector<uint32_t >& specialization_constants,
15961549 uint32_t align, bool disable_robustness = false , bool require_full_subgroups = false , uint32_t required_subgroup_size = 0 ) {
15971550
1598- required_subgroup_size = get_subgroup_size (name, device_name);
1599-
16001551 if (!pipeline) {
16011552 pipeline = std::make_shared<vk_pipeline_struct>();
16021553 pipeline->name = name;
@@ -2723,6 +2674,36 @@ static vk_device ggml_vk_get_device(size_t idx) {
27232674 return vk_instance.devices [idx];
27242675}
27252676
// Per-GPU subgroup-size override, selected by matching the device name
// against a list of known aliases.
struct GpuPipelineConfig {
    // List of all aliases for a given GPU.
    // For example, this can include names like "NAVI10", "RX 5700", etc.
    std::vector<std::string> device_names;

    // Default subgroup size for this GPU.
    // Defaults to 0 if not explicitly provided.
    uint32_t default_subgroup_size = 0;
};

// Define configurations for different GPUs.
// Aliases are written without surrounding whitespace so they match the
// examples documented on `device_names`, including a device name that
// begins with the alias.
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        32
    },
};

// Return the configured default subgroup size for `device_name`.
//
// The first configuration with an alias that occurs as a substring of
// `device_name` supplies the value. Returns 0 when no configuration matches.
static uint32_t get_subgroup_size(const std::string &device_name) {
    for (const auto &config : gpu_pipeline_configs) {
        for (const auto &alias : config.device_names) {
            if (device_name.find(alias) == std::string::npos) {
                continue;
            }
            return config.default_subgroup_size;
        }
    }
    // If no matching configuration is found, return 0.
    return 0;
}
2706+
27262707static void ggml_vk_print_gpu_info (size_t idx) {
27272708 GGML_ASSERT (idx < vk_instance.device_indices .size ());
27282709 size_t dev_num = vk_instance.device_indices [idx];
@@ -2748,7 +2729,9 @@ static void ggml_vk_print_gpu_info(size_t idx) {
27482729 subgroup_props.pNext = &driver_props;
27492730 physical_device.getProperties2 (&props2);
27502731
2751- const size_t subgroup_size = subgroup_props.subgroupSize ;
2732+ uint32_t default_subgroup_size = get_subgroup_size (props2.properties .deviceName .data ());
2733+ const size_t subgroup_size = (default_subgroup_size != 0 ) ? default_subgroup_size : subgroup_props.subgroupSize ;
2734+
27522735 const bool uma = props2.properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
27532736
27542737 bool fp16_storage = false ;
0 commit comments