@@ -1428,6 +1428,10 @@ struct GpuPipelineConfig {
14281428 // For example, this can include names like "NAVI10", "RX 5700", etc.
14291429 std::vector<std::string> device_names;
14301430
1431+ // Mapping of pipeline names to their specific subgroup sizes.
1432+ // Example: {"soft_max_f32", 64}.
1433+ std::unordered_map<std::string, uint32_t > pipelines;
1434+
14311435 // Default subgroup size for this GPU.
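14321436 // Defaults to 0 if not explicitly provided; 0 means "no override" (ggml_vk_print_gpu_info then falls back to the device-reported subgroupSize).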
14331437 uint32_t default_subgroup_size = 0 ;
@@ -1437,14 +1441,23 @@ struct GpuPipelineConfig {
// Table of per-GPU subgroup-size settings that get_subgroup_size() scans.
// Each entry is brace-initialized with three values, in order: the device-name
// substrings, the per-pipeline subgroup-size overrides, and the default
// subgroup size (device_names, pipelines, default_subgroup_size).
14371441static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
14381442 {
// Device-name substrings; get_subgroup_size() matches them against the
// reported device name with std::string::find, so a partial match is enough.
14391443 {" NAVI10" , " NAVI14" , " RX 5700" , " RX 5600" , " RX 5500" },
// Pipelines that get a subgroup size of 64 instead of this entry's default.
// get_subgroup_size() returns one of these only when the pipeline name is
// found here and its value is non-zero.
1444+ {
1445+ {" soft_max_f32" , 64 }, {" soft_max_f32_wg512" , 64 },
1446+ {" soft_max_f32_f16" , 64 }, {" soft_max_f32_f16_wg512" , 64 },
1447+ {" im2col_f32" , 64 }, {" im2col_f32_f16" , 64 },
1448+ },
// Default subgroup size returned for any other pipeline on a matching device.
14401449 32
14411450 },
14421451};
14431452
1444- static uint32_t get_subgroup_size (const std::string &device_name) {
1453+ static uint32_t get_subgroup_size (const std::string &pipeline_name, const std::string & device_name) {
14451454 for (const auto &config : gpu_pipeline_configs) {
14461455 for (const auto &alias : config.device_names ) {
14471456 if (device_name.find (alias) != std::string::npos) {
1457+ auto pipIt = config.pipelines .find (pipeline_name);
1458+ if (pipIt != config.pipelines .end () && pipIt->second != 0 ) {
1459+ return pipIt->second ;
1460+ }
14481461 return config.default_subgroup_size ;
14491462 }
14501463 }
@@ -1582,7 +1595,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
15821595 uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t , 3 > wg_denoms, const std::vector<uint32_t >& specialization_constants,
15831596 uint32_t align, bool disable_robustness = false , bool require_full_subgroups = false , uint32_t required_subgroup_size = 0 ) {
15841597
1585- required_subgroup_size = get_subgroup_size (device_name);
1598+ required_subgroup_size = get_subgroup_size (name, device_name);
15861599
15871600 if (!pipeline) {
15881601 pipeline = std::make_shared<vk_pipeline_struct>();
@@ -2735,7 +2748,7 @@ static void ggml_vk_print_gpu_info(size_t idx) {
27352748 subgroup_props.pNext = &driver_props;
27362749 physical_device.getProperties2 (&props2);
27372750
2738- uint32_t default_subgroup_size = get_subgroup_size (props2.properties .deviceName .data ());
2751+ uint32_t default_subgroup_size = get_subgroup_size (" " , props2.properties .deviceName .data ());
27392752 const size_t subgroup_size = (default_subgroup_size != 0 ) ? default_subgroup_size : subgroup_props.subgroupSize ;
27402753
27412754 const bool uma = props2.properties .deviceType == vk::PhysicalDeviceType::eIntegratedGpu;
0 commit comments