@@ -1423,6 +1423,36 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec
14231423 return supported;
14241424}
14251425
// Per-GPU pipeline tuning entry.
struct GpuPipelineConfig {
    // All aliases under which a given GPU may report itself.
    // For example, this can include names like "NAVI10", "RX 5700", etc.
    // Each alias is matched as a case-sensitive substring of the device name.
    std::vector<std::string> device_names;

    // Default subgroup size for this GPU.
    // Defaults to 0 if not explicitly provided.
    uint32_t default_subgroup_size = 0;
};

// Configurations for different GPUs. Read-only lookup table: entries are
// scanned in order and the first entry with a matching alias wins.
static const std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
    {
        {"NAVI10", "NAVI14", "RX 5700", "RX 5600", "RX 5500"},
        32
    },
};

// Looks up the default subgroup size configured for a device.
//
// device_name: the device name string to search; an alias matches when it
//              occurs anywhere inside this string (case-sensitive).
//
// Returns the default_subgroup_size of the first configuration that has a
// matching alias, or 0 if no configuration matches.
static uint32_t get_subgroup_size(const std::string &device_name) {
    for (const auto &config : gpu_pipeline_configs) {
        for (const auto &alias : config.device_names) {
            // std::string::find("") succeeds at position 0 for every string,
            // so an empty alias would silently match all devices; ignore it.
            if (alias.empty()) {
                continue;
            }
            if (device_name.find(alias) != std::string::npos) {
                return config.default_subgroup_size;
            }
        }
    }
    // If no matching configuration is found, return 0.
    return 0;
}
1455+
14261456static void ggml_vk_load_shaders (vk_device& device) {
14271457 VK_LOG_DEBUG (" ggml_vk_load_shaders(" << device->name << " )" );
14281458
@@ -1543,11 +1573,17 @@ static void ggml_vk_load_shaders(vk_device& device) {
15431573 device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>();
15441574 }
15451575
1576+ vk::PhysicalDeviceProperties2 props2;
1577+ device->physical_device .getProperties2 (&props2);
1578+ std::string device_name = props2.properties .deviceName .data ();
1579+
15461580 std::vector<std::future<void >> compiles;
15471581 auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void * spv_data, const std::string &entrypoint,
15481582 uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t , 3 > wg_denoms, const std::vector<uint32_t >& specialization_constants,
15491583 uint32_t align, bool disable_robustness = false , bool require_full_subgroups = false , uint32_t required_subgroup_size = 0 ) {
15501584
1585+ required_subgroup_size = get_subgroup_size (device_name);
1586+
15511587 if (!pipeline) {
15521588 pipeline = std::make_shared<vk_pipeline_struct>();
15531589 pipeline->name = name;
@@ -2674,36 +2710,6 @@ static vk_device ggml_vk_get_device(size_t idx) {
26742710 return vk_instance.devices [idx];
26752711}
26762712
2677- struct GpuPipelineConfig {
2678- // List of all aliases for a given GPU.
2679- // For example, this can include names like "NAVI10", "RX 5700", etc.
2680- std::vector<std::string> device_names;
2681-
2682- // Default subgroup size for this GPU.
2683- // Defaults to 0 if not explicitly provided.
2684- uint32_t default_subgroup_size = 0 ;
2685- };
2686-
2687- // Define configurations for different GPUs.
2688- static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
2689- {
2690- {" NAVI10" , " NAVI14" , " RX 5700" , " RX 5600" , " RX 5500" },
2691- 32
2692- },
2693- };
2694-
2695- static uint32_t get_subgroup_size (const std::string &device_name) {
2696- for (const auto &config : gpu_pipeline_configs) {
2697- for (const auto &alias : config.device_names ) {
2698- if (device_name.find (alias) != std::string::npos) {
2699- return config.default_subgroup_size ;
2700- }
2701- }
2702- }
2703- // If no matching configuration is found, return 0.
2704- return 0 ;
2705- }
2706-
27072713static void ggml_vk_print_gpu_info (size_t idx) {
27082714 GGML_ASSERT (idx < vk_instance.device_indices .size ());
27092715 size_t dev_num = vk_instance.device_indices [idx];
0 commit comments