Skip to content

Commit 43c3e6f

Browse files
committed
Initial Vulkan subgroup size tuning for RDNA3
1 parent 7695541 commit 43c3e6f

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1531,6 +1531,15 @@ static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
15311531
},
15321532
32
15331533
},
1534+
{
1535+
vk_device_architecture::AMD_RDNA3,
1536+
{
1537+
{"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
1538+
{"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
1539+
{"im2col_f32", 64}, {"im2col_f32_f16", 64},
1540+
},
1541+
32
1542+
},
15341543
};
15351544

15361545
static uint32_t get_subgroup_size(const std::string &pipeline_name, const vk_device_architecture &arch) {
@@ -1673,7 +1682,9 @@ static void ggml_vk_load_shaders(vk_device& device) {
16731682
uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants,
16741683
uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) {
16751684

1676-
required_subgroup_size = get_subgroup_size(name, device->architecture);
1685+
if (!require_full_subgroups) {
1686+
required_subgroup_size = get_subgroup_size(name, device->architecture);
1687+
}
16771688

16781689
if (!pipeline) {
16791690
pipeline = std::make_shared<vk_pipeline_struct>();

0 commit comments

Comments
 (0)