Skip to content

Commit 29e8104

Browse files
committed
vulkan: fine tuned RDNA1 subgroup sizes
1 parent afb5c2d commit 29e8104

File tree

1 file changed

+22
-9
lines changed

1 file changed

+22
-9
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,26 +1521,32 @@ struct GpuPipelineConfig {
15211521
};
15221522

15231523
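// Common pipeline configuration for RDNA GPUs: maps a pipeline name (or a fragment of one, matched as a substring by get_subgroup_size) to the subgroup size to use.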
1524-
static const std::unordered_map<std::string, uint32_t> rdna_pipelines = {
1525-
{"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
1526-
{"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
1527-
{"im2col_f32", 64}, {"im2col_f32_f16", 64},
1524+
static const std::unordered_map<std::string, uint32_t> rdna_common_pipelines = {
1525+
{"soft_max", 64}, {"im2col", 64},
15281526
};
1527+
1528+
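// RDNA1 pipeline configuration: starts as a copy of the common RDNA table; the RDNA1-specific entries are inserted by the rdna1_initialized initializer below.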
1529+
static std::unordered_map<std::string, uint32_t> rdna1_pipelines = rdna_common_pipelines;
1530+
static const bool rdna1_initialized = (rdna1_pipelines.insert({
1531+
{"argmax", 64}, {"mul_mat_vec", 64},
1532+
{"mul_mat_vec_f16", 32}, {"mul_mat_vec_f32_f16", 32}
1533+
}), true);
1534+
15291535
static constexpr uint32_t RDNA_DEFAULT_SUBGROUP_SIZE = 32;
15301536

15311537
// Define configurations for different GPUs.
15321538
static std::vector<GpuPipelineConfig> gpu_pipeline_configs = {
15331539
{
15341540
vk_device_architecture::AMD_RDNA1,
15351541
{
1536-
rdna_pipelines,
1542+
rdna1_pipelines,
15371543
},
15381544
RDNA_DEFAULT_SUBGROUP_SIZE
15391545
},
15401546
{
15411547
vk_device_architecture::AMD_RDNA2,
15421548
{
1543-
rdna_pipelines,
1549+
rdna_common_pipelines,
15441550
},
15451551
RDNA_DEFAULT_SUBGROUP_SIZE
15461552
},
@@ -1550,14 +1556,21 @@ static uint32_t get_subgroup_size(const std::string &pipeline_name, const vk_dev
15501556
for (const auto &config : gpu_pipeline_configs) {
15511557
if (config.arch == arch) {
15521558
auto pipIt = config.pipelines.find(pipeline_name);
1553-
if (pipIt != config.pipelines.end() && pipIt->second != 0) {
1559+
if (pipIt != config.pipelines.end()) {
15541560
return pipIt->second;
15551561
}
1562+
std::vector<std::pair<std::string, uint32_t>> sorted_pipelines(config.pipelines.begin(), config.pipelines.end());
1563+
std::sort(sorted_pipelines.begin(), sorted_pipelines.end(),
1564+
[](const auto &a, const auto &b) { return a.first.size() > b.first.size(); });
1565+
for (const auto &entry : sorted_pipelines) {
1566+
if (pipeline_name.find(entry.first) != std::string::npos) {
1567+
return entry.second;
1568+
}
1569+
}
15561570
return config.default_subgroup_size;
15571571
}
15581572
}
1559-
// If no matching configuration is found, return 0.
1560-
return 0;
1573+
return 0; // If no matching configuration is found
15611574
}
15621575

15631576
static void ggml_vk_load_shaders(vk_device& device) {

0 commit comments

Comments
 (0)