-
Notifications
You must be signed in to change notification settings - Fork 13.6k
vulkan: improve im2col #11826
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
vulkan: improve im2col #11826
Changes from 5 commits
62733f2
35f6369
293edef
14ea4fa
04100e8
9036e7a
d151973
0e5dd68
27f1301
9c9b812
4a3988e
c49419f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1423,6 +1423,36 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec | |
| return supported; | ||
| } | ||
|
|
||
// Per-GPU pipeline tuning table.
// Outer key: GPU identifier, matched as a substring of the device name (e.g. "RX 5700").
// Inner map: key is the pipeline name; value is the subgroup size configured for it.
// The table is only ever read, so it is const; lookups must therefore use find()
// rather than operator[], which could insert.
static const std::unordered_map<std::string, std::unordered_map<std::string, uint32_t>> gpu_pipeline_config = {
    {"RX 5700", {
        {"soft_max_f32", 64}, {"soft_max_f32_wg512", 64},
        {"soft_max_f32_f16", 64}, {"soft_max_f32_f16_wg512", 64},
        {"im2col_f32", 64}, {"im2col_f32_f16", 64},
    }}
};

// Looks up the configured subgroup size for a pipeline on a given device.
//
// pipeline_name: exact pipeline name to look up in the matching GPU's inner map.
// device_name:   device name string; a GPU entry matches when its key occurs
//                anywhere inside this string.
//
// Returns the configured subgroup size, or 0 when no GPU entry matches the
// device name or the matching entry has no value for this pipeline.
//
// When several GPU keys match the same device name, the longest key wins
// (ties broken by lexicographic order) so the result does not depend on the
// unspecified iteration order of std::unordered_map.
static uint32_t get_subgroup_size(const std::string &pipeline_name, const std::string &device_name) {
    using gpu_entry = decltype(gpu_pipeline_config)::value_type;

    const gpu_entry *best = nullptr;
    for (const auto &entry : gpu_pipeline_config) {
        const std::string &gpu_key = entry.first;
        // An empty key would match every device name; treat it as "no match".
        if (gpu_key.empty() || device_name.find(gpu_key) == std::string::npos) {
            continue;
        }
        const bool more_specific =
            best == nullptr ||
            gpu_key.size() > best->first.size() ||
            (gpu_key.size() == best->first.size() && gpu_key < best->first);
        if (more_specific) {
            best = &entry;
        }
    }

    if (best == nullptr) {
        return 0;
    }

    const auto pipeline_it = best->second.find(pipeline_name);
    // If not defined, return 0.
    return pipeline_it != best->second.end() ? pipeline_it->second : 0;
}
|
|
||
| static void ggml_vk_load_shaders(vk_device& device) { | ||
| VK_LOG_DEBUG("ggml_vk_load_shaders(" << device->name << ")"); | ||
|
|
||
|
|
@@ -1543,11 +1573,20 @@ static void ggml_vk_load_shaders(vk_device& device) { | |
| device->pipeline_matmul_id_f32 = std::make_shared<vk_matmul_pipeline_struct>(); | ||
| } | ||
|
|
||
| vk::PhysicalDeviceProperties2 props2; | ||
| device->physical_device.getProperties2(&props2); | ||
| std::string device_name = props2.properties.deviceName.data(); | ||
|
||
|
|
||
| std::vector<std::future<void>> compiles; | ||
| auto const &ggml_vk_create_pipeline = [&](vk_device& device, vk_pipeline& pipeline, const std::string &name, size_t spv_size, const void* spv_data, const std::string &entrypoint, | ||
| uint32_t parameter_count, uint32_t push_constant_size, std::array<uint32_t, 3> wg_denoms, const std::vector<uint32_t>& specialization_constants, | ||
| uint32_t align, bool disable_robustness = false, bool require_full_subgroups = false, uint32_t required_subgroup_size = 0) { | ||
|
|
||
| required_subgroup_size = get_subgroup_size(name, device_name); | ||
| if (required_subgroup_size == 0) { | ||
| required_subgroup_size = (device_name.find("RX 5700") != std::string::npos) ? 32 : required_subgroup_size; | ||
| } | ||
|
|
||
| if (!pipeline) { | ||
| pipeline = std::make_shared<vk_pipeline_struct>(); | ||
| pipeline->name = name; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.