Skip to content

Commit 1aa26d7

Browse files
committed
set min and max subgroup size in any case
1 parent 8b13f2d commit 1aa26d7

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1848,7 +1848,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
18481848

18491849
// AMD GCN graphics cards perform best when the number of rows per shader is doubled
18501850
uint32_t rm = 1;
1851-
if ((device->subgroup_size_control) && (device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64) && (device->subgroup_max_size == 64))
1851+
if ((device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64) && (device->subgroup_max_size == 64))
18521852
rm = 2;
18531853

18541854
// computing additional rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.
@@ -2249,13 +2249,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
22492249

22502250
device->pipeline_robustness = pl_robustness_features.pipelineRobustness;
22512251

2252+
if (device->subgroup_size_control) {
2253+
device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize;
2254+
device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize;
2255+
}
2256+
22522257
device->subgroup_size_control = device->subgroup_size_control &&
22532258
(subgroup_size_control_props.requiredSubgroupSizeStages & vk::ShaderStageFlagBits::eCompute) &&
22542259
subgroup_size_control_features.subgroupSizeControl;
22552260

22562261
if (device->subgroup_size_control) {
2257-
device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize;
2258-
device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize;
22592262
device->subgroup_require_full_support = subgroup_size_control_features.computeFullSubgroups;
22602263
device_extensions.push_back("VK_EXT_subgroup_size_control");
22612264
}

0 commit comments

Comments
 (0)