1 file changed: +6 -3 lines changed
@@ -1848,7 +1848,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
18481848
18491849 // AMD GCN graphics cards perform best when the number of rows per shader is doubled
18501850 uint32_t rm = 1 ;
1851- if ((device->subgroup_size_control ) && (device-> vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64 ) && (device->subgroup_max_size == 64 ))
1851+ if ((device->vendor_id == VK_VENDOR_ID_AMD) && (device->subgroup_min_size == 64 ) && (device->subgroup_max_size == 64 ))
18521852 rm = 2 ;
18531853
18541854 // computing additional rows per workgroup is a benefit for Q4_0 -> Q5_1, but not for Q8_0.
@@ -2249,13 +2249,16 @@ static vk_device ggml_vk_get_device(size_t idx) {
22492249
22502250 device->pipeline_robustness = pl_robustness_features.pipelineRobustness ;
22512251
2252+ if (device->subgroup_size_control ) {
2253+ device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize ;
2254+ device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize ;
2255+ }
2256+
22522257 device->subgroup_size_control = device->subgroup_size_control &&
22532258 (subgroup_size_control_props.requiredSubgroupSizeStages & vk::ShaderStageFlagBits::eCompute) &&
22542259 subgroup_size_control_features.subgroupSizeControl ;
22552260
22562261 if (device->subgroup_size_control ) {
2257- device->subgroup_min_size = subgroup_size_control_props.minSubgroupSize ;
2258- device->subgroup_max_size = subgroup_size_control_props.maxSubgroupSize ;
22592262 device->subgroup_require_full_support = subgroup_size_control_features.computeFullSubgroups ;
22602263 device_extensions.push_back (" VK_EXT_subgroup_size_control" );
22612264 }
You can’t perform that action at this time.
0 commit comments