Skip to content

Commit ab59dc6

Browse files
Keenuts and sudonatalie authored
opt: prevent meld to merge block with MaximalReconvergence (KhronosGroup#5557)
The extension SPV_KHR_maximal_reconvergence adds more constraints around the merge blocks and how the control flow can be altered. The constraint addressed here is explained in the following part of the spec:

> Note: This means that the instructions in a break block will execute as if they were still diverged according to the loop iteration. This restricts potential transformations an implementation may perform on the IR to match shader author expectations. Similarly, instructions in the loop construct cannot be moved into the continue construct unless it can be proven that invocations are always converged.

Until the optimizer is clever enough to determine whether the invocations have already converged, we shall not meld a block that branches to a merge block into that merge block, as doing so might move some instructions outside of the convergence region.

Because this behavior is only required by the extension, the behavior change in this commit is gated on the extension being declared. This means that using wave operations without the maximal reconvergence extension might still lead to undefined behavior.

Co-authored-by: Natalie Chouinard <[email protected]>
1 parent 6c11c2b commit ab59dc6

File tree

2 files changed

+153
-0
lines changed

2 files changed

+153
-0
lines changed

source/opt/block_merge_util.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,17 @@ bool CanMergeWithSuccessor(IRContext* context, BasicBlock* block) {
9898
return false;
9999
}
100100

101+
// Note: This means that the instructions in a break block will execute as if
102+
// they were still diverged according to the loop iteration. This restricts
103+
// potential transformations an implementation may perform on the IR to match
104+
// shader author expectations. Similarly, instructions in the loop construct
105+
// cannot be moved into the continue construct unless it can be proven that
106+
// invocations are always converged.
107+
if (succ_is_merge && context->get_feature_mgr()->HasExtension(
108+
kSPV_KHR_maximal_reconvergence)) {
109+
return false;
110+
}
111+
101112
if (pred_is_merge && IsContinue(context, lab_id)) {
102113
// Cannot merge a continue target with a merge block.
103114
return false;

test/opt/block_merge_test.cpp

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,148 @@ OpFunctionEnd
13201320
SinglePassRunAndMatch<BlockMergePass>(text, true);
13211321
}
13221322

1323+
TEST_F(BlockMergeTest, MaximalReconvergenceNoMeldToMerge) {
1324+
const std::string text = R"(
1325+
OpCapability Shader
1326+
OpCapability GroupNonUniformBallot
1327+
OpCapability GroupNonUniformArithmetic
1328+
OpExtension "SPV_KHR_maximal_reconvergence"
1329+
OpMemoryModel Logical GLSL450
1330+
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %output
1331+
OpExecutionMode %main LocalSize 1 1 1
1332+
OpExecutionMode %main MaximallyReconvergesKHR
1333+
OpSource HLSL 660
1334+
OpName %type_RWStructuredBuffer_uint "type.RWStructuredBuffer.uint"
1335+
OpName %output "output"
1336+
OpName %main "main"
1337+
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
1338+
OpDecorate %output DescriptorSet 0
1339+
OpDecorate %output Binding 0
1340+
OpDecorate %_runtimearr_uint ArrayStride 4
1341+
OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
1342+
OpDecorate %type_RWStructuredBuffer_uint Block
1343+
%uint = OpTypeInt 32 0
1344+
%bool = OpTypeBool
1345+
%int = OpTypeInt 32 1
1346+
%int_0 = OpConstant %int 0
1347+
%int_1 = OpConstant %int 1
1348+
%_runtimearr_uint = OpTypeRuntimeArray %uint
1349+
%type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
1350+
%_ptr_StorageBuffer_type_RWStructuredBuffer_uint = OpTypePointer StorageBuffer %type_RWStructuredBuffer_uint
1351+
%v3uint = OpTypeVector %uint 3
1352+
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
1353+
%void = OpTypeVoid
1354+
%15 = OpTypeFunction %void
1355+
%uint_3 = OpConstant %uint 3
1356+
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
1357+
%output = OpVariable %_ptr_StorageBuffer_type_RWStructuredBuffer_uint StorageBuffer
1358+
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
1359+
%main = OpFunction %void None %15
1360+
%18 = OpLabel
1361+
%19 = OpLoad %v3uint %gl_GlobalInvocationID
1362+
OpBranch %20
1363+
%20 = OpLabel
1364+
OpLoopMerge %21 %22 None
1365+
; CHECK: OpLoopMerge [[merge:%\w+]] [[continue:%\w+]]
1366+
OpBranch %23
1367+
%23 = OpLabel
1368+
%24 = OpCompositeExtract %uint %19 0
1369+
%25 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %24
1370+
%26 = OpIEqual %bool %24 %25
1371+
OpSelectionMerge %27 None
1372+
OpBranchConditional %26 %28 %27
1373+
%28 = OpLabel
1374+
%29 = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1375+
%30 = OpBitcast %uint %29
1376+
OpBranch %21
1377+
; CHECK: [[t1:%\w+]] = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1378+
; CHECK-NEXT: [[t2:%\w+]] = OpBitcast %uint [[t1]]
1379+
; CHECK-NEXT: OpBranch [[merge]]
1380+
%27 = OpLabel
1381+
OpBranch %22
1382+
%22 = OpLabel
1383+
OpBranch %20
1384+
%21 = OpLabel
1385+
%31 = OpAccessChain %_ptr_StorageBuffer_uint %output %int_0 %24
1386+
OpStore %31 %30
1387+
OpReturn
1388+
OpFunctionEnd
1389+
)";
1390+
1391+
SetTargetEnv(SPV_ENV_VULKAN_1_3);
1392+
SinglePassRunAndMatch<BlockMergePass>(text, true);
1393+
}
1394+
1395+
TEST_F(BlockMergeTest, NoMaximalReconvergenceMeldToMerge) {
1396+
const std::string text = R"(
1397+
OpCapability Shader
1398+
OpCapability GroupNonUniformBallot
1399+
OpCapability GroupNonUniformArithmetic
1400+
OpMemoryModel Logical GLSL450
1401+
OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %output
1402+
OpExecutionMode %main LocalSize 1 1 1
1403+
OpSource HLSL 660
1404+
OpName %type_RWStructuredBuffer_uint "type.RWStructuredBuffer.uint"
1405+
OpName %output "output"
1406+
OpName %main "main"
1407+
OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
1408+
OpDecorate %output DescriptorSet 0
1409+
OpDecorate %output Binding 0
1410+
OpDecorate %_runtimearr_uint ArrayStride 4
1411+
OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
1412+
OpDecorate %type_RWStructuredBuffer_uint Block
1413+
%uint = OpTypeInt 32 0
1414+
%bool = OpTypeBool
1415+
%int = OpTypeInt 32 1
1416+
%int_0 = OpConstant %int 0
1417+
%int_1 = OpConstant %int 1
1418+
%_runtimearr_uint = OpTypeRuntimeArray %uint
1419+
%type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
1420+
%_ptr_StorageBuffer_type_RWStructuredBuffer_uint = OpTypePointer StorageBuffer %type_RWStructuredBuffer_uint
1421+
%v3uint = OpTypeVector %uint 3
1422+
%_ptr_Input_v3uint = OpTypePointer Input %v3uint
1423+
%void = OpTypeVoid
1424+
%15 = OpTypeFunction %void
1425+
%uint_3 = OpConstant %uint 3
1426+
%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
1427+
%output = OpVariable %_ptr_StorageBuffer_type_RWStructuredBuffer_uint StorageBuffer
1428+
%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
1429+
%main = OpFunction %void None %15
1430+
%18 = OpLabel
1431+
%19 = OpLoad %v3uint %gl_GlobalInvocationID
1432+
OpBranch %20
1433+
%20 = OpLabel
1434+
OpLoopMerge %21 %22 None
1435+
; CHECK: OpLoopMerge [[merge:%\w+]] [[continue:%\w+]]
1436+
OpBranch %23
1437+
%23 = OpLabel
1438+
%24 = OpCompositeExtract %uint %19 0
1439+
%25 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %24
1440+
%26 = OpIEqual %bool %24 %25
1441+
OpSelectionMerge %27 None
1442+
OpBranchConditional %26 %28 %27
1443+
%28 = OpLabel
1444+
%29 = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1445+
%30 = OpBitcast %uint %29
1446+
OpBranch %21
1447+
; CHECK: [[merge]] = OpLabel
1448+
; CHECK-NEXT: [[t1:%\w+]] = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1449+
; CHECK-NEXT: [[t2:%\w+]] = OpBitcast %uint [[t1]]
1450+
%27 = OpLabel
1451+
OpBranch %22
1452+
%22 = OpLabel
1453+
OpBranch %20
1454+
%21 = OpLabel
1455+
%31 = OpAccessChain %_ptr_StorageBuffer_uint %output %int_0 %24
1456+
OpStore %31 %30
1457+
OpReturn
1458+
OpFunctionEnd
1459+
)";
1460+
1461+
SetTargetEnv(SPV_ENV_VULKAN_1_3);
1462+
SinglePassRunAndMatch<BlockMergePass>(text, true);
1463+
}
1464+
13231465
// TODO(greg-lunarg): Add tests to verify handling of these cases:
13241466
//
13251467
// More complex control flow

0 commit comments

Comments
 (0)