@@ -1320,6 +1320,148 @@ OpFunctionEnd
1320
1320
SinglePassRunAndMatch<BlockMergePass>(text, true );
1321
1321
}
1322
1322
1323
// Runs BlockMergePass over a compute shader that declares the
// SPV_KHR_maximal_reconvergence extension and the MaximallyReconvergesKHR
// execution mode on %main.
//
// Inside the loop headed by %20, the selection arm %28 performs an
// OpGroupNonUniformIAdd and an OpBitcast and then branches to the loop merge
// block %21. The CHECK directives require the pass output to still contain
// that OpGroupNonUniformIAdd / OpBitcast pair immediately followed by an
// OpBranch to the label captured from OpLoopMerge, i.e. the arm must not be
// folded into the merge block.
+ TEST_F (BlockMergeTest, MaximalReconvergenceNoMeldToMerge) {
1324
+ const std::string text = R"(
1325
+ OpCapability Shader
1326
+ OpCapability GroupNonUniformBallot
1327
+ OpCapability GroupNonUniformArithmetic
1328
+ OpExtension "SPV_KHR_maximal_reconvergence"
1329
+ OpMemoryModel Logical GLSL450
1330
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %output
1331
+ OpExecutionMode %main LocalSize 1 1 1
1332
+ OpExecutionMode %main MaximallyReconvergesKHR
1333
+ OpSource HLSL 660
1334
+ OpName %type_RWStructuredBuffer_uint "type.RWStructuredBuffer.uint"
1335
+ OpName %output "output"
1336
+ OpName %main "main"
1337
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
1338
+ OpDecorate %output DescriptorSet 0
1339
+ OpDecorate %output Binding 0
1340
+ OpDecorate %_runtimearr_uint ArrayStride 4
1341
+ OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
1342
+ OpDecorate %type_RWStructuredBuffer_uint Block
1343
+ %uint = OpTypeInt 32 0
1344
+ %bool = OpTypeBool
1345
+ %int = OpTypeInt 32 1
1346
+ %int_0 = OpConstant %int 0
1347
+ %int_1 = OpConstant %int 1
1348
+ %_runtimearr_uint = OpTypeRuntimeArray %uint
1349
+ %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
1350
+ %_ptr_StorageBuffer_type_RWStructuredBuffer_uint = OpTypePointer StorageBuffer %type_RWStructuredBuffer_uint
1351
+ %v3uint = OpTypeVector %uint 3
1352
+ %_ptr_Input_v3uint = OpTypePointer Input %v3uint
1353
+ %void = OpTypeVoid
1354
+ %15 = OpTypeFunction %void
1355
+ %uint_3 = OpConstant %uint 3
1356
+ %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
1357
+ %output = OpVariable %_ptr_StorageBuffer_type_RWStructuredBuffer_uint StorageBuffer
1358
+ %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
1359
+ %main = OpFunction %void None %15
1360
+ %18 = OpLabel
1361
+ %19 = OpLoad %v3uint %gl_GlobalInvocationID
1362
+ OpBranch %20
1363
+ %20 = OpLabel
1364
+ OpLoopMerge %21 %22 None
1365
+ ; CHECK: OpLoopMerge [[merge:%\w+]] [[continue:%\w+]]
1366
+ OpBranch %23
1367
+ %23 = OpLabel
1368
+ %24 = OpCompositeExtract %uint %19 0
1369
+ %25 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %24
1370
+ %26 = OpIEqual %bool %24 %25
1371
+ OpSelectionMerge %27 None
1372
+ OpBranchConditional %26 %28 %27
1373
+ %28 = OpLabel
1374
+ %29 = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1375
+ %30 = OpBitcast %uint %29
1376
+ OpBranch %21
1377
+ ; CHECK: [[t1:%\w+]] = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1378
+ ; CHECK-NEXT: [[t2:%\w+]] = OpBitcast %uint [[t1]]
1379
+ ; CHECK-NEXT: OpBranch [[merge]]
1380
+ %27 = OpLabel
1381
+ OpBranch %22
1382
+ %22 = OpLabel
1383
+ OpBranch %20
1384
+ %21 = OpLabel
1385
+ %31 = OpAccessChain %_ptr_StorageBuffer_uint %output %int_0 %24
1386
+ OpStore %31 %30
1387
+ OpReturn
1388
+ OpFunctionEnd
1389
+ )" ;
1390
+
// The target environment is set to Vulkan 1.3 before the pass is run and its
// output is matched against the CHECK directives embedded in `text`.
// NOTE(review): the trailing `true` is presumably the fixture's validation
// flag -- confirm against SinglePassRunAndMatch's declaration.
1391
+ SetTargetEnv (SPV_ENV_VULKAN_1_3);
1392
+ SinglePassRunAndMatch<BlockMergePass>(text, true );
1393
+ }
1394
+
1395
// Runs BlockMergePass over a compute shader that declares neither the
// SPV_KHR_maximal_reconvergence extension nor the MaximallyReconvergesKHR
// execution mode.
//
// The selection arm %28 performs an OpGroupNonUniformIAdd and an OpBitcast
// and then branches to the loop merge block %21. The CHECK directives require
// that, in the pass output, the label captured from OpLoopMerge is
// immediately followed by the OpGroupNonUniformIAdd and the OpBitcast, i.e.
// those instructions now sit at the top of the loop merge block.
+ TEST_F (BlockMergeTest, NoMaximalReconvergenceMeldToMerge) {
1396
+ const std::string text = R"(
1397
+ OpCapability Shader
1398
+ OpCapability GroupNonUniformBallot
1399
+ OpCapability GroupNonUniformArithmetic
1400
+ OpMemoryModel Logical GLSL450
1401
+ OpEntryPoint GLCompute %main "main" %gl_GlobalInvocationID %output
1402
+ OpExecutionMode %main LocalSize 1 1 1
1403
+ OpSource HLSL 660
1404
+ OpName %type_RWStructuredBuffer_uint "type.RWStructuredBuffer.uint"
1405
+ OpName %output "output"
1406
+ OpName %main "main"
1407
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
1408
+ OpDecorate %output DescriptorSet 0
1409
+ OpDecorate %output Binding 0
1410
+ OpDecorate %_runtimearr_uint ArrayStride 4
1411
+ OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
1412
+ OpDecorate %type_RWStructuredBuffer_uint Block
1413
+ %uint = OpTypeInt 32 0
1414
+ %bool = OpTypeBool
1415
+ %int = OpTypeInt 32 1
1416
+ %int_0 = OpConstant %int 0
1417
+ %int_1 = OpConstant %int 1
1418
+ %_runtimearr_uint = OpTypeRuntimeArray %uint
1419
+ %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
1420
+ %_ptr_StorageBuffer_type_RWStructuredBuffer_uint = OpTypePointer StorageBuffer %type_RWStructuredBuffer_uint
1421
+ %v3uint = OpTypeVector %uint 3
1422
+ %_ptr_Input_v3uint = OpTypePointer Input %v3uint
1423
+ %void = OpTypeVoid
1424
+ %15 = OpTypeFunction %void
1425
+ %uint_3 = OpConstant %uint 3
1426
+ %_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint
1427
+ %output = OpVariable %_ptr_StorageBuffer_type_RWStructuredBuffer_uint StorageBuffer
1428
+ %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
1429
+ %main = OpFunction %void None %15
1430
+ %18 = OpLabel
1431
+ %19 = OpLoad %v3uint %gl_GlobalInvocationID
1432
+ OpBranch %20
1433
+ %20 = OpLabel
1434
+ OpLoopMerge %21 %22 None
1435
+ ; CHECK: OpLoopMerge [[merge:%\w+]] [[continue:%\w+]]
1436
+ OpBranch %23
1437
+ %23 = OpLabel
1438
+ %24 = OpCompositeExtract %uint %19 0
1439
+ %25 = OpGroupNonUniformBroadcastFirst %uint %uint_3 %24
1440
+ %26 = OpIEqual %bool %24 %25
1441
+ OpSelectionMerge %27 None
1442
+ OpBranchConditional %26 %28 %27
1443
+ %28 = OpLabel
1444
+ %29 = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1445
+ %30 = OpBitcast %uint %29
1446
+ OpBranch %21
1447
+ ; CHECK: [[merge]] = OpLabel
1448
+ ; CHECK-NEXT: [[t1:%\w+]] = OpGroupNonUniformIAdd %int %uint_3 Reduce %int_1
1449
+ ; CHECK-NEXT: [[t2:%\w+]] = OpBitcast %uint [[t1]]
1450
+ %27 = OpLabel
1451
+ OpBranch %22
1452
+ %22 = OpLabel
1453
+ OpBranch %20
1454
+ %21 = OpLabel
1455
+ %31 = OpAccessChain %_ptr_StorageBuffer_uint %output %int_0 %24
1456
+ OpStore %31 %30
1457
+ OpReturn
1458
+ OpFunctionEnd
1459
+ )" ;
1460
+
// The target environment is set to Vulkan 1.3 before the pass is run and its
// output is matched against the CHECK directives embedded in `text`.
// NOTE(review): the trailing `true` is presumably the fixture's validation
// flag -- confirm against SinglePassRunAndMatch's declaration.
1461
+ SetTargetEnv (SPV_ENV_VULKAN_1_3);
1462
+ SinglePassRunAndMatch<BlockMergePass>(text, true );
1463
+ }
1464
+
1323
1465
// TODO(greg-lunarg): Add tests to verify handling of these cases:
1324
1466
//
1325
1467
// More complex control flow
0 commit comments