Skip to content

Commit 58f78ff

Browse files
jaladreipsigcbot
authored and committed
Avoid double-storing the done bit in new raytracing
After initializing the ray, we don't have to store the done bit again, so we skip that store. This also fixes a small bug where the proceed result was not being inserted in the correct block.
1 parent 47a5820 commit 58f78ff

File tree

2 files changed

+42
-7
lines changed

2 files changed

+42
-7
lines changed

IGC/AdaptorCommon/RayTracing/AutoGenRTStackAccessPrivateOS.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2541,7 +2541,7 @@ void _createTraceRayInlinePrologue_Xe(Value* arg_0, Value* arg_1, Value* arg_2,
25412541
auto* V_37 = CreateInBoundsGEP(_igc_struct_RTStackFormat__RTStack(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(0), getInt32(3) });
25422542
CreateStore(getInt32(0), V_37);
25432543
auto* V_38 = CreateInBoundsGEP(_igc_struct_RTStackFormat__RTStack(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(1), getInt32(3) });
2544-
CreateStore(getInt32(0), V_38);
2544+
CreateStore(getInt32(268435456), V_38);
25452545
CreateBr(_JoinBB);
25462546
V_21->addIncoming(V_20, BB_9);
25472547
V_21->addIncoming(arg_3, BB_8);
@@ -2594,7 +2594,7 @@ void _createTraceRayInlinePrologue_Xe3(Value* arg_0, Value* arg_1, Value* arg_2,
25942594
auto* V_33 = CreateInBoundsGEP(_igc_struct_RTStackFormat__RTStack_8(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(0), getInt32(3) });
25952595
CreateStore(getInt32(0), V_33);
25962596
auto* V_34 = CreateInBoundsGEP(_igc_struct_RTStackFormat__RTStack_8(*Ctx.getModule()), arg_0, { getInt64(0), getInt32(1), getInt32(3) });
2597-
CreateStore(getInt32(0), V_34);
2597+
CreateStore(getInt32(268435456), V_34);
25982598
CreateBr(_JoinBB);
25992599
V_18->addIncoming(V_17, BB_9);
26002600
V_18->addIncoming(arg_3, BB_8);

IGC/AdaptorCommon/RayTracing/NewTraceRayInlineLoweringPass.cpp

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -414,11 +414,41 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
414414
auto *result =
415415
IRB.CreatePHI(IRB.getInt1Ty(), 2, VALUE_NAME("ProceedResult"));
416416
result->addIncoming(IRB.getFalse(), result->getParent());
417-
auto [proceedBB, _] = IRB.createTriangleFlow(
417+
auto [proceedBB, abortBB] = IRB.createTriangleFlow(
418418
doNotAbort, result, VALUE_NAME("NotAbortedProceedBB"),
419419
VALUE_NAME("PostProceedBB"));
420420

421-
IRB.SetInsertPoint(proceedBB->getTerminator());
421+
auto *entryBB = proceedBB->getUniquePredecessor();
422+
entryBB->getTerminator()->eraseFromParent();
423+
424+
// clang-format off
425+
// there are 4 cases here:
426+
// 1. the ray was just initialized:
427+
// enter the traversal block
428+
// 2. we are mid traversal and app did not commit any hit since last
429+
// proceed
430+
// set the done bit to 1
431+
// enter the traversal block
432+
// 3. we are mid traversal and app has committed a hit since last proceed
433+
// set the done bit to 1
434+
// set the valid bit to 1
435+
// 4. we are done with traversal
436+
// skip the traversal block
437+
// clang-format on
438+
439+
// Create a block to handle 2 and 3
440+
auto *setDoneBB = BasicBlock::Create(
441+
*m_pCGCtx->getLLVMContext(), VALUE_NAME("setDoneBB"), &F, proceedBB);
442+
443+
IRB.SetInsertPoint(entryBB);
444+
auto *switchI = IRB.CreateSwitch(traceRayCtrl, setDoneBB, 2);
445+
switchI->addCase(IRB.getInt32(TRACE_RAY_DONE), abortBB);
446+
switchI->addCase(IRB.getInt32(TRACE_RAY_INITIAL), proceedBB);
447+
448+
// add unreachable to the new block so we can split it
449+
IRB.SetInsertPoint(setDoneBB);
450+
auto *IP = IRB.CreateUnreachable();
451+
IRB.SetInsertPoint(IP);
422452

423453
{
424454
// make sure the done bit is set to 0
@@ -429,21 +459,26 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
429459
auto *cond =
430460
IRB.CreateICmpEQ(data.CommittedDataLocation,
431461
IRB.getInt32(CommittedDataLocation::PotentialHit));
432-
auto *IP = &*IRB.GetInsertPoint();
433462

434463
Instruction *ifTerm, *elseTerm;
435464

436465
SplitBlockAndInsertIfThenElse(cond, IP, &ifTerm, &elseTerm);
437466
IRB.SetInsertPoint(ifTerm);
438467
IRB.setDoneBit(getStackPtr(IRB, rqObject), false);
439468
IRB.setHitValid(getStackPtr(IRB, rqObject), false);
469+
IRB.CreateBr(proceedBB);
470+
ifTerm->eraseFromParent();
440471

441472
IRB.SetInsertPoint(elseTerm);
442473
IRB.setDoneBit(getStackPtr(IRB, rqObject), false);
474+
IRB.CreateBr(proceedBB);
475+
elseTerm->eraseFromParent();
443476

444477
IRB.SetInsertPoint(IP);
445478
}
446479

480+
IRB.SetInsertPoint(proceedBB->getFirstNonPHI());
481+
447482
EmitPreTraceRayFence(IRB, rqObject);
448483

449484

@@ -490,7 +525,7 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
490525
1);
491526

492527
auto *notDone = IRB.CreateICmpEQ(proceedFurther, IRB.getInt32(1));
493-
result->addIncoming(notDone, proceedBB);
528+
result->addIncoming(notDone, IRB.GetInsertBlock());
494529

495530
data.TraceRayCtrl =
496531
IRB.CreateSelect(notDone, IRB.getInt32(TRACE_RAY_CONTINUE),
@@ -501,7 +536,7 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
501536
data.CommittedDataLocation = IRB.getInt32(CommittedHit);
502537
} else {
503538
auto *notDone = IRB.isDoneBitNotSet(getStackPtr(IRB, rqObject), false);
504-
result->addIncoming(notDone, proceedBB);
539+
result->addIncoming(notDone, IRB.GetInsertBlock());
505540

506541
data.TraceRayCtrl =
507542
IRB.CreateSelect(notDone, IRB.getInt32(TRACE_RAY_CONTINUE),

0 commit comments

Comments
 (0)