@@ -324,6 +324,7 @@ void InlineRaytracing::EmitPreTraceRayFence(RTBuilder &IRB, Value *rqObject) {
324
324
325
325
void InlineRaytracing::LowerIntrinsics (Function &F) {
326
326
SmallVector<RayQueryIntrinsicBase *> RQInstructions;
327
+ SmallVector<RayQueryInfoIntrinsic *> RQInfoInstructions;
327
328
328
329
for (auto &I : instructions (F)) {
329
330
if (isa<RayQueryIntrinsicBase>(&I))
@@ -366,6 +367,11 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
366
367
data.CommittedDataLocation = IRB.getInt32 (CommittedHit);
367
368
368
369
setPackedData (IRB, rqObject, data);
370
+
371
+ // For cross-block optimization purposes, split the basic block to avoid using a stale shadow stack.
372
+ if (allowCrossBlockLoadVectorization ())
373
+ IRB.createTriangleFlow (IRB.getFalse (), RQI);
374
+
369
375
break ;
370
376
}
371
377
case GenISAIntrinsic::GenISA_TraceRaySyncProceedHL: {
@@ -497,6 +503,10 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
497
503
setPackedData (IRB, rqObject, data);
498
504
499
505
506
+ // For cross-block optimization purposes, split the basic block to avoid using a stale shadow stack.
507
+ if (allowCrossBlockLoadVectorization ())
508
+ IRB.createTriangleFlow (IRB.getFalse (), RQI);
509
+
500
510
RQI->replaceAllUsesWith (result);
501
511
break ;
502
512
}
@@ -521,39 +531,9 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
521
531
case GenISAIntrinsic::GenISA_TraceRayInlineCandidateType:
522
532
RQI->replaceAllUsesWith (getPackedData (IRB, rqObject).CandidateType );
523
533
break ;
524
- case GenISAIntrinsic::GenISA_TraceRayInlineRayInfo: {
525
-
526
- auto *I = cast<RayQueryInfoIntrinsic>(RQI);
527
- auto data = getPackedData (IRB, rqObject);
528
- auto *loadCommittedFromPotential = IRB.CreateICmpEQ (data.CommittedDataLocation , IRB.getInt32 (PotentialHit),
529
- VALUE_NAME (" loadCommittedInfoFromPotentialHit" ));
530
-
531
- auto *shaderTy = IRB.CreateSelect (loadCommittedFromPotential, IRB.getInt32 (AnyHit),
532
- IRB.getInt32 (I->isCommitted () ? ClosestHit : AnyHit));
533
-
534
- switch (I->getInfoKind ()) {
535
- default :
536
- I->replaceAllUsesWith (IRB.lowerRayInfo (getStackPtr (IRB, rqObject, true ), I, shaderTy, std::nullopt ));
537
- break ;
538
- // leave this in for now, until we prove we don't need the hack anymore
539
- case GEOMETRY_INDEX: {
540
- bool specialPattern = false ;
541
- if (I->isCommitted () && IGC_GET_FLAG_VALUE (ForceRTShortCircuitingOR)) {
542
- specialPattern = forceShortCurcuitingOR_CommittedGeomIdx (IRB, I);
543
- }
544
-
545
- Value *leafType = IRB.getLeafType (getStackPtr (IRB, rqObject, true ), IRB.getInt1 (I->isCommitted ()));
546
- Value *geoIndex = IRB.getGeometryIndex (
547
- getStackPtr (IRB, rqObject, true ), I, leafType,
548
- IRB.getInt32 (I->isCommitted () ? CallableShaderTypeMD::ClosestHit : CallableShaderTypeMD::AnyHit),
549
- !specialPattern);
550
- IGC_ASSERT_MESSAGE (I->getType ()->isIntegerTy (), " Invalid geometryIndex type!" );
551
- I->replaceAllUsesWith (geoIndex);
552
- break ;
553
- }
554
- }
534
+ case GenISAIntrinsic::GenISA_TraceRayInlineRayInfo:
535
+ RQInfoInstructions.push_back (cast<RayQueryInfoIntrinsic>(RQI));
555
536
break ;
556
- }
557
537
case GenISAIntrinsic::GenISA_TraceRayInlineCommitNonOpaqueTriangleHit: {
558
538
auto data = getPackedData (IRB, rqObject);
559
539
auto *notDone = IRB.CreateAnd ({IRB.CreateICmpEQ (data.HasAcceptHitAndEndSearchFlag , IRB.getInt32 (0 )),
@@ -584,6 +564,11 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
584
564
data.CommittedStatus = IRB.getInt32 (RTStackFormat::COMMITTED_STATUS::COMMITTED_PROCEDURAL_PRIMITIVE_HIT);
585
565
586
566
setPackedData (IRB, rqObject, data);
567
+
568
+ // For cross-block optimization purposes, split the basic block to avoid using a stale shadow stack.
569
+ if (allowCrossBlockLoadVectorization ())
570
+ IRB.createTriangleFlow (IRB.getFalse (), RQI);
571
+
587
572
break ;
588
573
}
589
574
default :
@@ -592,6 +577,55 @@ void InlineRaytracing::LowerIntrinsics(Function &F) {
592
577
}
593
578
}
594
579
580
+ // first map every rayinfo instruction to a stack pointer
581
+ // we do it this way because rayinfo lowering itself will produce blocks
582
+ // so a 2-pass method will yield better results
583
+ MapVector<RayQueryInfoIntrinsic *, RTBuilder::SyncStackPointerVal *> RQInfoStackMap;
584
+
585
+ for (auto *I : RQInfoInstructions) {
586
+
587
+ auto *convertRQHandleFromRQObject = cast<Instruction>(I->getQueryObjIndex ());
588
+ auto *rqObject = convertRQHandleFromRQObject->getOperand (0 );
589
+ IRB.SetInsertPoint (I);
590
+ RQInfoStackMap.insert (std::make_pair (I, getStackPtr (IRB, rqObject, true )));
591
+ }
592
+
593
+ // now we can actually lower rayinfo instructions
594
+ for (const auto & [I, stackPtr] : RQInfoStackMap) {
595
+
596
+ IRB.SetInsertPoint (I);
597
+ auto *convertRQHandleFromRQObject = cast<Instruction>(I->getQueryObjIndex ());
598
+ auto *rqObject = convertRQHandleFromRQObject->getOperand (0 );
599
+ auto data = getPackedData (IRB, rqObject);
600
+ auto *loadCommittedFromPotential = IRB.CreateICmpEQ (data.CommittedDataLocation , IRB.getInt32 (PotentialHit),
601
+ VALUE_NAME (" loadCommittedInfoFromPotentialHit" ));
602
+
603
+ auto *shaderTy = IRB.CreateSelect (loadCommittedFromPotential, IRB.getInt32 (AnyHit),
604
+ IRB.getInt32 (I->isCommitted () ? ClosestHit : AnyHit));
605
+
606
+ switch (I->getInfoKind ()) {
607
+ default :
608
+ I->replaceAllUsesWith (IRB.lowerRayInfo (stackPtr, I, shaderTy, std::nullopt ));
609
+ break ;
610
+ // leave this in for now, until we prove we don't need the hack anymore
611
+ case GEOMETRY_INDEX: {
612
+ bool specialPattern = false ;
613
+ if (I->isCommitted () && IGC_GET_FLAG_VALUE (ForceRTShortCircuitingOR)) {
614
+ specialPattern = forceShortCurcuitingOR_CommittedGeomIdx (IRB, I);
615
+ }
616
+
617
+ Value *leafType = IRB.getLeafType (stackPtr, IRB.getInt1 (I->isCommitted ()));
618
+ Value *geoIndex = IRB.getGeometryIndex (
619
+ stackPtr, I, leafType,
620
+ IRB.getInt32 (I->isCommitted () ? CallableShaderTypeMD::ClosestHit : CallableShaderTypeMD::AnyHit),
621
+ !specialPattern);
622
+ IGC_ASSERT_MESSAGE (I->getType ()->isIntegerTy (), " Invalid geometryIndex type!" );
623
+ I->replaceAllUsesWith (geoIndex);
624
+ break ;
625
+ }
626
+ }
627
+ }
628
+
595
629
llvm::for_each (RQInstructions, [](RayQueryIntrinsicBase *I) {
596
630
auto *RQHandle = cast<Instruction>(I->getQueryObjIndex ());
597
631
I->eraseFromParent ();
0 commit comments