@@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass {
4242 static char ID; // Pass identification.
4343};
4444
// Decides whether a resource load/store with the given overload type needs
// the 64-bit expansion performed by this pass.
//   M          - module whose target triple supplies the DXIL version.
//   OverloadTy - type being loaded or stored; only its scalar type is
//                inspected, so a vector is judged by its element type.
//   IsRaw      - true when the access is a raw-buffer access.
// Returns false for raw accesses when the DXIL version is greater than 1.2;
// otherwise returns true exactly when the scalar type is double or i64.
45+ static bool resourceAccessNeeds64BitExpansion (Module *M, Type *OverloadTy,
46+ bool IsRaw) {
// Raw accesses are never expanded past DXIL 1.2.
// NOTE(review): presumably newer DXIL handles 64-bit raw accesses directly — confirm.
47+ if (IsRaw && M->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
48+ return false ;
49+
// Everything else is expanded only for double / i64 (scalar or vector element).
50+ Type *ScalarTy = OverloadTy->getScalarType ();
51+ return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
52+ }
53+
4554static bool isIntrinsicExpansion (Function &F) {
4655 switch (F.getIntrinsicID ()) {
4756 case Intrinsic::abs:
@@ -71,17 +80,20 @@ static bool isIntrinsicExpansion(Function &F) {
7180 case Intrinsic::vector_reduce_add:
7281 case Intrinsic::vector_reduce_fadd:
7382 return true ;
74- case Intrinsic::dx_resource_load_typedbuffer: {
75- // We need to handle i64, doubles, and vectors of them.
76- Type *ScalarTy =
77- F.getReturnType ()->getStructElementType (0 )->getScalarType ();
78- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
79- }
80- case Intrinsic::dx_resource_store_typedbuffer: {
81- // We need to handle i64 and doubles and vectors of i64 and doubles.
82- Type *ScalarTy = F.getFunctionType ()->getParamType (2 )->getScalarType ();
83- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
84- }
83+ case Intrinsic::dx_resource_load_rawbuffer:
84+ return resourceAccessNeeds64BitExpansion (
85+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
86+ /* IsRaw*/ true );
87+ case Intrinsic::dx_resource_load_typedbuffer:
88+ return resourceAccessNeeds64BitExpansion (
89+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
90+ /* IsRaw*/ false );
91+ case Intrinsic::dx_resource_store_rawbuffer:
92+ return resourceAccessNeeds64BitExpansion (
93+ F.getParent (), F.getFunctionType ()->getParamType (3 ), /* IsRaw*/ true );
94+ case Intrinsic::dx_resource_store_typedbuffer:
95+ return resourceAccessNeeds64BitExpansion (
96+ F.getParent (), F.getFunctionType ()->getParamType (2 ), /* IsRaw*/ false );
8597 }
8698 return false ;
8799}
@@ -544,63 +556,82 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544556 return Builder.CreateFMul (X, PiOver180);
545557}
546558
547- static bool expandTypedBufferLoadIntrinsic (CallInst *Orig) {
559+ static bool expandBufferLoadIntrinsic (CallInst *Orig, bool IsRaw ) {
548560 IRBuilder<> Builder (Orig);
549561
550562 Type *BufferTy = Orig->getType ()->getStructElementType (0 );
551563 Type *ScalarTy = BufferTy->getScalarType ();
552564 bool IsDouble = ScalarTy->isDoubleTy ();
553565 assert (IsDouble || ScalarTy->isIntegerTy (64 ) &&
554566 " Only expand double or int64 scalars or vectors" );
555-
567+ bool IsVector = false ;
556568 unsigned ExtractNum = 2 ;
557569 if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
558- assert ( VT->getNumElements () == 2 &&
559- " TypedBufferLoad vector must be size 2 " ) ;
560- ExtractNum = 4 ;
570+ ExtractNum = 2 * VT->getNumElements ();
571+ IsVector = true ;
572+ assert (IsRaw || ExtractNum == 4 && " TypedBufferLoad vector must be size 2 " ) ;
561573 }
562574
563- Type *Ty = VectorType::get (Builder.getInt32Ty (), ExtractNum, false );
564-
565- Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
566- CallInst *Load =
567- Builder.CreateIntrinsic (LoadType, Intrinsic::dx_resource_load_typedbuffer,
568- {Orig->getOperand (0 ), Orig->getOperand (1 )});
569-
570- // extract the buffer load's result
571- Value *Extract = Builder.CreateExtractValue (Load, {0 });
572-
573- SmallVector<Value *> ExtractElements;
574- for (unsigned I = 0 ; I < ExtractNum; ++I)
575- ExtractElements.push_back (
576- Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
577-
578- // combine into double(s) or int64(s)
575+ SmallVector<Value *, 2 > Loads;
579576 Value *Result = PoisonValue::get (BufferTy);
580- for (unsigned I = 0 ; I < ExtractNum; I += 2 ) {
581- Value *Combined = nullptr ;
582- if (IsDouble)
583- // For doubles, use dx_asdouble intrinsic
584- Combined =
585- Builder.CreateIntrinsic (Builder.getDoubleTy (), Intrinsic::dx_asdouble,
586- {ExtractElements[I], ExtractElements[I + 1 ]});
587- else {
588- // For int64, manually combine two int32s
589- // First, zero-extend both values to i64
590- Value *Lo = Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
591- Value *Hi =
592- Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
593- // Shift the high bits left by 32 bits
594- Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
595- // OR the high and low bits together
596- Combined = Builder.CreateOr (Lo, ShiftedHi);
577+ unsigned Base = 0 ;
578+ // If we need to extract more than 4 i32; we need to break it up into
579+ // more than one load. LoadNum tells us how many i32s we are loading in
580+ // each load
581+ while (ExtractNum > 0 ) {
582+ unsigned LoadNum = std::min (ExtractNum, 4u );
583+ Type *Ty = VectorType::get (Builder.getInt32Ty (), LoadNum, false );
584+
585+ Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
586+ Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
587+ SmallVector<Value *, 3 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
588+ if (IsRaw) {
589+ LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
590+ Value *Tmp = Builder.getInt32 (4 * Base * 2 );
591+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
597592 }
598593
599- if (ExtractNum == 4 )
600- Result = Builder.CreateInsertElement (Result, Combined,
601- Builder.getInt32 (I / 2 ));
602- else
603- Result = Combined;
594+ CallInst *Load = Builder.CreateIntrinsic (LoadType, LoadIntrinsic, Args);
595+ Loads.push_back (Load);
596+
597+ // extract the buffer load's result
598+ Value *Extract = Builder.CreateExtractValue (Load, {0 });
599+
600+ SmallVector<Value *> ExtractElements;
601+ for (unsigned I = 0 ; I < LoadNum; ++I)
602+ ExtractElements.push_back (
603+ Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
604+
605+ // combine into double(s) or int64(s)
606+ for (unsigned I = 0 ; I < LoadNum; I += 2 ) {
607+ Value *Combined = nullptr ;
608+ if (IsDouble)
609+ // For doubles, use dx_asdouble intrinsic
610+ Combined = Builder.CreateIntrinsic (
611+ Builder.getDoubleTy (), Intrinsic::dx_asdouble,
612+ {ExtractElements[I], ExtractElements[I + 1 ]});
613+ else {
614+ // For int64, manually combine two int32s
615+ // First, zero-extend both values to i64
616+ Value *Lo =
617+ Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
618+ Value *Hi =
619+ Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
620+ // Shift the high bits left by 32 bits
621+ Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
622+ // OR the high and low bits together
623+ Combined = Builder.CreateOr (Lo, ShiftedHi);
624+ }
625+
626+ if (IsVector)
627+ Result = Builder.CreateInsertElement (Result, Combined,
628+ Builder.getInt32 ((I / 2 ) + Base));
629+ else
630+ Result = Combined;
631+ }
632+
633+ ExtractNum -= LoadNum;
634+ Base += LoadNum / 2 ;
604635 }
605636
606637 Value *CheckBit = nullptr ;
@@ -620,8 +651,14 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
620651 } else {
621652 // Use of the check bit
622653 assert (Indices[0 ] == 1 && " Unexpected type for typedbufferload" );
623- if (!CheckBit)
624- CheckBit = Builder.CreateExtractValue (Load, {1 });
654+ // Note: This does not always match the historical behaviour of DXC.
655+ // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
656+ if (!CheckBit) {
657+ SmallVector<Value *, 2 > CheckBits;
658+ for (Value *L : Loads)
659+ CheckBits.push_back (Builder.CreateExtractValue (L, {1 }));
660+ CheckBit = Builder.CreateAnd (CheckBits);
661+ }
625662 EVI->replaceAllUsesWith (CheckBit);
626663 }
627664 EVI->eraseFromParent ();
@@ -630,46 +667,52 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
630667 return true ;
631668}
632669
633- static bool expandTypedBufferStoreIntrinsic (CallInst *Orig) {
670+ static bool expandBufferStoreIntrinsic (CallInst *Orig, bool IsRaw ) {
634671 IRBuilder<> Builder (Orig);
635672
636- Type *BufferTy = Orig->getFunctionType ()->getParamType (2 );
673+ unsigned ValIndex = IsRaw ? 3 : 2 ;
674+ Type *BufferTy = Orig->getFunctionType ()->getParamType (ValIndex);
637675 Type *ScalarTy = BufferTy->getScalarType ();
638676 bool IsDouble = ScalarTy->isDoubleTy ();
639677 assert ((IsDouble || ScalarTy->isIntegerTy (64 )) &&
640678 " Only expand double or int64 scalars or vectors" );
641679
642680 // Determine if we're dealing with a vector or scalar
643- bool IsVector = isa<FixedVectorType>(BufferTy);
644- if (IsVector) {
645- assert (cast<FixedVectorType>(BufferTy)->getNumElements () == 2 &&
646- " TypedBufferStore vector must be size 2" );
681+ bool IsVector = false ;
682+ unsigned ExtractNum = 2 ;
683+ unsigned VecLen = 0 ;
684+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
685+ VecLen = VT->getNumElements ();
686+ assert (IsRaw || VecLen == 2 && " TypedBufferStore vector must be size 2" );
687+ ExtractNum = VecLen * 2 ;
688+ IsVector = true ;
647689 }
648690
649691 // Create the appropriate vector type for the result
650692 Type *Int32Ty = Builder.getInt32Ty ();
651- Type *ResultTy = VectorType::get (Int32Ty, IsVector ? 4 : 2 , false );
693+ Type *ResultTy = VectorType::get (Int32Ty, ExtractNum , false );
652694 Value *Val = PoisonValue::get (ResultTy);
653695
654696 Type *SplitElementTy = Int32Ty;
655697 if (IsVector)
656- SplitElementTy = VectorType::get (SplitElementTy, 2 , false );
698+ SplitElementTy = VectorType::get (SplitElementTy, VecLen , false );
657699
658700 Value *LowBits = nullptr ;
659701 Value *HighBits = nullptr ;
660702 // Split the 64-bit values into 32-bit components
661703 if (IsDouble) {
662704 auto *SplitTy = llvm::StructType::get (SplitElementTy, SplitElementTy);
663705 Value *Split = Builder.CreateIntrinsic (SplitTy, Intrinsic::dx_splitdouble,
664- {Orig->getOperand (2 )});
706+ {Orig->getOperand (ValIndex )});
665707 LowBits = Builder.CreateExtractValue (Split, 0 );
666708 HighBits = Builder.CreateExtractValue (Split, 1 );
667709 } else {
668710 // Handle int64 type(s)
669- Value *InputVal = Orig->getOperand (2 );
711+ Value *InputVal = Orig->getOperand (ValIndex );
670712 Constant *ShiftAmt = Builder.getInt64 (32 );
671713 if (IsVector)
672- ShiftAmt = ConstantVector::getSplat (ElementCount::getFixed (2 ), ShiftAmt);
714+ ShiftAmt =
715+ ConstantVector::getSplat (ElementCount::getFixed (VecLen), ShiftAmt);
673716
674717 // Split into low and high 32-bit parts
675718 LowBits = Builder.CreateTrunc (InputVal, SplitElementTy);
@@ -678,17 +721,48 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
678721 }
679722
680723 if (IsVector) {
681- Val = Builder.CreateShuffleVector (LowBits, HighBits, {0 , 2 , 1 , 3 });
724+ SmallVector<int , 8 > Mask;
725+ for (unsigned I = 0 ; I < VecLen; ++I) {
726+ Mask.push_back (I);
727+ Mask.push_back (I + VecLen);
728+ }
729+ Val = Builder.CreateShuffleVector (LowBits, HighBits, Mask);
682730 } else {
683731 Val = Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (0 ));
684732 Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
685733 }
686734
687- // Create the final intrinsic call
688- Builder.CreateIntrinsic (Builder.getVoidTy (),
689- Intrinsic::dx_resource_store_typedbuffer,
690- {Orig->getOperand (0 ), Orig->getOperand (1 ), Val});
735+ // If we need to extract more than 4 i32; we need to break it up into
736+ // more than one store. StoreNum tells us how many i32s we are storing in
737+ // each store
738+ unsigned Base = 0 ;
739+ while (ExtractNum > 0 ) {
740+ unsigned StoreNum = std::min (ExtractNum, 4u );
741+
742+ Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
743+ SmallVector<Value *, 4 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
744+ if (IsRaw) {
745+ StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
746+ Value *Tmp = Builder.getInt32 (4 * Base);
747+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
748+ }
749+
750+ SmallVector<int , 4 > Mask;
751+ for (unsigned I = 0 ; I < StoreNum; ++I) {
752+ Mask.push_back (Base + I);
753+ }
754+
755+ Value *SubVal = Val;
756+ if (VecLen > 2 )
757+ SubVal = Builder.CreateShuffleVector (Val, Mask);
758+
759+ Args.push_back (SubVal);
760+ // Create the final intrinsic call
761+ Builder.CreateIntrinsic (Builder.getVoidTy (), StoreIntrinsic, Args);
691762
763+ ExtractNum -= StoreNum;
764+ Base += StoreNum;
765+ }
692766 Orig->eraseFromParent ();
693767 return true ;
694768}
@@ -821,12 +895,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
821895 case Intrinsic::dx_radians:
822896 Result = expandRadiansIntrinsic (Orig);
823897 break ;
898+ case Intrinsic::dx_resource_load_rawbuffer:
899+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw*/ true ))
900+ return true ;
901+ break ;
902+ case Intrinsic::dx_resource_store_rawbuffer:
903+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw*/ true ))
904+ return true ;
905+ break ;
824906 case Intrinsic::dx_resource_load_typedbuffer:
825- if (expandTypedBufferLoadIntrinsic (Orig))
907+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw */ false ))
826908 return true ;
827909 break ;
828910 case Intrinsic::dx_resource_store_typedbuffer:
829- if (expandTypedBufferStoreIntrinsic (Orig))
911+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw */ false ))
830912 return true ;
831913 break ;
832914 case Intrinsic::usub_sat:
0 commit comments