@@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass {
4242 static char ID; // Pass identification.
4343};
4444
45+ static bool resourceAccessNeeds64BitExpansion (Module *M, Type *OverloadTy,
46+ bool IsRaw) {
47+ if (IsRaw && M->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
48+ return false ;
49+
50+ Type *ScalarTy = OverloadTy->getScalarType ();
51+ return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
52+ }
53+
4554static bool isIntrinsicExpansion (Function &F) {
4655 switch (F.getIntrinsicID ()) {
4756 case Intrinsic::abs:
@@ -72,27 +81,19 @@ static bool isIntrinsicExpansion(Function &F) {
7281 case Intrinsic::vector_reduce_fadd:
7382 return true ;
7483 case Intrinsic::dx_resource_load_rawbuffer:
75- if (F.getParent ()->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
76- return false ;
77- // fallthrough to check if double or i64
78- LLVM_FALLTHROUGH;
79- case Intrinsic::dx_resource_load_typedbuffer: {
80- // We need to handle i64, doubles, and vectors of them.
81- Type *ScalarTy =
82- F.getReturnType ()->getStructElementType (0 )->getScalarType ();
83- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
84- }
85- case Intrinsic::dx_resource_store_rawbuffer: {
86- if (F.getParent ()->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
87- return false ;
88- Type *ScalarTy = F.getFunctionType ()->getParamType (3 )->getScalarType ();
89- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
90- }
91- case Intrinsic::dx_resource_store_typedbuffer: {
92- // We need to handle i64 and doubles and vectors of i64 and doubles.
93- Type *ScalarTy = F.getFunctionType ()->getParamType (2 )->getScalarType ();
94- return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
95- }
84+ return resourceAccessNeeds64BitExpansion (
85+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
86+ /* IsRaw*/ true );
87+ case Intrinsic::dx_resource_load_typedbuffer:
88+ return resourceAccessNeeds64BitExpansion (
89+ F.getParent (), F.getReturnType ()->getStructElementType (0 ),
90+ /* IsRaw*/ false );
91+ case Intrinsic::dx_resource_store_rawbuffer:
92+ return resourceAccessNeeds64BitExpansion (
93+ F.getParent (), F.getFunctionType ()->getParamType (3 ), /* IsRaw*/ true );
94+ case Intrinsic::dx_resource_store_typedbuffer:
95+ return resourceAccessNeeds64BitExpansion (
96+ F.getParent (), F.getFunctionType ()->getParamType (2 ), /* IsRaw*/ false );
9697 }
9798 return false ;
9899}
@@ -563,19 +564,20 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
563564 bool IsDouble = ScalarTy->isDoubleTy ();
564565 assert (IsDouble || ScalarTy->isIntegerTy (64 ) &&
565566 " Only expand double or int64 scalars or vectors" );
566- bool IsVector = isa<FixedVectorType>(BufferTy);
567-
567+ bool IsVector = false ;
568568 unsigned ExtractNum = 2 ;
569569 if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
570- if (!IsRaw)
571- assert (VT->getNumElements () == 2 &&
572- " TypedBufferLoad vector must be size 2" );
573570 ExtractNum = 2 * VT->getNumElements ();
571+ IsVector = true ;
572+ assert (IsRaw || ExtractNum == 4 && " TypedBufferLoad vector must be size 2" );
574573 }
575574
576575 SmallVector<Value *, 2 > Loads;
577576 Value *Result = PoisonValue::get (BufferTy);
578577 unsigned Base = 0 ;
578+ // If we need to extract more than 4 i32; we need to break it up into
579+ // more than one load. LoadNum tells us how many i32s we are loading in
580+ // each load
579581 while (ExtractNum > 0 ) {
580582 unsigned LoadNum = std::min (ExtractNum, 4u );
581583 Type *Ty = VectorType::get (Builder.getInt32Ty (), LoadNum, false );
@@ -649,6 +651,8 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
649651 } else {
650652 // Use of the check bit
651653 assert (Indices[0 ] == 1 && " Unexpected type for typedbufferload" );
654+ // Note: This does not always match the historical behaviour of DXC.
655+ // See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
652656 if (!CheckBit) {
653657 SmallVector<Value *, 2 > CheckBits;
654658 for (Value *L : Loads)
@@ -666,22 +670,22 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
666670static bool expandBufferStoreIntrinsic (CallInst *Orig, bool IsRaw) {
667671 IRBuilder<> Builder (Orig);
668672
669- Type *BufferTy = Orig->getFunctionType ()->getParamType (IsRaw ? 3 : 2 );
673+ unsigned ValIndex = IsRaw ? 3 : 2 ;
674+ Type *BufferTy = Orig->getFunctionType ()->getParamType (ValIndex);
670675 Type *ScalarTy = BufferTy->getScalarType ();
671676 bool IsDouble = ScalarTy->isDoubleTy ();
672677 assert ((IsDouble || ScalarTy->isIntegerTy (64 )) &&
673678 " Only expand double or int64 scalars or vectors" );
674679
675680 // Determine if we're dealing with a vector or scalar
676- bool IsVector = isa<FixedVectorType>(BufferTy) ;
681+ bool IsVector = false ;
677682 unsigned ExtractNum = 2 ;
678683 unsigned VecLen = 0 ;
679684 if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
680- if (!IsRaw)
681- assert (VT->getNumElements () == 2 &&
682- " TypedBufferStore vector must be size 2" );
683685 VecLen = VT->getNumElements ();
686+ assert (IsRaw || VecLen == 2 && " TypedBufferStore vector must be size 2" );
684687 ExtractNum = VecLen * 2 ;
688+ IsVector = true ;
685689 }
686690
687691 // Create the appropriate vector type for the result
@@ -699,12 +703,12 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
699703 if (IsDouble) {
700704 auto *SplitTy = llvm::StructType::get (SplitElementTy, SplitElementTy);
701705 Value *Split = Builder.CreateIntrinsic (SplitTy, Intrinsic::dx_splitdouble,
702- {Orig->getOperand (IsRaw ? 3 : 2 )});
706+ {Orig->getOperand (ValIndex )});
703707 LowBits = Builder.CreateExtractValue (Split, 0 );
704708 HighBits = Builder.CreateExtractValue (Split, 1 );
705709 } else {
706710 // Handle int64 type(s)
707- Value *InputVal = Orig->getOperand (IsRaw ? 3 : 2 );
711+ Value *InputVal = Orig->getOperand (ValIndex );
708712 Constant *ShiftAmt = Builder.getInt64 (32 );
709713 if (IsVector)
710714 ShiftAmt =
@@ -728,6 +732,9 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
728732 Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
729733 }
730734
735+ // If we need to extract more than 4 i32; we need to break it up into
736+ // more than one store. StoreNum tells us how many i32s we are storing in
737+ // each store
731738 unsigned Base = 0 ;
732739 while (ExtractNum > 0 ) {
733740 unsigned StoreNum = std::min (ExtractNum, 4u );
@@ -744,7 +751,10 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
744751 for (unsigned I = 0 ; I < StoreNum; ++I) {
745752 Mask.push_back (Base + I);
746753 }
747- Value *SubVal = Builder.CreateShuffleVector (Val, Mask);
754+
755+ Value *SubVal = Val;
756+ if (VecLen > 2 )
757+ SubVal = Builder.CreateShuffleVector (Val, Mask);
748758
749759 Args.push_back (SubVal);
750760 // Create the final intrinsic call
0 commit comments