@@ -71,12 +71,23 @@ static bool isIntrinsicExpansion(Function &F) {
71
71
case Intrinsic::vector_reduce_add:
72
72
case Intrinsic::vector_reduce_fadd:
73
73
return true ;
74
+ case Intrinsic::dx_resource_load_rawbuffer:
75
+ if (F.getParent ()->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
76
+ return false ;
77
+ // fallthrough to check if double or i64
78
+ LLVM_FALLTHROUGH;
74
79
case Intrinsic::dx_resource_load_typedbuffer: {
75
80
// We need to handle i64, doubles, and vectors of them.
76
81
Type *ScalarTy =
77
82
F.getReturnType ()->getStructElementType (0 )->getScalarType ();
78
83
return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
79
84
}
85
+ case Intrinsic::dx_resource_store_rawbuffer: {
86
+ if (F.getParent ()->getTargetTriple ().getDXILVersion () > VersionTuple (1 , 2 ))
87
+ return false ;
88
+ Type *ScalarTy = F.getFunctionType ()->getParamType (3 )->getScalarType ();
89
+ return ScalarTy->isDoubleTy () || ScalarTy->isIntegerTy (64 );
90
+ }
80
91
case Intrinsic::dx_resource_store_typedbuffer: {
81
92
// We need to handle i64 and doubles and vectors of i64 and doubles.
82
93
Type *ScalarTy = F.getFunctionType ()->getParamType (2 )->getScalarType ();
@@ -544,63 +555,81 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) {
544
555
return Builder.CreateFMul (X, PiOver180);
545
556
}
546
557
547
- static bool expandTypedBufferLoadIntrinsic (CallInst *Orig) {
558
+ static bool expandBufferLoadIntrinsic (CallInst *Orig, bool IsRaw ) {
548
559
IRBuilder<> Builder (Orig);
549
560
550
561
Type *BufferTy = Orig->getType ()->getStructElementType (0 );
551
562
Type *ScalarTy = BufferTy->getScalarType ();
552
563
bool IsDouble = ScalarTy->isDoubleTy ();
553
564
assert (IsDouble || ScalarTy->isIntegerTy (64 ) &&
554
565
" Only expand double or int64 scalars or vectors" );
566
+ bool IsVector = isa<FixedVectorType>(BufferTy);
555
567
556
568
unsigned ExtractNum = 2 ;
557
569
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
558
- assert (VT->getNumElements () == 2 &&
559
- " TypedBufferLoad vector must be size 2" );
560
- ExtractNum = 4 ;
570
+ if (!IsRaw)
571
+ assert (VT->getNumElements () == 2 &&
572
+ " TypedBufferLoad vector must be size 2" );
573
+ ExtractNum = 2 * VT->getNumElements ();
561
574
}
562
575
563
- Type *Ty = VectorType::get (Builder.getInt32Ty (), ExtractNum, false );
564
-
565
- Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
566
- CallInst *Load =
567
- Builder.CreateIntrinsic (LoadType, Intrinsic::dx_resource_load_typedbuffer,
568
- {Orig->getOperand (0 ), Orig->getOperand (1 )});
569
-
570
- // extract the buffer load's result
571
- Value *Extract = Builder.CreateExtractValue (Load, {0 });
572
-
573
- SmallVector<Value *> ExtractElements;
574
- for (unsigned I = 0 ; I < ExtractNum; ++I)
575
- ExtractElements.push_back (
576
- Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
577
-
578
- // combine into double(s) or int64(s)
576
+ SmallVector<Value *, 2 > Loads;
579
577
Value *Result = PoisonValue::get (BufferTy);
580
- for (unsigned I = 0 ; I < ExtractNum; I += 2 ) {
581
- Value *Combined = nullptr ;
582
- if (IsDouble)
583
- // For doubles, use dx_asdouble intrinsic
584
- Combined =
585
- Builder.CreateIntrinsic (Builder.getDoubleTy (), Intrinsic::dx_asdouble,
586
- {ExtractElements[I], ExtractElements[I + 1 ]});
587
- else {
588
- // For int64, manually combine two int32s
589
- // First, zero-extend both values to i64
590
- Value *Lo = Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
591
- Value *Hi =
592
- Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
593
- // Shift the high bits left by 32 bits
594
- Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
595
- // OR the high and low bits together
596
- Combined = Builder.CreateOr (Lo, ShiftedHi);
578
+ unsigned Base = 0 ;
579
+ while (ExtractNum > 0 ) {
580
+ unsigned LoadNum = std::min (ExtractNum, 4u );
581
+ Type *Ty = VectorType::get (Builder.getInt32Ty (), LoadNum, false );
582
+
583
+ Type *LoadType = StructType::get (Ty, Builder.getInt1Ty ());
584
+ Intrinsic::ID LoadIntrinsic = Intrinsic::dx_resource_load_typedbuffer;
585
+ SmallVector<Value *, 3 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
586
+ if (IsRaw) {
587
+ LoadIntrinsic = Intrinsic::dx_resource_load_rawbuffer;
588
+ Value *Tmp = Builder.getInt32 (4 * Base * 2 );
589
+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
597
590
}
598
591
599
- if (ExtractNum == 4 )
600
- Result = Builder.CreateInsertElement (Result, Combined,
601
- Builder.getInt32 (I / 2 ));
602
- else
603
- Result = Combined;
592
+ CallInst *Load = Builder.CreateIntrinsic (LoadType, LoadIntrinsic, Args);
593
+ Loads.push_back (Load);
594
+
595
+ // extract the buffer load's result
596
+ Value *Extract = Builder.CreateExtractValue (Load, {0 });
597
+
598
+ SmallVector<Value *> ExtractElements;
599
+ for (unsigned I = 0 ; I < LoadNum; ++I)
600
+ ExtractElements.push_back (
601
+ Builder.CreateExtractElement (Extract, Builder.getInt32 (I)));
602
+
603
+ // combine into double(s) or int64(s)
604
+ for (unsigned I = 0 ; I < LoadNum; I += 2 ) {
605
+ Value *Combined = nullptr ;
606
+ if (IsDouble)
607
+ // For doubles, use dx_asdouble intrinsic
608
+ Combined = Builder.CreateIntrinsic (
609
+ Builder.getDoubleTy (), Intrinsic::dx_asdouble,
610
+ {ExtractElements[I], ExtractElements[I + 1 ]});
611
+ else {
612
+ // For int64, manually combine two int32s
613
+ // First, zero-extend both values to i64
614
+ Value *Lo =
615
+ Builder.CreateZExt (ExtractElements[I], Builder.getInt64Ty ());
616
+ Value *Hi =
617
+ Builder.CreateZExt (ExtractElements[I + 1 ], Builder.getInt64Ty ());
618
+ // Shift the high bits left by 32 bits
619
+ Value *ShiftedHi = Builder.CreateShl (Hi, Builder.getInt64 (32 ));
620
+ // OR the high and low bits together
621
+ Combined = Builder.CreateOr (Lo, ShiftedHi);
622
+ }
623
+
624
+ if (IsVector)
625
+ Result = Builder.CreateInsertElement (Result, Combined,
626
+ Builder.getInt32 ((I / 2 ) + Base));
627
+ else
628
+ Result = Combined;
629
+ }
630
+
631
+ ExtractNum -= LoadNum;
632
+ Base += LoadNum / 2 ;
604
633
}
605
634
606
635
Value *CheckBit = nullptr ;
@@ -620,8 +649,12 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
620
649
} else {
621
650
// Use of the check bit
622
651
assert (Indices[0 ] == 1 && " Unexpected type for typedbufferload" );
623
- if (!CheckBit)
624
- CheckBit = Builder.CreateExtractValue (Load, {1 });
652
+ if (!CheckBit) {
653
+ SmallVector<Value *, 2 > CheckBits;
654
+ for (Value *L : Loads)
655
+ CheckBits.push_back (Builder.CreateExtractValue (L, {1 }));
656
+ CheckBit = Builder.CreateAnd (CheckBits);
657
+ }
625
658
EVI->replaceAllUsesWith (CheckBit);
626
659
}
627
660
EVI->eraseFromParent ();
@@ -630,46 +663,52 @@ static bool expandTypedBufferLoadIntrinsic(CallInst *Orig) {
630
663
return true ;
631
664
}
632
665
633
- static bool expandTypedBufferStoreIntrinsic (CallInst *Orig) {
666
+ static bool expandBufferStoreIntrinsic (CallInst *Orig, bool IsRaw ) {
634
667
IRBuilder<> Builder (Orig);
635
668
636
- Type *BufferTy = Orig->getFunctionType ()->getParamType (2 );
669
+ Type *BufferTy = Orig->getFunctionType ()->getParamType (IsRaw ? 3 : 2 );
637
670
Type *ScalarTy = BufferTy->getScalarType ();
638
671
bool IsDouble = ScalarTy->isDoubleTy ();
639
672
assert ((IsDouble || ScalarTy->isIntegerTy (64 )) &&
640
673
" Only expand double or int64 scalars or vectors" );
641
674
642
675
// Determine if we're dealing with a vector or scalar
643
676
bool IsVector = isa<FixedVectorType>(BufferTy);
644
- if (IsVector) {
645
- assert (cast<FixedVectorType>(BufferTy)->getNumElements () == 2 &&
646
- " TypedBufferStore vector must be size 2" );
677
+ unsigned ExtractNum = 2 ;
678
+ unsigned VecLen = 0 ;
679
+ if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
680
+ if (!IsRaw)
681
+ assert (VT->getNumElements () == 2 &&
682
+ " TypedBufferStore vector must be size 2" );
683
+ VecLen = VT->getNumElements ();
684
+ ExtractNum = VecLen * 2 ;
647
685
}
648
686
649
687
// Create the appropriate vector type for the result
650
688
Type *Int32Ty = Builder.getInt32Ty ();
651
- Type *ResultTy = VectorType::get (Int32Ty, IsVector ? 4 : 2 , false );
689
+ Type *ResultTy = VectorType::get (Int32Ty, ExtractNum , false );
652
690
Value *Val = PoisonValue::get (ResultTy);
653
691
654
692
Type *SplitElementTy = Int32Ty;
655
693
if (IsVector)
656
- SplitElementTy = VectorType::get (SplitElementTy, 2 , false );
694
+ SplitElementTy = VectorType::get (SplitElementTy, VecLen , false );
657
695
658
696
Value *LowBits = nullptr ;
659
697
Value *HighBits = nullptr ;
660
698
// Split the 64-bit values into 32-bit components
661
699
if (IsDouble) {
662
700
auto *SplitTy = llvm::StructType::get (SplitElementTy, SplitElementTy);
663
701
Value *Split = Builder.CreateIntrinsic (SplitTy, Intrinsic::dx_splitdouble,
664
- {Orig->getOperand (2 )});
702
+ {Orig->getOperand (IsRaw ? 3 : 2 )});
665
703
LowBits = Builder.CreateExtractValue (Split, 0 );
666
704
HighBits = Builder.CreateExtractValue (Split, 1 );
667
705
} else {
668
706
// Handle int64 type(s)
669
- Value *InputVal = Orig->getOperand (2 );
707
+ Value *InputVal = Orig->getOperand (IsRaw ? 3 : 2 );
670
708
Constant *ShiftAmt = Builder.getInt64 (32 );
671
709
if (IsVector)
672
- ShiftAmt = ConstantVector::getSplat (ElementCount::getFixed (2 ), ShiftAmt);
710
+ ShiftAmt =
711
+ ConstantVector::getSplat (ElementCount::getFixed (VecLen), ShiftAmt);
673
712
674
713
// Split into low and high 32-bit parts
675
714
LowBits = Builder.CreateTrunc (InputVal, SplitElementTy);
@@ -678,17 +717,42 @@ static bool expandTypedBufferStoreIntrinsic(CallInst *Orig) {
678
717
}
679
718
680
719
if (IsVector) {
681
- Val = Builder.CreateShuffleVector (LowBits, HighBits, {0 , 2 , 1 , 3 });
720
+ SmallVector<int , 8 > Mask;
721
+ for (unsigned I = 0 ; I < VecLen; ++I) {
722
+ Mask.push_back (I);
723
+ Mask.push_back (I + VecLen);
724
+ }
725
+ Val = Builder.CreateShuffleVector (LowBits, HighBits, Mask);
682
726
} else {
683
727
Val = Builder.CreateInsertElement (Val, LowBits, Builder.getInt32 (0 ));
684
728
Val = Builder.CreateInsertElement (Val, HighBits, Builder.getInt32 (1 ));
685
729
}
686
730
687
- // Create the final intrinsic call
688
- Builder.CreateIntrinsic (Builder.getVoidTy (),
689
- Intrinsic::dx_resource_store_typedbuffer,
690
- {Orig->getOperand (0 ), Orig->getOperand (1 ), Val});
731
+ unsigned Base = 0 ;
732
+ while (ExtractNum > 0 ) {
733
+ unsigned StoreNum = std::min (ExtractNum, 4u );
734
+
735
+ Intrinsic::ID StoreIntrinsic = Intrinsic::dx_resource_store_typedbuffer;
736
+ SmallVector<Value *, 4 > Args = {Orig->getOperand (0 ), Orig->getOperand (1 )};
737
+ if (IsRaw) {
738
+ StoreIntrinsic = Intrinsic::dx_resource_store_rawbuffer;
739
+ Value *Tmp = Builder.getInt32 (4 * Base);
740
+ Args.push_back (Builder.CreateAdd (Orig->getOperand (2 ), Tmp));
741
+ }
742
+
743
+ SmallVector<int , 4 > Mask;
744
+ for (unsigned I = 0 ; I < StoreNum; ++I) {
745
+ Mask.push_back (Base + I);
746
+ }
747
+ Value *SubVal = Builder.CreateShuffleVector (Val, Mask);
748
+
749
+ Args.push_back (SubVal);
750
+ // Create the final intrinsic call
751
+ Builder.CreateIntrinsic (Builder.getVoidTy (), StoreIntrinsic, Args);
691
752
753
+ ExtractNum -= StoreNum;
754
+ Base += StoreNum;
755
+ }
692
756
Orig->eraseFromParent ();
693
757
return true ;
694
758
}
@@ -821,12 +885,20 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) {
821
885
case Intrinsic::dx_radians:
822
886
Result = expandRadiansIntrinsic (Orig);
823
887
break ;
888
+ case Intrinsic::dx_resource_load_rawbuffer:
889
+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw*/ true ))
890
+ return true ;
891
+ break ;
892
+ case Intrinsic::dx_resource_store_rawbuffer:
893
+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw*/ true ))
894
+ return true ;
895
+ break ;
824
896
case Intrinsic::dx_resource_load_typedbuffer:
825
- if (expandTypedBufferLoadIntrinsic (Orig))
897
+ if (expandBufferLoadIntrinsic (Orig, /* IsRaw */ false ))
826
898
return true ;
827
899
break ;
828
900
case Intrinsic::dx_resource_store_typedbuffer:
829
- if (expandTypedBufferStoreIntrinsic (Orig))
901
+ if (expandBufferStoreIntrinsic (Orig, /* IsRaw */ false ))
830
902
return true ;
831
903
break ;
832
904
case Intrinsic::usub_sat:
0 commit comments