Skip to content

Commit 5e2f7a5

Browse files
committed
respond to pr comments
1 parent 4722eee commit 5e2f7a5

File tree

5 files changed

+52
-58
lines changed

5 files changed

+52
-58
lines changed

llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp

Lines changed: 44 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ class DXILIntrinsicExpansionLegacy : public ModulePass {
4242
static char ID; // Pass identification.
4343
};
4444

45+
static bool resourceAccessNeeds64BitExpansion(Module *M, Type *OverloadTy,
46+
bool IsRaw) {
47+
if (IsRaw && M->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
48+
return false;
49+
50+
Type *ScalarTy = OverloadTy->getScalarType();
51+
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
52+
}
53+
4554
static bool isIntrinsicExpansion(Function &F) {
4655
switch (F.getIntrinsicID()) {
4756
case Intrinsic::abs:
@@ -72,27 +81,19 @@ static bool isIntrinsicExpansion(Function &F) {
7281
case Intrinsic::vector_reduce_fadd:
7382
return true;
7483
case Intrinsic::dx_resource_load_rawbuffer:
75-
if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
76-
return false;
77-
// fallthrough to check if double or i64
78-
LLVM_FALLTHROUGH;
79-
case Intrinsic::dx_resource_load_typedbuffer: {
80-
// We need to handle i64, doubles, and vectors of them.
81-
Type *ScalarTy =
82-
F.getReturnType()->getStructElementType(0)->getScalarType();
83-
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
84-
}
85-
case Intrinsic::dx_resource_store_rawbuffer: {
86-
if (F.getParent()->getTargetTriple().getDXILVersion() > VersionTuple(1, 2))
87-
return false;
88-
Type *ScalarTy = F.getFunctionType()->getParamType(3)->getScalarType();
89-
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
90-
}
91-
case Intrinsic::dx_resource_store_typedbuffer: {
92-
// We need to handle i64 and doubles and vectors of i64 and doubles.
93-
Type *ScalarTy = F.getFunctionType()->getParamType(2)->getScalarType();
94-
return ScalarTy->isDoubleTy() || ScalarTy->isIntegerTy(64);
95-
}
84+
return resourceAccessNeeds64BitExpansion(
85+
F.getParent(), F.getReturnType()->getStructElementType(0),
86+
/*IsRaw*/ true);
87+
case Intrinsic::dx_resource_load_typedbuffer:
88+
return resourceAccessNeeds64BitExpansion(
89+
F.getParent(), F.getReturnType()->getStructElementType(0),
90+
/*IsRaw*/ false);
91+
case Intrinsic::dx_resource_store_rawbuffer:
92+
return resourceAccessNeeds64BitExpansion(
93+
F.getParent(), F.getFunctionType()->getParamType(3), /*IsRaw*/ true);
94+
case Intrinsic::dx_resource_store_typedbuffer:
95+
return resourceAccessNeeds64BitExpansion(
96+
F.getParent(), F.getFunctionType()->getParamType(2), /*IsRaw*/ false);
9697
}
9798
return false;
9899
}
@@ -563,19 +564,20 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
563564
bool IsDouble = ScalarTy->isDoubleTy();
564565
assert(IsDouble || ScalarTy->isIntegerTy(64) &&
565566
"Only expand double or int64 scalars or vectors");
566-
bool IsVector = isa<FixedVectorType>(BufferTy);
567-
567+
bool IsVector = false;
568568
unsigned ExtractNum = 2;
569569
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
570-
if (!IsRaw)
571-
assert(VT->getNumElements() == 2 &&
572-
"TypedBufferLoad vector must be size 2");
573570
ExtractNum = 2 * VT->getNumElements();
571+
IsVector = true;
572+
assert(IsRaw || ExtractNum == 4 && "TypedBufferLoad vector must be size 2");
574573
}
575574

576575
SmallVector<Value *, 2> Loads;
577576
Value *Result = PoisonValue::get(BufferTy);
578577
unsigned Base = 0;
578+
// If we need to extract more than 4 i32; we need to break it up into
579+
// more than one load. LoadNum tells us how many i32s we are loading in
580+
// each load
579581
while (ExtractNum > 0) {
580582
unsigned LoadNum = std::min(ExtractNum, 4u);
581583
Type *Ty = VectorType::get(Builder.getInt32Ty(), LoadNum, false);
@@ -649,6 +651,8 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
649651
} else {
650652
// Use of the check bit
651653
assert(Indices[0] == 1 && "Unexpected type for typedbufferload");
654+
// Note: This does not always match the historical behaviour of DXC.
655+
// See https://github.com/microsoft/DirectXShaderCompiler/issues/7622
652656
if (!CheckBit) {
653657
SmallVector<Value *, 2> CheckBits;
654658
for (Value *L : Loads)
@@ -666,22 +670,22 @@ static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) {
666670
static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
667671
IRBuilder<> Builder(Orig);
668672

669-
Type *BufferTy = Orig->getFunctionType()->getParamType(IsRaw ? 3 : 2);
673+
unsigned ValIndex = IsRaw ? 3 : 2;
674+
Type *BufferTy = Orig->getFunctionType()->getParamType(ValIndex);
670675
Type *ScalarTy = BufferTy->getScalarType();
671676
bool IsDouble = ScalarTy->isDoubleTy();
672677
assert((IsDouble || ScalarTy->isIntegerTy(64)) &&
673678
"Only expand double or int64 scalars or vectors");
674679

675680
// Determine if we're dealing with a vector or scalar
676-
bool IsVector = isa<FixedVectorType>(BufferTy);
681+
bool IsVector = false;
677682
unsigned ExtractNum = 2;
678683
unsigned VecLen = 0;
679684
if (auto *VT = dyn_cast<FixedVectorType>(BufferTy)) {
680-
if (!IsRaw)
681-
assert(VT->getNumElements() == 2 &&
682-
"TypedBufferStore vector must be size 2");
683685
VecLen = VT->getNumElements();
686+
assert(IsRaw || VecLen == 2 && "TypedBufferStore vector must be size 2");
684687
ExtractNum = VecLen * 2;
688+
IsVector = true;
685689
}
686690

687691
// Create the appropriate vector type for the result
@@ -699,12 +703,12 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
699703
if (IsDouble) {
700704
auto *SplitTy = llvm::StructType::get(SplitElementTy, SplitElementTy);
701705
Value *Split = Builder.CreateIntrinsic(SplitTy, Intrinsic::dx_splitdouble,
702-
{Orig->getOperand(IsRaw ? 3 : 2)});
706+
{Orig->getOperand(ValIndex)});
703707
LowBits = Builder.CreateExtractValue(Split, 0);
704708
HighBits = Builder.CreateExtractValue(Split, 1);
705709
} else {
706710
// Handle int64 type(s)
707-
Value *InputVal = Orig->getOperand(IsRaw ? 3 : 2);
711+
Value *InputVal = Orig->getOperand(ValIndex);
708712
Constant *ShiftAmt = Builder.getInt64(32);
709713
if (IsVector)
710714
ShiftAmt =
@@ -728,6 +732,9 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
728732
Val = Builder.CreateInsertElement(Val, HighBits, Builder.getInt32(1));
729733
}
730734

735+
// If we need to extract more than 4 i32; we need to break it up into
736+
// more than one store. StoreNum tells us how many i32s we are storing in
737+
// each store
731738
unsigned Base = 0;
732739
while (ExtractNum > 0) {
733740
unsigned StoreNum = std::min(ExtractNum, 4u);
@@ -744,7 +751,10 @@ static bool expandBufferStoreIntrinsic(CallInst *Orig, bool IsRaw) {
744751
for (unsigned I = 0; I < StoreNum; ++I) {
745752
Mask.push_back(Base + I);
746753
}
747-
Value *SubVal = Builder.CreateShuffleVector(Val, Mask);
754+
755+
Value *SubVal = Val;
756+
if (VecLen > 2)
757+
SubVal = Builder.CreateShuffleVector(Val, Mask);
748758

749759
Args.push_back(SubVal);
750760
// Create the final intrinsic call

llvm/test/CodeGen/DirectX/BufferStoreDouble.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@ define void @storef64(double %0) {
1616
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
1717
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
1818
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
19-
; this shufflevector is unnecessary but generated to avoid specalization
20-
; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
2119
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_f64_1_0_0t.v2i32(
22-
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec3]])
20+
; CHECK-SAME: target("dx.TypedBuffer", double, 1, 0, 0) [[B]], i32 0, <2 x i32> [[Vec2]])
2321
call void @llvm.dx.resource.store.typedbuffer(
2422
target("dx.TypedBuffer", double, 1, 0, 0) %buffer, i32 0,
2523
double %0)
@@ -40,10 +38,8 @@ define void @storev2f64(<2 x double> %0) {
4038
; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
4139
; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
4240
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
43-
; this shufflevector is unnecessary but generated to avoid specalization
44-
; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4541
; CHECK: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2f64_1_0_0t.v4i32(
46-
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec2]])
42+
; CHECK-SAME: target("dx.TypedBuffer", <2 x double>, 1, 0, 0) [[B]], i32 0, <4 x i32> [[Vec]])
4743
call void @llvm.dx.resource.store.typedbuffer(
4844
target("dx.TypedBuffer", <2 x double>, 1, 0, 0) %buffer, i32 0,
4945
<2 x double> %0)

llvm/test/CodeGen/DirectX/BufferStoreInt64.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ define void @storei64(i64 %0) {
1212
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
1313
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i32 0
1414
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i32 1
15-
; the shufflevector is unnecessary but generated to avoid too much specalization
16-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
17-
; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP7]])
15+
; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_i64_1_0_0t.v2i32(target("dx.TypedBuffer", i64, 1, 0, 0) [[BUFFER]], i32 0, <2 x i32> [[TMP6]])
1816
; CHECK-NEXT: ret void
1917
;
2018
%buffer = tail call target("dx.TypedBuffer", i64, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
@@ -31,9 +29,7 @@ define void @storev2i64(<2 x i64> %0) {
3129
; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP0]], splat (i64 32)
3230
; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP3]] to <2 x i32>
3331
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP4]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
34-
; the shufflevector is unnecessary but generated to avoid too much specalization
35-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
36-
; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP14]])
32+
; CHECK-NEXT: call void @llvm.dx.resource.store.typedbuffer.tdx.TypedBuffer_v2i64_1_0_0t.v4i32(target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) [[BUFFER]], i32 0, <4 x i32> [[TMP13]])
3733
; CHECK-NEXT: ret void
3834
;
3935
%buffer = tail call target("dx.TypedBuffer", <2 x i64>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_v2i64_1_0_0t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)

llvm/test/CodeGen/DirectX/RawBufferStoreDouble.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@ define void @storef64(double %0, i32 %index) {
1616
; CHECK: [[Hi:%.*]] = extractvalue { i32, i32 } [[SD]], 1
1717
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[Lo]], i32 0
1818
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[Hi]], i32 1
19-
; this shufflevector is unnecessary but generated to avoid specalization
20-
; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
2119
; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_f64_1_0t.v2i32(
22-
; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec3]])
20+
; CHECK-SAME: target("dx.RawBuffer", double, 1, 0) [[B]], i32 %index, i32 0, <2 x i32> [[Vec2]])
2321
call void @llvm.dx.resource.store.rawbuffer(
2422
target("dx.RawBuffer", double, 1, 0) %buffer, i32 %index, i32 0,
2523
double %0)
@@ -39,10 +37,8 @@ define void @storev2f64(<2 x double> %0, i32 %index) {
3937
; CHECK: [[Lo:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 0
4038
; CHECK: [[Hi:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[SD]], 1
4139
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[Lo]], <2 x i32> [[Hi]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
42-
; this shufflevector is unnecessary but generated to avoid specalization
43-
; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4440
; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2f64_1_0t.v4i32(
45-
; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec2]])
41+
; CHECK-SAME: target("dx.RawBuffer", <2 x double>, 1, 0) [[B]], i32 %index, i32 0, <4 x i32> [[Vec]])
4642
call void @llvm.dx.resource.store.rawbuffer(
4743
target("dx.RawBuffer", <2 x double>, 1, 0) %buffer, i32 %index, i32 0,
4844
<2 x double> %0)

llvm/test/CodeGen/DirectX/RawBufferStoreInt64.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@ define void @storei64(i64 %0, i32 %index) {
1616
; CHECK: [[C:%.*]] = trunc i64 [[B]] to i32
1717
; CHECK: [[Vec1:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
1818
; CHECK: [[Vec2:%.*]] = insertelement <2 x i32> [[Vec1]], i32 [[C]], i32 1
19-
; this shufflevector is unnecessary but generated to avoid specalization
20-
; CHECK: [[Vec3:%.*]] = shufflevector <2 x i32> [[Vec2]], <2 x i32> poison, <2 x i32> <i32 0, i32 1>
2119
; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_i64_1_0t.v2i32(
22-
; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec3]])
20+
; CHECK-SAME: target("dx.RawBuffer", i64, 1, 0) [[Buf]], i32 %index, i32 0, <2 x i32> [[Vec2]])
2321
call void @llvm.dx.resource.store.rawbuffer(
2422
target("dx.RawBuffer", i64, 1, 0) %buffer, i32 %index, i32 0,
2523
i64 %0)
@@ -38,10 +36,8 @@ define void @storev2i64(<2 x i64> %0, i32 %index) {
3836
; CHECK: [[B:%.*]] = lshr <2 x i64> %0, splat (i64 32)
3937
; CHECK: [[C:%.*]] = trunc <2 x i64> [[B]] to <2 x i32>
4038
; CHECK: [[Vec:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[C]], <4 x i32> <i32 0, i32 2, i32 1, i32 3>
41-
; this shufflevector is unnecessary but generated to avoid specalization
42-
; CHECK: [[Vec2:%.*]] = shufflevector <4 x i32> [[Vec]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
4339
; CHECK: call void @llvm.dx.resource.store.rawbuffer.tdx.RawBuffer_v2i64_1_0t.v4i32(
44-
; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec2]])
40+
; CHECK-SAME: target("dx.RawBuffer", <2 x i64>, 1, 0) [[Buf]], i32 %index, i32 0, <4 x i32> [[Vec]])
4541
call void @llvm.dx.resource.store.rawbuffer(
4642
target("dx.RawBuffer", <2 x i64>, 1, 0) %buffer, i32 %index, i32 0,
4743
<2 x i64> %0)

0 commit comments

Comments
 (0)