Skip to content

Commit 689e958

Browse files
authored
[DirectX] Add a GEP to scalar load/store on globals and remove incorrect assertion (#149191)
Fixes #149180. This PR removes an assertion that triggered on valid IR and replaces it with an if statement that returns early when the load/store type is not a single-value type. This PR also adds GEPs to scalar loads and stores from/to global variables.
1 parent 5d78332 commit 689e958

File tree

4 files changed

+50
-19
lines changed

4 files changed

+50
-19
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -563,7 +563,7 @@ legalizeGetHighLowi64Bytes(Instruction &I,
563563
}
564564

565565
static void
566-
legalizeLoadStoreOnArrayAllocas(Instruction &I,
566+
legalizeScalarLoadStoreOnArrays(Instruction &I,
567567
SmallVectorImpl<Instruction *> &ToRemove,
568568
DenseMap<Value *, Value *> &) {
569569

@@ -581,23 +581,31 @@ legalizeLoadStoreOnArrayAllocas(Instruction &I,
581581
} else
582582
return;
583583

584-
assert(LoadStoreTy->isSingleValueType() &&
585-
"Expected load/store type to be a single-valued type");
584+
// If the load/store is not of a single-value type (i.e., scalar or vector)
585+
// then we do not modify it. It shouldn't be a vector either because the
586+
// dxil-data-scalarization pass is expected to run before this, but it's not
587+
// incorrect to apply this transformation to vector load/stores.
588+
if (!LoadStoreTy->isSingleValueType())
589+
return;
586590

587-
auto *AllocaPtrOp = dyn_cast<AllocaInst>(PtrOp);
588-
if (!AllocaPtrOp)
591+
Type *ArrayTy;
592+
if (auto *GlobalVarPtrOp = dyn_cast<GlobalVariable>(PtrOp))
593+
ArrayTy = GlobalVarPtrOp->getValueType();
594+
else if (auto *AllocaPtrOp = dyn_cast<AllocaInst>(PtrOp))
595+
ArrayTy = AllocaPtrOp->getAllocatedType();
596+
else
589597
return;
590598

591-
Type *Ty = AllocaPtrOp->getAllocatedType();
592-
if (!isa<ArrayType>(Ty))
599+
if (!isa<ArrayType>(ArrayTy))
593600
return;
594-
assert(!isa<ArrayType>(Ty->getArrayElementType()) &&
595-
"Expected allocated type of AllocaInst to be a flat ArrayType");
596601

597-
IRBuilder<> Builder(&I);
598-
Value *Zero = Builder.getInt32(0);
599-
Value *GEP = Builder.CreateGEP(Ty, AllocaPtrOp, {Zero, Zero}, "",
600-
GEPNoWrapFlags::all());
602+
assert(ArrayTy->getArrayElementType() == LoadStoreTy &&
603+
"Expected array element type to be the same as to the scalar load or "
604+
"store type");
605+
606+
Value *Zero = ConstantInt::get(Type::getInt32Ty(I.getContext()), 0);
607+
Value *GEP = GetElementPtrInst::Create(
608+
ArrayTy, PtrOp, {Zero, Zero}, GEPNoWrapFlags::all(), "", I.getIterator());
601609
I.setOperand(PtrOpIndex, GEP);
602610
}
603611

@@ -651,7 +659,7 @@ class DXILLegalizationPipeline {
651659
// downcastI64toI32InsertExtractElements needs to handle.
652660
LegalizationPipeline[Stage2].push_back(
653661
downcastI64toI32InsertExtractElements);
654-
LegalizationPipeline[Stage2].push_back(legalizeLoadStoreOnArrayAllocas);
662+
LegalizationPipeline[Stage2].push_back(legalizeScalarLoadStoreOnArrays);
655663
}
656664
};
657665

llvm/test/CodeGen/DirectX/legalize-load-store-array-alloca.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,21 @@ define void @store() {
2121
store i32 0, ptr %a, align 4
2222
ret void
2323
}
24+
25+
@g = local_unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
26+
define void @load_whole_global () {
27+
; CHECK-LABEL: define void @load_whole_global
28+
; CHECK-NEXT: load [4 x i32], ptr addrspace(3) @g, align 4
29+
; CHECK-NEXT: ret void
30+
%l = load [4 x i32], ptr addrspace(3) @g, align 4
31+
ret void
32+
}
33+
34+
define void @load_global_index0 () {
35+
; CHECK-LABEL: define void @load_global_index0
36+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @g, i32 0, i32 0
37+
; CHECK-NEXT: load i32, ptr addrspace(3) [[GEP]], align 4
38+
; CHECK-NEXT: ret void
39+
%l = load i32, ptr addrspace(3) @g, align 4
40+
ret void
41+
}

llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
define <4 x i32> @load_array_vec_test() #0 {
2525
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
2626
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
27-
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 4
27+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0
28+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
2829
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
2930
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 4
3031
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) getelementptr ([8 x i32], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 4
@@ -52,7 +53,8 @@ define <4 x i32> @load_array_vec_test() #0 {
5253
define <4 x i32> @load_vec_test() #0 {
5354
; CHECK-LABEL: define <4 x i32> @load_vec_test(
5455
; CHECK-SAME: ) #[[ATTR0]] {
55-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @vecData.scalarized, align 4
56+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0
57+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
5658
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
5759
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 2), align 4
5860
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 3), align 4
@@ -203,7 +205,8 @@ define <4 x i32> @load_static_array_of_vec_from_i8_gep_test(i32 %index) #0 {
203205
define <4 x i32> @multid_load_test() #0 {
204206
; CHECK-LABEL: define <4 x i32> @multid_load_test(
205207
; CHECK-SAME: ) #[[ATTR0]] {
206-
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, align 4
208+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 0
209+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[GEP]], align 4
207210
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 1), align 4
208211
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 2), align 4
209212
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) getelementptr ([36 x i32], ptr addrspace(3) @groupshared2dArrayofVectors.scalarized.1dim, i32 0, i32 3), align 4

llvm/test/CodeGen/DirectX/scalar-store.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414

1515
; CHECK-LABEL: store_array_vec_test
1616
define void @store_array_vec_test () local_unnamed_addr #0 {
17-
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) @arrayofVecData.scalarized.1dim, align 16
17+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 0
18+
; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(3) [[GEP]], align 16
1819
; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 1), align 4
1920
; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(3) getelementptr ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 2), align 8
2021
; CHECK-NEXT: store float 2.000000e+00, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 0, i32 3), align 16
@@ -30,7 +31,8 @@ define void @store_array_vec_test () local_unnamed_addr #0 {
3031
; CHECK-LABEL: store_vec_test
3132
define void @store_vec_test(<4 x i32> %inputVec) #0 {
3233
; CHECK-NEXT: [[INPUTVEC_I01:%.*]] = extractelement <4 x i32> %inputVec, i32 0
33-
; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) @vecData.scalarized, align 4
34+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 0
35+
; CHECK-NEXT: store i32 [[INPUTVEC_I01]], ptr addrspace(3) [[GEP]], align 4
3436
; CHECK-NEXT: [[INPUTVEC_I12:%.*]] = extractelement <4 x i32> %inputVec, i32 1
3537
; CHECK-NEXT: store i32 [[INPUTVEC_I12]], ptr addrspace(3) getelementptr ([4 x i32], ptr addrspace(3) @vecData.scalarized, i32 0, i32 1), align 4
3638
; CHECK-NEXT: [[INPUTVEC_I23:%.*]] = extractelement <4 x i32> %inputVec, i32 2

0 commit comments

Comments
 (0)