Skip to content

Commit 0ac9d2e

Browse files
Icohedron authored and aadeshps-mcw committed
[DirectX] Simplify DXIL data scalarization, and data scalarize whole GEP chains (llvm#168096)
- The DXIL data scalarizer only needs to change vectors into arrays. It does not need to change the types of GEPs to match the pointer type. This PR simplifies the `visitGetElementPtrInst` method to do just that while also accounting for nested GEPs from ConstantExprs. (Before this PR, there were still vector types lingering in nested GEPs with ConstantExprs.)
- The `equivalentArrayTypeFromVector` function was awkwardly placed near the top of the file and away from the other helper functions. The function is now moved next to the other helper functions.
- Removed an unnecessary `||` condition from `isVectorOrArrayOfVectors`.

Related tests have also been cleaned up, and the test CHECKs have been modified to account for the new simplified behavior.
1 parent 84d8608 commit 0ac9d2e

File tree

4 files changed

+75
-103
lines changed

4 files changed

+75
-103
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 48 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,6 @@ static const int MaxVecSize = 4;
2929

3030
using namespace llvm;
3131

32-
// Recursively creates an array-like version of a given vector type.
33-
static Type *equivalentArrayTypeFromVector(Type *T) {
34-
if (auto *VecTy = dyn_cast<VectorType>(T))
35-
return ArrayType::get(VecTy->getElementType(),
36-
dyn_cast<FixedVectorType>(VecTy)->getNumElements());
37-
if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
38-
Type *NewElementType =
39-
equivalentArrayTypeFromVector(ArrayTy->getElementType());
40-
return ArrayType::get(NewElementType, ArrayTy->getNumElements());
41-
}
42-
// If it's not a vector or array, return the original type.
43-
return T;
44-
}
45-
4632
class DXILDataScalarizationLegacy : public ModulePass {
4733

4834
public:
@@ -121,12 +107,25 @@ DataScalarizerVisitor::lookupReplacementGlobal(Value *CurrOperand) {
121107
static bool isVectorOrArrayOfVectors(Type *T) {
122108
if (isa<VectorType>(T))
123109
return true;
124-
if (ArrayType *ArrType = dyn_cast<ArrayType>(T))
125-
return isa<VectorType>(ArrType->getElementType()) ||
126-
isVectorOrArrayOfVectors(ArrType->getElementType());
110+
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(T))
111+
return isVectorOrArrayOfVectors(ArrayTy->getElementType());
127112
return false;
128113
}
129114

115+
// Recursively creates an array-like version of a given vector type.
116+
static Type *equivalentArrayTypeFromVector(Type *T) {
117+
if (auto *VecTy = dyn_cast<VectorType>(T))
118+
return ArrayType::get(VecTy->getElementType(),
119+
dyn_cast<FixedVectorType>(VecTy)->getNumElements());
120+
if (auto *ArrayTy = dyn_cast<ArrayType>(T)) {
121+
Type *NewElementType =
122+
equivalentArrayTypeFromVector(ArrayTy->getElementType());
123+
return ArrayType::get(NewElementType, ArrayTy->getNumElements());
124+
}
125+
// If it's not a vector or array, return the original type.
126+
return T;
127+
}
128+
130129
bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
131130
Type *AllocatedType = AI.getAllocatedType();
132131
if (!isVectorOrArrayOfVectors(AllocatedType))
@@ -135,7 +134,7 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
135134
IRBuilder<> Builder(&AI);
136135
Type *NewType = equivalentArrayTypeFromVector(AllocatedType);
137136
AllocaInst *ArrAlloca =
138-
Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarize");
137+
Builder.CreateAlloca(NewType, nullptr, AI.getName() + ".scalarized");
139138
ArrAlloca->setAlignment(AI.getAlign());
140139
AI.replaceAllUsesWith(ArrAlloca);
141140
AI.eraseFromParent();
@@ -303,78 +302,44 @@ bool DataScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
303302
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
304303
GEPOperator *GOp = cast<GEPOperator>(&GEPI);
305304
Value *PtrOperand = GOp->getPointerOperand();
306-
Type *NewGEPType = GOp->getSourceElementType();
307-
308-
// Unwrap GEP ConstantExprs to find the base operand and element type
309-
while (auto *GEPCE = dyn_cast_or_null<GEPOperator>(
310-
dyn_cast<ConstantExpr>(PtrOperand))) {
311-
GOp = GEPCE;
312-
PtrOperand = GEPCE->getPointerOperand();
313-
NewGEPType = GEPCE->getSourceElementType();
314-
}
315-
316-
Type *const OrigGEPType = NewGEPType;
317-
Value *const OrigOperand = PtrOperand;
318-
319-
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
320-
NewGEPType = NewGlobal->getValueType();
321-
PtrOperand = NewGlobal;
322-
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
323-
Type *AllocatedType = Alloca->getAllocatedType();
324-
if (isa<ArrayType>(AllocatedType) &&
325-
AllocatedType != GOp->getResultElementType())
326-
NewGEPType = AllocatedType;
327-
} else
328-
return false; // Only GEPs into an alloca or global variable are considered
329-
330-
// Defer changing i8 GEP types until dxil-flatten-arrays
331-
if (OrigGEPType->isIntegerTy(8))
332-
NewGEPType = OrigGEPType;
333-
334-
// If the original type is a "sub-type" of the new type, then ensure the gep
335-
// correctly zero-indexes the extra dimensions to keep the offset calculation
336-
// correct.
337-
// Eg:
338-
// i32, [4 x i32] and [8 x [4 x i32]] are sub-types of [8 x [4 x i32]], etc.
339-
//
340-
// So then:
341-
// gep [4 x i32] %idx
342-
// -> gep [8 x [4 x i32]], i32 0, i32 %idx
343-
// gep i32 %idx
344-
// -> gep [8 x [4 x i32]], i32 0, i32 0, i32 %idx
345-
uint32_t MissingDims = 0;
346-
Type *SubType = NewGEPType;
347-
348-
// The new type will be in its array version; so match accordingly.
349-
Type *const GEPArrType = equivalentArrayTypeFromVector(OrigGEPType);
350-
351-
while (SubType != GEPArrType) {
352-
MissingDims++;
353-
354-
ArrayType *ArrType = dyn_cast<ArrayType>(SubType);
355-
if (!ArrType) {
356-
assert(SubType == GEPArrType &&
357-
"GEP uses an DXIL invalid sub-type of alloca/global variable");
358-
break;
359-
}
360-
361-
SubType = ArrType->getElementType();
305+
Type *GEPType = GOp->getSourceElementType();
306+
307+
// Replace a GEP ConstantExpr pointer operand with a GEP instruction so that
308+
// it can be visited
309+
if (auto *PtrOpGEPCE = dyn_cast<ConstantExpr>(PtrOperand);
310+
PtrOpGEPCE && PtrOpGEPCE->getOpcode() == Instruction::GetElementPtr) {
311+
GetElementPtrInst *OldGEPI =
312+
cast<GetElementPtrInst>(PtrOpGEPCE->getAsInstruction());
313+
OldGEPI->insertBefore(GEPI.getIterator());
314+
315+
IRBuilder<> Builder(&GEPI);
316+
SmallVector<Value *> Indices(GEPI.indices());
317+
Value *NewGEP =
318+
Builder.CreateGEP(GEPI.getSourceElementType(), OldGEPI, Indices,
319+
GEPI.getName(), GEPI.getNoWrapFlags());
320+
assert(isa<GetElementPtrInst>(NewGEP) &&
321+
"Expected newly-created GEP to be an instruction");
322+
GetElementPtrInst *NewGEPI = cast<GetElementPtrInst>(NewGEP);
323+
324+
GEPI.replaceAllUsesWith(NewGEPI);
325+
GEPI.eraseFromParent();
326+
visitGetElementPtrInst(*OldGEPI);
327+
visitGetElementPtrInst(*NewGEPI);
328+
return true;
362329
}
363330

364-
bool NeedsTransform = OrigOperand != PtrOperand ||
365-
OrigGEPType != NewGEPType || MissingDims != 0;
331+
Type *NewGEPType = equivalentArrayTypeFromVector(GEPType);
332+
Value *NewPtrOperand = PtrOperand;
333+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
334+
NewPtrOperand = NewGlobal;
366335

336+
bool NeedsTransform = NewPtrOperand != PtrOperand || NewGEPType != GEPType;
367337
if (!NeedsTransform)
368338
return false;
369339

370340
IRBuilder<> Builder(&GEPI);
371-
SmallVector<Value *, MaxVecSize> Indices;
372-
373-
for (uint32_t I = 0; I < MissingDims; I++)
374-
Indices.push_back(Builder.getInt32(0));
375-
llvm::append_range(Indices, GOp->indices());
376-
377-
Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
341+
SmallVector<Value *, MaxVecSize> Indices(GOp->idx_begin(), GOp->idx_end());
342+
Value *NewGEP = Builder.CreateGEP(NewGEPType, NewPtrOperand, Indices,
378343
GOp->getName(), GOp->getNoWrapFlags());
379344

380345
GOp->replaceAllUsesWith(NewGEP);

llvm/test/CodeGen/DirectX/bugfix_150050_data_scalarize_const_gep.ll

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,10 @@ define void @CSMain() {
1111
; CHECK-NEXT: [[ENTRY:.*:]]
1212
; CHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE:%.*]] = alloca [4 x i32], align 16
1313
;
14-
; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x <4 x i32>], ptr addrspace(3) getelementptr inbounds ([10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1), i32 0, i32 2
15-
; SCHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(3) [[TMP0]], align 16
16-
; SCHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
14+
; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x [4 x i32]]], ptr addrspace(3) @aTile.scalarized, i32 0, i32 1
15+
; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x [4 x i32]], ptr addrspace(3) [[GEP0]], i32 0, i32 2
16+
; SCHECK-NEXT: [[LOAD:%.*]] = load <4 x i32>, ptr addrspace(3) [[GEP1]], align 16
17+
; SCHECK-NEXT: store <4 x i32> [[LOAD]], ptr [[AFRAGPACKED_I_SCALARIZE]], align 16
1718
;
1819
; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I14:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 1
1920
; FCHECK-NEXT: [[AFRAGPACKED_I_SCALARIZE_I25:%.*]] = getelementptr [4 x i32], ptr [[AFRAGPACKED_I_SCALARIZE]], i32 0, i32 2
@@ -40,12 +41,13 @@ define void @Main() {
4041
; CHECK-NEXT: [[ENTRY:.*:]]
4142
; CHECK-NEXT: [[BFRAGPACKED_I:%.*]] = alloca i32, align 16
4243
;
43-
; SCHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) getelementptr inbounds ([10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1), i32 0, i32 1
44-
; SCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(3) [[TMP0]], align 16
45-
; SCHECK-NEXT: store i32 [[TMP1]], ptr [[BFRAGPACKED_I]], align 16
44+
; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr addrspace(3) @bTile, i32 0, i32 1
45+
; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [10 x i32], ptr addrspace(3) [[GEP0]], i32 0, i32 1
46+
; SCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) [[GEP1]], align 16
47+
; SCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
4648
;
47-
; FCHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
48-
; FCHECK-NEXT: store i32 [[TMP0]], ptr [[BFRAGPACKED_I]], align 16
49+
; FCHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([100 x i32], ptr addrspace(3) @bTile.1dim, i32 0, i32 11), align 16
50+
; FCHECK-NEXT: store i32 [[LOAD]], ptr [[BFRAGPACKED_I]], align 16
4951
;
5052
; CHECK-NEXT: ret void
5153
entry:
@@ -57,10 +59,12 @@ entry:
5759

5860
define void @global_nested_geps_3d() {
5961
; CHECK-LABEL: define void @global_nested_geps_3d() {
60-
; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
61-
; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
62+
; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr @cTile.scalarized, i32 0, i32 1
63+
; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP0]], i32 0, i32 1
64+
; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP1]], i32 0, i32 1
65+
; SCHECK-NEXT: load i32, ptr [[GEP2]], align 4
6266
;
63-
; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
67+
; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([8 x i32], ptr @cTile.scalarized.1dim, i32 0, i32 7), align 4
6468
;
6569
; CHECK-NEXT: ret void
6670
%1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* @cTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4
@@ -69,10 +73,13 @@ define void @global_nested_geps_3d() {
6973

7074
define void @global_nested_geps_4d() {
7175
; CHECK-LABEL: define void @global_nested_geps_4d() {
72-
; SCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds <2 x i32>, ptr getelementptr inbounds ([2 x <2 x i32>], ptr getelementptr inbounds ([2 x [2 x <2 x i32>]], ptr getelementptr inbounds ([2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1
73-
; SCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
76+
; SCHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds [2 x [2 x [2 x [2 x i32]]]], ptr @dTile.scalarized, i32 0, i32 1
77+
; SCHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x [2 x [2 x i32]]], ptr [[GEP0]], i32 0, i32 1
78+
; SCHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x [2 x i32]], ptr [[GEP1]], i32 0, i32 1
79+
; SCHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[GEP2]], i32 0, i32 1
80+
; SCHECK-NEXT: load i32, ptr [[GEP3]], align 4
7481
;
75-
; FCHECK-NEXT: [[TMP1:%.*]] = load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
82+
; FCHECK-NEXT: load i32, ptr getelementptr inbounds ([16 x i32], ptr @dTile.scalarized.1dim, i32 0, i32 15), align 4
7683
;
7784
; CHECK-NEXT: ret void
7885
%1 = load i32, i32* getelementptr inbounds (<2 x i32>, <2 x i32>* getelementptr inbounds ([2 x <2 x i32>], [2 x <2 x i32>]* getelementptr inbounds ([2 x [2 x <2 x i32>]], [2 x [2 x <2 x i32>]]* getelementptr inbounds ([2 x [2 x [2 x <2 x i32>]]], [2 x [2 x [2 x <2 x i32>]]]* @dTile, i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), i32 0, i32 1), align 4

llvm/test/CodeGen/DirectX/scalarize-alloca.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ define void @subtype_array_test() {
4848
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
4949
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
5050
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
51-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
51+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
5252
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
5353
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
5454
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -64,7 +64,7 @@ define void @subtype_vector_test() {
6464
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
6565
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
6666
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
67-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
67+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 [[tid]]
6868
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
6969
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
7070
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]
@@ -80,7 +80,7 @@ define void @subtype_scalar_test() {
8080
; SCHECK: [[alloca_val:%.*]] = alloca [8 x [4 x i32]], align 4
8181
; FCHECK: [[alloca_val:%.*]] = alloca [32 x i32], align 4
8282
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
83-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr [[alloca_val]], i32 0, i32 0, i32 [[tid]]
83+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr [[alloca_val]], i32 [[tid]]
8484
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
8585
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
8686
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr [[alloca_val]], i32 0, i32 [[flatidx]]

llvm/test/CodeGen/DirectX/scalarize-global.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
; CHECK-LABEL: subtype_array_test
1212
define <4 x i32> @subtype_array_test() {
1313
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
14-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
14+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
1515
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
1616
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
1717
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -26,7 +26,7 @@ define <4 x i32> @subtype_array_test() {
2626
; CHECK-LABEL: subtype_vector_test
2727
define <4 x i32> @subtype_vector_test() {
2828
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
29-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[tid]]
29+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
3030
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 4
3131
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
3232
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]
@@ -41,7 +41,7 @@ define <4 x i32> @subtype_vector_test() {
4141
; CHECK-LABEL: subtype_scalar_test
4242
define <4 x i32> @subtype_scalar_test() {
4343
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
44-
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [8 x [4 x i32]], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 0, i32 [[tid]]
44+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw i32, ptr addrspace(3) [[arrayofVecData]], i32 [[tid]]
4545
; FCHECK: [[flatidx_mul:%.*]] = mul i32 [[tid]], 1
4646
; FCHECK: [[flatidx:%.*]] = add i32 0, [[flatidx_mul]]
4747
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [32 x i32], ptr addrspace(3) [[arrayofVecData]], i32 0, i32 [[flatidx]]

0 commit comments

Comments
 (0)