Skip to content

Commit 6714d7e

Browse files
committed
[DirectX] Don't limit visitGetElementPtrInst to global ptrs
fixes #144608 - there is a getPointerOperandIndex function so we don't need to iterate the operands trying to find the pointer. This resulted in a small cleanup to visitStoreInst and visitLoadInst. - The meat of this change was in visitGetElementPtrInst to account for allocas and not bail when we don't find a global.
1 parent 53ea522 commit 6714d7e

File tree

2 files changed

+72
-51
lines changed

2 files changed

+72
-51
lines changed

llvm/lib/Target/DirectX/DXILDataScalarization.cpp

Lines changed: 55 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
#include "llvm/IR/GlobalVariable.h"
1515
#include "llvm/IR/IRBuilder.h"
1616
#include "llvm/IR/InstVisitor.h"
17+
#include "llvm/IR/Instructions.h"
1718
#include "llvm/IR/Module.h"
1819
#include "llvm/IR/Operator.h"
1920
#include "llvm/IR/PassManager.h"
2021
#include "llvm/IR/ReplaceConstant.h"
2122
#include "llvm/IR/Type.h"
23+
#include "llvm/Support/Casting.h"
2224
#include "llvm/Transforms/Utils/Cloning.h"
2325
#include "llvm/Transforms/Utils/Local.h"
2426

@@ -127,71 +129,75 @@ bool DataScalarizerVisitor::visitAllocaInst(AllocaInst &AI) {
127129
}
128130

129131
bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) {
130-
unsigned NumOperands = LI.getNumOperands();
131-
for (unsigned I = 0; I < NumOperands; ++I) {
132-
Value *CurrOpperand = LI.getOperand(I);
133-
ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
134-
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
135-
GetElementPtrInst *OldGEP =
136-
cast<GetElementPtrInst>(CE->getAsInstruction());
137-
OldGEP->insertBefore(LI.getIterator());
138-
IRBuilder<> Builder(&LI);
139-
LoadInst *NewLoad =
140-
Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
141-
NewLoad->setAlignment(LI.getAlign());
142-
LI.replaceAllUsesWith(NewLoad);
143-
LI.eraseFromParent();
144-
visitGetElementPtrInst(*OldGEP);
145-
return true;
146-
}
147-
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
148-
LI.setOperand(I, NewGlobal);
132+
Value *PtrOperand = LI.getPointerOperand();
133+
ConstantExpr *CE = dyn_cast<ConstantExpr>(PtrOperand);
134+
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
135+
GetElementPtrInst *OldGEP = cast<GetElementPtrInst>(CE->getAsInstruction());
136+
OldGEP->insertBefore(LI.getIterator());
137+
IRBuilder<> Builder(&LI);
138+
LoadInst *NewLoad = Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
139+
NewLoad->setAlignment(LI.getAlign());
140+
LI.replaceAllUsesWith(NewLoad);
141+
LI.eraseFromParent();
142+
visitGetElementPtrInst(*OldGEP);
143+
return true;
149144
}
145+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
146+
LI.setOperand(LI.getPointerOperandIndex(), NewGlobal);
150147
return false;
151148
}
152149

153150
bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
154-
unsigned NumOperands = SI.getNumOperands();
155-
for (unsigned I = 0; I < NumOperands; ++I) {
156-
Value *CurrOpperand = SI.getOperand(I);
157-
ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
158-
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
159-
GetElementPtrInst *OldGEP =
160-
cast<GetElementPtrInst>(CE->getAsInstruction());
161-
OldGEP->insertBefore(SI.getIterator());
162-
IRBuilder<> Builder(&SI);
163-
StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
164-
NewStore->setAlignment(SI.getAlign());
165-
SI.replaceAllUsesWith(NewStore);
166-
SI.eraseFromParent();
167-
visitGetElementPtrInst(*OldGEP);
168-
return true;
169-
}
170-
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
171-
SI.setOperand(I, NewGlobal);
151+
152+
Value *PtrOperand = SI.getPointerOperand();
153+
ConstantExpr *CE = dyn_cast<ConstantExpr>(PtrOperand);
154+
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
155+
GetElementPtrInst *OldGEP = cast<GetElementPtrInst>(CE->getAsInstruction());
156+
OldGEP->insertBefore(SI.getIterator());
157+
IRBuilder<> Builder(&SI);
158+
StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
159+
NewStore->setAlignment(SI.getAlign());
160+
SI.replaceAllUsesWith(NewStore);
161+
SI.eraseFromParent();
162+
visitGetElementPtrInst(*OldGEP);
163+
return true;
172164
}
165+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand))
166+
SI.setOperand(SI.getPointerOperandIndex(), NewGlobal);
167+
173168
return false;
174169
}
175170

176171
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
177-
178-
unsigned NumOperands = GEPI.getNumOperands();
179-
GlobalVariable *NewGlobal = nullptr;
180-
for (unsigned I = 0; I < NumOperands; ++I) {
181-
Value *CurrOpperand = GEPI.getOperand(I);
182-
NewGlobal = lookupReplacementGlobal(CurrOpperand);
183-
if (NewGlobal)
184-
break;
172+
Value *PtrOperand = GEPI.getPointerOperand();
173+
Type *OrigGEPType = GEPI.getPointerOperandType();
174+
Type *NewGEPType = OrigGEPType;
175+
bool NeedsTransform = false;
176+
177+
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(PtrOperand)) {
178+
NewGEPType = NewGlobal->getValueType();
179+
PtrOperand = NewGlobal;
180+
NeedsTransform = true;
181+
} else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrOperand)) {
182+
Type *AllocatedType = Alloca->getAllocatedType();
183+
// OrigGEPType might just be a pointer lets make sure
184+
// to add the allocated type so we have a size
185+
if (AllocatedType != OrigGEPType) {
186+
NewGEPType = AllocatedType;
187+
NeedsTransform = true;
188+
}
185189
}
186-
if (!NewGlobal)
190+
191+
// Note: We bail if this isn't a gep touched via alloca or global
192+
// transformations
193+
if (!NeedsTransform)
187194
return false;
188195

189196
IRBuilder<> Builder(&GEPI);
190197
SmallVector<Value *, MaxVecSize> Indices(GEPI.indices());
191198

192-
Value *NewGEP =
193-
Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices,
194-
GEPI.getName(), GEPI.getNoWrapFlags());
199+
Value *NewGEP = Builder.CreateGEP(NewGEPType, PtrOperand, Indices,
200+
GEPI.getName(), GEPI.getNoWrapFlags());
195201
GEPI.replaceAllUsesWith(NewGEP);
196202
GEPI.eraseFromParent();
197203
return true;
Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,25 @@
1-
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefix=SCHECK
2-
; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefix=FCHECK
1+
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=SCHECK,CHECK
2+
; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=FCHECK,CHECK
33

44
; CHECK-LABEL: alloca_2d__vec_test
55
define void @alloca_2d__vec_test() local_unnamed_addr #2 {
66
; SCHECK: alloca [2 x [4 x i32]], align 16
77
; FCHECK: alloca [8 x i32], align 16
8+
; CHECK: ret void
89
%1 = alloca [2 x <4 x i32>], align 16
910
ret void
1011
}
12+
13+
; CHECK-LABEL: alloca_2d_gep_test
14+
define void @alloca_2d_gep_test() {
15+
; SCHECK: [[alloca_val:%.*]] = alloca [2 x [2 x i32]], align 16
16+
; FCHECK: [[alloca_val:%.*]] = alloca [4 x i32], align 16
17+
; CHECK: [[tid:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
18+
; SCHECK: [[gep:%.*]] = getelementptr inbounds nuw [2 x [2 x i32]], ptr [[alloca_val]], i32 0, i32 [[tid]]
19+
; FCHECK: [[gep:%.*]] = getelementptr inbounds nuw [4 x i32], ptr [[alloca_val]], i32 0, i32 [[tid]]
20+
; CHECK: ret void
21+
%1 = alloca [2 x <2 x i32>], align 16
22+
%2 = tail call i32 @llvm.dx.thread.id(i32 0)
23+
%3 = getelementptr inbounds nuw [2 x <2 x i32>], ptr %1, i32 0, i32 %2
24+
ret void
25+
}

0 commit comments

Comments
 (0)