Skip to content

Commit 244b01e

Browse files
committed
[DirectX] add GEP i8 legalization
The i8 legalization code in DXILLegalizePass's `fixI8UseChain` needs to be updated to check for i8 GEPs. Some i8 GEPs are being left around after we remove all the other i8 instructions, and this is causing problems during validation. Since this is cleaning up a missed GEP, the approach is to assume that the getPointerOperand refers to an alloca. We further check whether this is an array alloca, then do some byte-offset arithmetic to figure out the memory index to use. Next, we emit the new GEP and clean up the old one. Finally, upcastI8AllocasAndUses needed to be updated to account for loads off of GEPs instead of just loads from the alloca.
1 parent 11713e8 commit 244b01e

File tree

2 files changed

+104
-8
lines changed

2 files changed

+104
-8
lines changed

llvm/lib/Target/DirectX/DXILLegalizePass.cpp

Lines changed: 50 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ static void fixI8UseChain(Instruction &I,
9595
Type *ElementType = NewOperands[0]->getType();
9696
if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
9797
ElementType = AI->getAllocatedType();
98+
if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0]))
99+
ElementType = GEP->getSourceElementType();
98100
LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
99101
ReplacedValues[Load] = NewLoad;
100102
ToRemove.push_back(Load);
@@ -164,6 +166,36 @@ static void fixI8UseChain(Instruction &I,
164166
if (AdjustedCast)
165167
Cast->replaceAllUsesWith(AdjustedCast);
166168
}
169+
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
170+
if (!GEP->getType()->isPointerTy() ||
171+
!GEP->getSourceElementType()->isIntegerTy(8))
172+
return;
173+
174+
Value *BasePtr = GEP->getPointerOperand();
175+
if (ReplacedValues.count(BasePtr))
176+
BasePtr = ReplacedValues[BasePtr];
177+
178+
Type *ElementType = BasePtr->getType();
179+
if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
180+
ElementType = AI->getAllocatedType();
181+
if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
182+
ElementType = ArrTy->getArrayElementType();
183+
184+
ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
185+
// Note: the only way to convert an i8 offset to an i32 offset without
186+
// emitting code would be to emit code. We should expect this value to be a
187+
// ConstantInt since Offsets are very regularly converted.
188+
assert(Offset && "Offset is expected to be a ConstantInt");
189+
uint32_t ByteOffset = Offset->getZExtValue();
190+
uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
191+
assert(ElemSize > 0 && "ElementSize must be set");
192+
uint32_t Index = ByteOffset / ElemSize;
193+
Value *NewGEP =
194+
Builder.CreateGEP(ElementType, BasePtr, Builder.getInt32(Index),
195+
GEP->getName(), GEP->getNoWrapFlags());
196+
ReplacedValues[GEP] = NewGEP;
197+
ToRemove.push_back(GEP);
198+
}
167199
}
168200

169201
static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,25 +207,35 @@ static void upcastI8AllocasAndUses(Instruction &I,
175207

176208
Type *SmallestType = nullptr;
177209

178-
for (User *U : AI->users()) {
179-
auto *Load = dyn_cast<LoadInst>(U);
180-
if (!Load)
181-
continue;
210+
auto ProcessLoad = [&](LoadInst *Load) {
182211
for (User *LU : Load->users()) {
183212
Type *Ty = nullptr;
184-
if (auto *Cast = dyn_cast<CastInst>(LU))
213+
if (auto *Cast = dyn_cast<CastInst>(LU)) {
185214
Ty = Cast->getType();
186-
if (CallInst *CI = dyn_cast<CallInst>(LU)) {
215+
} else if (auto *CI = dyn_cast<CallInst>(LU)) {
187216
if (CI->getIntrinsicID() == Intrinsic::memset)
188217
Ty = Type::getInt32Ty(CI->getContext());
189218
}
190219

191220
if (!Ty)
192221
continue;
193222

194-
if (!SmallestType ||
195-
Ty->getPrimitiveSizeInBits() < SmallestType->getPrimitiveSizeInBits())
223+
if (!SmallestType || Ty->getPrimitiveSizeInBits() <
224+
SmallestType->getPrimitiveSizeInBits()) {
196225
SmallestType = Ty;
226+
}
227+
}
228+
};
229+
230+
for (User *U : AI->users()) {
231+
if (auto *Load = dyn_cast<LoadInst>(U))
232+
ProcessLoad(Load);
233+
else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
234+
for (User *GU : GEP->users()) {
235+
if (auto *Load = dyn_cast<LoadInst>(GU)) {
236+
ProcessLoad(Load);
237+
}
238+
}
197239
}
198240
}
199241

llvm/test/CodeGen/DirectX/legalize-i8.ll

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,57 @@ define i32 @all_imm() {
106106
%2 = sext i8 %1 to i32
107107
ret i32 %2
108108
}
109+
110+
define i32 @scalar_i8_geps() {
111+
; CHECK-LABEL: define i32 @scalar_i8_geps(
112+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4
113+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
114+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
115+
; CHECK-NEXT: ret i32 [[LOAD]]
116+
%1 = alloca i8, align 4
117+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 0
118+
%3 = load i8, ptr %2
119+
%4 = sext i8 %3 to i32
120+
ret i32 %4
121+
}
122+
123+
define i32 @i8_geps_index0() {
124+
; CHECK-LABEL: define i32 @i8_geps_index0(
125+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
126+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 0
127+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
128+
; CHECK-NEXT: ret i32 [[LOAD]]
129+
%1 = alloca [2 x i32], align 8
130+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 0
131+
%3 = load i8, ptr %2
132+
%4 = sext i8 %3 to i32
133+
ret i32 %4
134+
}
135+
136+
define i32 @i8_geps_index1() {
137+
; CHECK-LABEL: define i32 @i8_geps_index1(
138+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
139+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
140+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]]
141+
; CHECK-NEXT: ret i32 [[LOAD]]
142+
%1 = alloca [2 x i32], align 8
143+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 4
144+
%3 = load i8, ptr %2
145+
%4 = sext i8 %3 to i32
146+
ret i32 %4
147+
}
148+
149+
define i32 @i8_gep_store() {
150+
; CHECK-LABEL: define i32 @i8_gep_store(
151+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x i32], align 8
152+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[ALLOCA]], i32 1
153+
; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4
154+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]]
155+
; CHECK-NEXT: ret i32 [[LOAD]]
156+
%1 = alloca [2 x i32], align 8
157+
%2 = getelementptr inbounds nuw i8, ptr %1, i32 4
158+
store i8 1, ptr %2
159+
%3 = load i8, ptr %2
160+
%4 = sext i8 %3 to i32
161+
ret i32 %4
162+
}

0 commit comments

Comments
 (0)