Skip to content

Commit c1439a3

Browse files
committed
[AMDGPU] Enable i8 GEP promotion for vector allocas
1 parent 5ad2487 commit c1439a3

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -456,10 +456,21 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
456456
const auto &VarOffset = VarOffsets.front();
457457
APInt OffsetQuot;
458458
APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
459-
if (Rem != 0 || OffsetQuot.isZero())
460-
return nullptr;
459+
460+
Value *Scaled = nullptr;
461+
if (Rem != 0 || OffsetQuot.isZero()) {
462+
unsigned ElemSizeShift = Log2_64(VecElemSize);
463+
Scaled = Builder.CreateLShr(VarOffset.first, ElemSizeShift);
464+
if (Instruction *NewInst = dyn_cast<Instruction>(Scaled))
465+
NewInsts.push_back(NewInst);
466+
OffsetQuot = APInt(BW, 1);
467+
Rem = 0;
468+
}
461469

462470
Value *Offset = VarOffset.first;
471+
if (Scaled)
472+
Offset = Scaled;
473+
463474
auto *OffsetType = dyn_cast<IntegerType>(Offset->getType());
464475
if (!OffsetType)
465476
return nullptr;

llvm/test/CodeGen/AMDGPU/promote-alloca-vector-gep.ll

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -250,6 +250,26 @@ bb2:
250250
store i32 0, ptr addrspace(5) %extractelement
251251
ret void
252252
}
253+
254+
; Test: a <3 x float> alloca is addressed through an i8 (byte-granular) GEP
; whose index is the runtime value %index, and a single float is stored
; through the resulting pointer.
; The CHECK lines expect the alloca to be replaced by a vector value: the byte
; index is turned into an element index with `lshr i32 %index, 2` and the
; store becomes an `insertelement` into the previously loaded vector.
define amdgpu_kernel void @scalar_alloca_vector_gep_i8(ptr %buffer, float %data, i32 %index) {
255+
; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_vector_gep_i8(
256+
; CHECK-SAME: ptr [[BUFFER:%.*]], float [[DATA:%.*]], i32 [[INDEX:%.*]]) {
257+
; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <3 x float> poison
258+
; CHECK-NEXT: [[VEC:%.*]] = load <3 x float>, ptr [[BUFFER]], align 16
259+
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[INDEX]], 2
260+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[VEC]], float [[DATA]], i32 [[TMP1]]
261+
; CHECK-NEXT: store <3 x float> [[TMP2]], ptr [[BUFFER]], align 16
262+
; CHECK-NEXT: ret void
263+
;
264+
%alloca = alloca <3 x float>, align 16, addrspace(5)
265+
%vec = load <3 x float>, ptr %buffer
266+
store <3 x float> %vec, ptr addrspace(5) %alloca
267+
; Variable byte offset into the vector alloca; this is the GEP form under test.
%elt = getelementptr inbounds nuw i8, ptr addrspace(5) %alloca, i32 %index
268+
store float %data, ptr addrspace(5) %elt, align 4
269+
; Reload the whole vector and write it back to %buffer.
%updated = load <3 x float>, ptr addrspace(5) %alloca, align 16
270+
store <3 x float> %updated, ptr %buffer, align 16
271+
ret void
272+
}
253273
;.
254274
; CHECK: [[META0]] = !{}
255275
; CHECK: [[RNG1]] = !{i32 0, i32 1025}

0 commit comments

Comments
 (0)