Skip to content

Commit 6858552

Browse files
committed
[AMDGPU] Fix AMDGPUPromoteAlloca handling certain loads incorrectly
`AMDGPUPromoteAlloca` incorrectly handled loads from the alloca whose result was used as a GEP index into the same alloca. Change-Id: I91059749dc80a960555b44f67043233e4102d271
1 parent 56a37a3 commit 6858552

File tree

2 files changed

+27
-0
lines changed

2 files changed

+27
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,14 @@ calculateVectorIndex(Value *Ptr,
397397
return I->second;
398398
}
399399

400+
/// Rewrite every entry of \p GEPIdx whose recorded index value is \p OldIdx so
/// that it holds \p NewIdx instead. Entries with any other index are left
/// untouched, and no entries are added or removed.
///
/// \param OldIdx  index value to look for among the mapped values.
/// \param NewIdx  value to store in place of each match.
/// \param GEPIdx  map from GEP instruction to its index value, updated in
///                place.
static void updateVectorIndex(Value *OldIdx, Value *NewIdx,
                              std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  for (auto &[GEP, Idx] : GEPIdx) {
    // Idx is bound by reference to the mapped value, so assign through it
    // directly instead of performing a second lookup with operator[].
    if (Idx == OldIdx)
      Idx = NewIdx;
  }
}
407+
400408
static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
401409
Type *VecElemTy, const DataLayout &DL) {
402410
// TODO: Extracting a "multiple of X" from a GEP might be a useful generic
@@ -544,6 +552,9 @@ static Value *promoteAllocaUserToVector(
544552
ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
545553

546554
Inst->replaceAllUsesWith(ExtractElement);
555+
// If the loaded value is used as an index into a GEP, update all its uses
556+
// in the GEPVectorIdx map.
557+
updateVectorIndex(Inst, ExtractElement, GEPVectorIdx);
547558
return nullptr;
548559
}
549560
case Instruction::Store: {
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3+
4+
define void @vector_alloca_with_loaded_value_as_index(<2 x i64> %arg) {
5+
; CHECK-LABEL: define void @vector_alloca_with_loaded_value_as_index(
6+
; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[ARG]], i64 0
8+
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[ARG]], i64 1
9+
; CHECK-NEXT: ret void
10+
;
11+
%alloca = alloca <2 x i64>, align 16
12+
%idx = load i64, ptr %alloca, align 4
13+
%gep = getelementptr <1 x double>, ptr %alloca, i64 %idx
14+
store <2 x i64> %arg, ptr %gep, align 16
15+
ret void
16+
}

0 commit comments

Comments
 (0)