diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c088..d8dcdc6afd18c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -397,6 +397,17 @@ calculateVectorIndex(Value *Ptr,
   return I->second;
 }
 
+/// Redirect every entry of \p GEPIdx whose vector index is \p OldIdx to
+/// \p NewIdx. The map stores plain pointers, so replaceAllUsesWith on
+/// \p OldIdx does not update it.
+static void updateVectorIndex(Value *OldIdx, Value *NewIdx,
+                              std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+  for (auto &[GEP, Idx] : GEPIdx) {
+    if (Idx == OldIdx)
+      Idx = NewIdx;
+  }
+}
+
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                                Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
@@ -544,6 +555,9 @@ static Value *promoteAllocaUserToVector(
       ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
 
     Inst->replaceAllUsesWith(ExtractElement);
+    // If the loaded value is used as an index into a GEP, update all its uses
+    // in the GEPVectorIdx map.
+    updateVectorIndex(Inst, ExtractElement, GEPVectorIdx);
     return nullptr;
   }
   case Instruction::Store: {
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
new file mode 100644
index 0000000000000..4fef7d1941381
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+define void @vector_alloca_with_loaded_value_as_index(<2 x i64> %arg) {
+; CHECK-LABEL: define void @vector_alloca_with_loaded_value_as_index(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[ARG]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[ARG]], i64 1
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca <2 x i64>, align 16
+  %idx = load i64, ptr %alloca, align 4
+  %gep = getelementptr <1 x double>, ptr %alloca, i64 %idx
+  store <2 x i64> %arg, ptr %gep, align 16
+  ret void
+}