From 6858552f3d2e7cb72caea7c7877963dcaeeeba4b Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande
Date: Thu, 16 Jan 2025 04:21:26 -0500
Subject: [PATCH] [AMDGPU] Fix `AMDGPUPromoteAlloca` handling certain loads incorrectly

`AMDGPUPromoteAlloca` was incorrectly handling loads from the alloca which
were used as a gep index into the same alloca.

Change-Id: I91059749dc80a960555b44f67043233e4102d271
---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp   | 13 +++++++++++++
 .../AMDGPU/promote-alloca-update-vector-idx.ll   | 16 ++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c088..d8dcdc6afd18c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -397,6 +397,16 @@ calculateVectorIndex(Value *Ptr,
   return I->second;
 }
 
+/// Redirect every entry of \p GEPIdx whose recorded vector index is \p OldIdx
+/// so that it refers to \p NewIdx instead. The GEP keys are left untouched.
+static void updateVectorIndex(Value *OldIdx, Value *NewIdx,
+                              std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+  for (auto &Entry : GEPIdx) {
+    if (Entry.second == OldIdx)
+      Entry.second = NewIdx;
+  }
+}
+
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                                Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
@@ -544,6 +554,9 @@ static Value *promoteAllocaUserToVector(
       ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
 
     Inst->replaceAllUsesWith(ExtractElement);
+    // If the loaded value is used as an index into a GEP, update all its uses
+    // in the GEPVectorIdx map.
+    updateVectorIndex(Inst, ExtractElement, GEPVectorIdx);
     return nullptr;
   }
   case Instruction::Store: {
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
new file mode 100644
index 0000000000000..4fef7d1941381
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+define void @vector_alloca_with_loaded_value_as_index(<2 x i64> %arg) {
+; CHECK-LABEL: define void @vector_alloca_with_loaded_value_as_index(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[ARG]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[ARG]], i64 1
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca <2 x i64>, align 16
+  %idx = load i64, ptr %alloca, align 4
+  %gep = getelementptr <1 x double>, ptr %alloca, i64 %idx
+  store <2 x i64> %arg, ptr %gep, align 16
+  ret void
+}