diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 28016b5936ccf..007f930cea4f3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -759,6 +759,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { return false; } + Type *VecEltTy = VectorTy->getElementType(); + constexpr unsigned SIZE_OF_BYTE = 8; + unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy); + // FIXME: Non-byte-sized types like i1 could be packed and supported, but + // currently we do not handle them. + if (ElementSizeInBits % SIZE_OF_BYTE != 0) + return false; + std::map GEPVectorIdx; SmallVector WorkList; SmallVector UsersToRemove; @@ -776,8 +784,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) { LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n"); - Type *VecEltTy = VectorTy->getElementType(); - unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8; + unsigned ElementSize = ElementSizeInBits / SIZE_OF_BYTE; for (auto *U : Uses) { Instruction *Inst = cast(U->getUser()); diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-skip-non-byte-type.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-skip-non-byte-type.ll new file mode 100644 index 0000000000000..3d2234f0a7ac3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-skip-non-byte-type.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s + +; Verify that we do not crash and do not promote non-byte alloca types.
+define <8 x i1> @non_byte_alloca_type() { +; CHECK-LABEL: define <8 x i1> @non_byte_alloca_type() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[C:%.*]] = icmp ugt <16 x i1> zeroinitializer, zeroinitializer +; CHECK-NEXT: [[RP:%.*]] = alloca <8 x i1>, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i1>, ptr [[RP]], align 1 +; CHECK-NEXT: store <16 x i1> [[C]], ptr [[RP]], align 2 +; CHECK-NEXT: ret <8 x i1> [[TMP0]] +; +entry: + %C = icmp ugt <16 x i1> zeroinitializer, zeroinitializer + %RP = alloca <8 x i1>, align 1 + %0 = load <8 x i1>, ptr %RP, align 1 + store <16 x i1> %C, ptr %RP, align 2 + ret <8 x i1> %0 +} +