llvm · sgundapa · Feb 25, 2025 · arsenm · Feb 26, 2025 · sgundapa
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -759,6 +759,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
     return false;
   }
 
+  Type *VecEltTy = VectorTy->getElementType();
+  constexpr unsigned SIZE_OF_BYTE = 8;
+  unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
+  // FIXME: Non-byte-sized element types such as i1 could be packed and
+  // supported, but they are currently not handled.
+  if (ElementSizeInBits % SIZE_OF_BYTE != 0)
+    return false;
+
   std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
   SmallVector<Instruction *> WorkList;
   SmallVector<Instruction *> UsersToRemove;
@@ -776,8 +784,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
 
   LLVM_DEBUG(dbgs() << "  Attempting promotion to: " << *VectorTy << "\n");
 
-  Type *VecEltTy = VectorTy->getElementType();
-  unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
+  unsigned ElementSize = ElementSizeInBits / SIZE_OF_BYTE;
   for (auto *U : Uses) {
     Instruction *Inst = cast<Instruction>(U->getUser());
 

diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-skip-non-byte-type.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-skip-non-byte-type.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Verify that we neither crash on nor promote allocas with non-byte-sized element types.
+define <8 x i1> @non_byte_alloca_type() {
+; CHECK-LABEL: define <8 x i1> @non_byte_alloca_type() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <16 x i1> zeroinitializer, zeroinitializer
+; CHECK-NEXT:    [[RP:%.*]] = alloca <8 x i1>, align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i1>, ptr [[RP]], align 1
+; CHECK-NEXT:    store <16 x i1> [[C]], ptr [[RP]], align 2
+; CHECK-NEXT:    ret <8 x i1> [[TMP0]]
+;
+entry:
+  %C = icmp ugt <16 x i1> zeroinitializer, zeroinitializer
+  %RP = alloca <8 x i1>, align 1
+  %0 = load <8 x i1>, ptr %RP, align 1
+  store <16 x i1> %C, ptr %RP, align 2
+  ret <8 x i1> %0
+}
+