File tree Expand file tree Collapse file tree 2 files changed +30
-2
lines changed Expand file tree Collapse file tree 2 files changed +30
-2
lines changed Original file line number Diff line number Diff line change @@ -759,6 +759,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
759759 return false ;
760760 }
761761
762+ Type *VecEltTy = VectorTy->getElementType ();
763+ constexpr unsigned SIZE_OF_BYTE = 8 ;
764+ unsigned ElementSizeInBits = DL->getTypeSizeInBits (VecEltTy);
765+ // FIXME: Non-byte-sized element types such as i1 could be packed and
766+ // supported, but we do not handle them yet.
767+ if (ElementSizeInBits % SIZE_OF_BYTE != 0 )
768+ return false ;
769+
762770 std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
763771 SmallVector<Instruction *> WorkList;
764772 SmallVector<Instruction *> UsersToRemove;
@@ -776,8 +784,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
776784
777785 LLVM_DEBUG (dbgs () << " Attempting promotion to: " << *VectorTy << " \n " );
778786
779- Type *VecEltTy = VectorTy->getElementType ();
780- unsigned ElementSize = DL->getTypeSizeInBits (VecEltTy) / 8 ;
787+ unsigned ElementSize = ElementSizeInBits / SIZE_OF_BYTE;
781788 for (auto *U : Uses) {
782789 Instruction *Inst = cast<Instruction>(U->getUser ());
783790
Original file line number Diff line number Diff line change 1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+ ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3+
4+ ; Verify that we do not crash on, and do not promote, non-byte alloca types.
5+ define <8 x i1 > @non_byte_alloca_type () {
6+ ; CHECK-LABEL: define <8 x i1> @non_byte_alloca_type() {
7+ ; CHECK-NEXT: [[ENTRY:.*:]]
8+ ; CHECK-NEXT: [[C:%.*]] = icmp ugt <16 x i1> zeroinitializer, zeroinitializer
9+ ; CHECK-NEXT: [[RP:%.*]] = alloca <8 x i1>, align 1
10+ ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i1>, ptr [[RP]], align 1
11+ ; CHECK-NEXT: store <16 x i1> [[C]], ptr [[RP]], align 2
12+ ; CHECK-NEXT: ret <8 x i1> [[TMP0]]
13+ ;
14+ entry:
15+ %C = icmp ugt <16 x i1 > zeroinitializer , zeroinitializer
16+ %RP = alloca <8 x i1 >, align 1
17+ %0 = load <8 x i1 >, ptr %RP , align 1
18+ store <16 x i1 > %C , ptr %RP , align 2
19+ ret <8 x i1 > %0
20+ }
21+
You can’t perform that action at this time.
0 commit comments