diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 179e29e40614e..689355a168c2a 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5157,8 +5157,9 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, InstructionCost Cost = 0; - // Add a cost for constant load to vector. - if (Opcode == Instruction::Store && OpInfo.isConstant()) + // Add a cost for constant load to vector, if pointer is not a constant. + if (auto *SI = dyn_cast_or_null<StoreInst>(I); + SI && !isa<Constant>(SI->getPointerOperand()) && OpInfo.isConstant()) Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src), /*AddressSpace=*/0, CostKind); diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll index da1808fc278c0..75829cb71814f 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll @@ -57,9 +57,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 { ; CHECK-NEXT: [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0 ; CHECK-NEXT: br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]] ; CHECK: [[IF_THEN_PROL]]: -; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 2 -; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_PROL]], align 2 -; CHECK-NEXT: store i16 0, ptr [[R_022]], align 2 +; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_022]], align 2 ; CHECK-NEXT: [[ARRAYIDX5_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 4 ; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_PROL]], align 2 ; CHECK-NEXT: br label %[[FOR_INC_PROL]] @@ -82,9 +80,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0 ; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]] ; CHECK: [[IF_THEN]]: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr 
[[R_117]], i64 2 -; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX]], align 2 -; CHECK-NEXT: store i16 0, ptr [[R_117]], align 2 +; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_117]], align 2 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 4 ; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5]], align 2 ; CHECK-NEXT: br label %[[FOR_INC]] @@ -96,9 +92,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 { ; CHECK-NEXT: br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]] ; CHECK: [[IF_THEN_1]]: ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 6 -; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 8 -; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_1]], align 2 -; CHECK-NEXT: store i16 0, ptr [[ADD_PTR]], align 2 +; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[ADD_PTR]], align 2 ; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 10 ; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_1]], align 2 ; CHECK-NEXT: br label %[[FOR_INC_1]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll new file mode 100644 index 0000000000000..1d9e4d20d20af --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=x86_64-unknown-linux-gnu --passes=slp-vectorizer -S -o - %s | FileCheck %s + +@arr = global [20 x i64] zeroinitializer, align 16 + +define void @store_from_constant_ptr() { +; CHECK-LABEL: define void @store_from_constant_ptr() { +; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr @arr, align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16 +; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16 +; 
CHECK-NEXT: ret void +; + store i64 1, ptr @arr, align 16 + store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 8), align 8 + store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16 + store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 24), align 8 + store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16 + store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 40), align 8 + ret void +}