Skip to content

Commit a6c9bf0

Browse files
[SLPVectorizer][X86] Free load cost for stores with constant pointers
When estimating the cost of storing a constant build vector, do not add the cost of the extra load that materializes the vector from the constant pool if the store's pointer operand is itself a constant. In that case no load is needed: the only operations required are computing the address of the constant (`rip+base_addr+offset`) and performing the store itself. Fixes regression: #111126.
1 parent 3a01b46 commit a6c9bf0

File tree

3 files changed

+26
-11
lines changed

3 files changed

+26
-11
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5157,8 +5157,9 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
51575157

51585158
InstructionCost Cost = 0;
51595159

5160-
// Add a cost for constant load to vector.
5161-
if (Opcode == Instruction::Store && OpInfo.isConstant())
5160+
// Add a cost for constant load to vector, if pointer is not a constant.
5161+
if (auto *SI = dyn_cast_or_null<StoreInst>(I);
5162+
SI && !isa<Constant>(SI->getPointerOperand()) && OpInfo.isConstant())
51625163
Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src),
51635164
/*AddressSpace=*/0, CostKind);
51645165

llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
5757
; CHECK-NEXT: [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0
5858
; CHECK-NEXT: br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]]
5959
; CHECK: [[IF_THEN_PROL]]:
60-
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 2
61-
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_PROL]], align 2
62-
; CHECK-NEXT: store i16 0, ptr [[R_022]], align 2
60+
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_022]], align 2
6361
; CHECK-NEXT: [[ARRAYIDX5_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 4
6462
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_PROL]], align 2
6563
; CHECK-NEXT: br label %[[FOR_INC_PROL]]
@@ -82,9 +80,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
8280
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0
8381
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]]
8482
; CHECK: [[IF_THEN]]:
85-
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 2
86-
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX]], align 2
87-
; CHECK-NEXT: store i16 0, ptr [[R_117]], align 2
83+
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_117]], align 2
8884
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 4
8985
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5]], align 2
9086
; CHECK-NEXT: br label %[[FOR_INC]]
@@ -96,9 +92,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
9692
; CHECK-NEXT: br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]]
9793
; CHECK: [[IF_THEN_1]]:
9894
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 6
99-
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 8
100-
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_1]], align 2
101-
; CHECK-NEXT: store i16 0, ptr [[ADD_PTR]], align 2
95+
; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[ADD_PTR]], align 2
10296
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 10
10397
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_1]], align 2
10498
; CHECK-NEXT: br label %[[FOR_INC_1]]
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=x86_64-unknown-linux-gnu --passes=slp-vectorizer -S -o - %s | FileCheck %s
3+
4+
@arr = global [20 x i64] zeroinitializer, align 16
5+
6+
define void @store_from_constant_ptr() {
7+
; CHECK-LABEL: define void @store_from_constant_ptr() {
8+
; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr @arr, align 16
9+
; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
10+
; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
11+
; CHECK-NEXT: ret void
12+
;
13+
store i64 1, ptr @arr, align 16
14+
store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 8), align 8
15+
store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
16+
store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 24), align 8
17+
store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
18+
store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 40), align 8
19+
ret void
20+
}

0 commit comments

Comments
 (0)