Skip to content

Commit 70017d4

Browse files
committed
[SOL] Fine tune memory operations threshold (#134)
1 parent 4176755 commit 70017d4

File tree

2 files changed

+16
-12
lines changed

2 files changed

+16
-12
lines changed

llvm/lib/Target/SBF/SBFISelLowering.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -170,15 +170,20 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,
170170
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
171171
MaxLoadsPerMemcmp = 0;
172172
} else {
173-
auto SelectionDAGInfo = STI.getSelectionDAGInfo();
174-
// inline memcpy() for kernel to see explicit copy
175-
unsigned CommonMaxStores =
176-
SelectionDAGInfo->getCommonMaxStoresPerMemFunc();
177-
178-
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
179-
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
180-
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
181-
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
173+
// A syscall consumes at least 10 compute units (CUs), so we should only invoke it when
174+
// the number of instructions is at least 10.
175+
176+
// Memset translates to stdw or stxdw, so the maximum should be 10.
177+
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 10;
178+
// Each store in memcpy follows a load, so the maximum is 5.
179+
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 5;
180+
// Each store in memmove follows a load, so the maximum is 5.
181+
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 5;
182+
// Memcmp expands to three instructions for each load:
183+
// 1. One load for each pointer being compared.
184+
// 2. One jne for each pair of loads.
185+
// The limit here should be three, since 3 * 3 = 9 instructions, which stays below the 10-CU cost of a syscall.
186+
MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = 3;
182187
}
183188

184189
// CPU/Feature control

llvm/test/CodeGen/SBF/i128.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@ entry:
3232
}
3333

3434
; CHECK-LABEL: test
35-
; CHECK: mov64 r7, r10
36-
; CHECK: add64 r7, -32
37-
; CHECK: mov64 r1, r7
3835
; CHECK: stxw [r10 - 32], r{{[0-9]+}}
36+
; CHECK: mov64 r1, r10
37+
; CHECK: add64 r1, -32
3938

4039
; Function Attrs: argmemonly nounwind willreturn
4140
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1

0 commit comments

Comments
 (0)