Skip to content

Commit 0c0b0ea

Browse files
authored
[SPARC] Mark branches as being expensive in early Niagara CPUs (#166489)
Early Niagara processors (T1-T3) lack any branch predictor, yet they also have a pipeline long enough that the delay slot cannot cover all of the branch latency. This means that branch instructions will stall the processor for a couple of cycles, which makes them an expensive operation. Additionally, the high cost of branching means that it's still profitable to prefer conditional moves even when the condition is predictable, so let LLVM know about both things. On SPARC T2, a pgbench test seems to show a modest but fairly consistent speedup (up to around 3%).
1 parent c3b2849 commit 0c0b0ea

File tree

3 files changed

+97
-3
lines changed

3 files changed

+97
-3
lines changed

llvm/lib/Target/Sparc/Sparc.td

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
9595
def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
9696
"rd %pc, %XX is slow", [FeatureV9]>;
9797

98+
def TuneNoPredictor : SubtargetFeature<"no-predictor", "HasNoPredictor", "true",
99+
"Processor has no branch predictor, branches stall execution", []>;
100+
98101
//==== Features added predominantly for LEON subtarget support
99102
include "LeonFeatures.td"
100103

@@ -174,12 +177,15 @@ def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
174177
FeatureVIS2],
175178
[TuneSlowRDPC]>;
176179
def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
177-
FeatureVIS2, FeatureUA2005]>;
180+
FeatureVIS2, FeatureUA2005],
181+
[TuneNoPredictor]>;
178182
def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc,
179-
FeatureVIS, FeatureVIS2, FeatureUA2005]>;
183+
FeatureVIS, FeatureVIS2, FeatureUA2005],
184+
[TuneNoPredictor]>;
180185
def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc,
181186
FeatureVIS, FeatureVIS2, FeatureVIS3,
182-
FeatureUA2005, FeatureUA2007]>;
187+
FeatureUA2005, FeatureUA2007],
188+
[TuneNoPredictor]>;
183189
def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
184190
FeatureVIS, FeatureVIS2, FeatureVIS3,
185191
FeatureUA2005, FeatureUA2007, FeatureOSA2011,

llvm/lib/Target/Sparc/SparcISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2000,6 +2000,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
20002000

20012001
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
20022002

2003+
// Some processors have no branch predictor and have pipelines longer than
2004+
// what can be covered by the delay slot. This results in a stall, so mark
2005+
// branches as expensive on those processors.
2006+
setJumpIsExpensive(Subtarget->hasNoPredictor());
2007+
// The high cost of branching means that using conditional moves will
2008+
// still be profitable even if the condition is predictable.
2009+
PredictableSelectIsExpensive = !isJumpExpensive();
2010+
20032011
setMinFunctionAlignment(Align(4));
20042012

20052013
computeRegisterProperties(Subtarget->getRegisterInfo());
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 | FileCheck --check-prefix=SPARC %s
3+
; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
4+
; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparc -mcpu=v9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC-NO-PREDICTOR %s
5+
; RUN: llc -O3 < %s -relocation-model=pic -mtriple=sparcv9 -mattr=+no-predictor | FileCheck --check-prefix=SPARC64-NO-PREDICTOR %s
6+
7+
;; Normally, highly predictable selects should be turned into branches.
8+
;; On the other hand, early Niagara processors should prefer conditional moves
9+
;; over branches even when the condition is predictable.
10+
11+
define i32 @cdiv(i32 %cond, i32 %num) #0 {
12+
; SPARC-LABEL: cdiv:
13+
; SPARC: ! %bb.0: ! %entry
14+
; SPARC-NEXT: cmp %o0, 0
15+
; SPARC-NEXT: be %icc, .LBB0_2
16+
; SPARC-NEXT: mov %o1, %o0
17+
; SPARC-NEXT: ! %bb.1: ! %select.end
18+
; SPARC-NEXT: retl
19+
; SPARC-NEXT: nop
20+
; SPARC-NEXT: .LBB0_2: ! %select.true.sink
21+
; SPARC-NEXT: sethi 1398101, %o1
22+
; SPARC-NEXT: or %o1, 342, %o1
23+
; SPARC-NEXT: smul %o0, %o1, %o0
24+
; SPARC-NEXT: rd %y, %o0
25+
; SPARC-NEXT: srl %o0, 31, %o1
26+
; SPARC-NEXT: retl
27+
; SPARC-NEXT: add %o0, %o1, %o0
28+
;
29+
; SPARC64-LABEL: cdiv:
30+
; SPARC64: ! %bb.0: ! %entry
31+
; SPARC64-NEXT: cmp %o0, 0
32+
; SPARC64-NEXT: be %icc, .LBB0_2
33+
; SPARC64-NEXT: mov %o1, %o0
34+
; SPARC64-NEXT: ! %bb.1: ! %select.end
35+
; SPARC64-NEXT: retl
36+
; SPARC64-NEXT: nop
37+
; SPARC64-NEXT: .LBB0_2: ! %select.true.sink
38+
; SPARC64-NEXT: sra %o0, 0, %o0
39+
; SPARC64-NEXT: sethi 1398101, %o1
40+
; SPARC64-NEXT: or %o1, 342, %o1
41+
; SPARC64-NEXT: mulx %o0, %o1, %o0
42+
; SPARC64-NEXT: srlx %o0, 63, %o1
43+
; SPARC64-NEXT: srlx %o0, 32, %o0
44+
; SPARC64-NEXT: retl
45+
; SPARC64-NEXT: add %o0, %o1, %o0
46+
;
47+
; SPARC-NO-PREDICTOR-LABEL: cdiv:
48+
; SPARC-NO-PREDICTOR: ! %bb.0: ! %entry
49+
; SPARC-NO-PREDICTOR-NEXT: sethi 1398101, %o2
50+
; SPARC-NO-PREDICTOR-NEXT: or %o2, 342, %o2
51+
; SPARC-NO-PREDICTOR-NEXT: smul %o1, %o2, %o2
52+
; SPARC-NO-PREDICTOR-NEXT: rd %y, %o2
53+
; SPARC-NO-PREDICTOR-NEXT: srl %o2, 31, %o3
54+
; SPARC-NO-PREDICTOR-NEXT: add %o2, %o3, %o2
55+
; SPARC-NO-PREDICTOR-NEXT: cmp %o0, 0
56+
; SPARC-NO-PREDICTOR-NEXT: move %icc, %o2, %o1
57+
; SPARC-NO-PREDICTOR-NEXT: retl
58+
; SPARC-NO-PREDICTOR-NEXT: mov %o1, %o0
59+
;
60+
; SPARC64-NO-PREDICTOR-LABEL: cdiv:
61+
; SPARC64-NO-PREDICTOR: ! %bb.0: ! %entry
62+
; SPARC64-NO-PREDICTOR-NEXT: sra %o1, 0, %o2
63+
; SPARC64-NO-PREDICTOR-NEXT: sethi 1398101, %o3
64+
; SPARC64-NO-PREDICTOR-NEXT: or %o3, 342, %o3
65+
; SPARC64-NO-PREDICTOR-NEXT: mulx %o2, %o3, %o2
66+
; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 63, %o3
67+
; SPARC64-NO-PREDICTOR-NEXT: srlx %o2, 32, %o2
68+
; SPARC64-NO-PREDICTOR-NEXT: add %o2, %o3, %o2
69+
; SPARC64-NO-PREDICTOR-NEXT: cmp %o0, 0
70+
; SPARC64-NO-PREDICTOR-NEXT: move %icc, %o2, %o1
71+
; SPARC64-NO-PREDICTOR-NEXT: retl
72+
; SPARC64-NO-PREDICTOR-NEXT: mov %o1, %o0
73+
entry:
74+
%div = sdiv i32 %num, 3
75+
%cmp = icmp eq i32 %cond, 0
76+
%ret = select i1 %cmp, i32 %div, i32 %num
77+
ret i32 %ret
78+
}
79+
80+
attributes #0 = { nounwind }

0 commit comments

Comments
 (0)