Skip to content

Commit 8e57689

Browse files
authored
[RISCV] add load/store misched/PostRA subtarget features (#149409)
Some processors benefit more from store clustering than from load clustering, and vice versa, depending on factors that are specific to each one (e.g. which macrofusions are implemented). Likewise, certain optimizations benefit more from misched clustering than from postRA clustering. Macrofusions are again an example: in a processor with store-pair macrofusions, like the veyron-v1, it is observed that misched clustering increases the number of macrofusions more than postRA clustering does. This of course isn't necessarily true for other processors, but it shows that processors can benefit from more fine-grained control of clustering mutations, and that each one may want to configure them differently. Add 4 new subtarget features that replace the existing riscv-misched-load-store-clustering and riscv-postmisched-load-store-clustering options: - disable-misched-load-clustering and disable-misched-store-clustering: disable load/store clustering during misched; - disable-postmisched-load-clustering and disable-postmisched-store-clustering: disable load/store clustering during PostRA. Note that the new subtarget features disable specific stages of the default clustering settings. The default itself (load and store clustering for both misched and PostRA) is left untouched. Disable all clustering except misched store clustering for the veyron-v1 processor using the new features.
1 parent 3686e5b commit 8e57689

File tree

7 files changed

+160
-20
lines changed

7 files changed

+160
-20
lines changed

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1700,6 +1700,18 @@ def TuneNLogNVRGather
17001700
def TunePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
17011701
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
17021702

1703+
def TuneDisableMISchedLoadClustering : SubtargetFeature<"disable-misched-load-clustering",
1704+
"EnableMISchedLoadClustering", "false", "Disable load clustering in the machine scheduler">;
1705+
1706+
def TuneDisableMISchedStoreClustering : SubtargetFeature<"disable-misched-store-clustering",
1707+
"EnableMISchedStoreClustering", "false", "Disable store clustering in the machine scheduler">;
1708+
1709+
def TuneDisablePostMISchedLoadClustering : SubtargetFeature<"disable-postmisched-load-clustering",
1710+
"EnablePostMISchedLoadClustering", "false", "Disable PostRA load clustering in the machine scheduler">;
1711+
1712+
def TuneDisablePostMISchedStoreClustering : SubtargetFeature<"disable-postmisched-store-clustering",
1713+
"EnablePostMISchedStoreClustering", "false", "Disable PostRA store clustering in the machine scheduler">;
1714+
17031715
def TuneDisableLatencySchedHeuristic
17041716
: SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
17051717
"Disable latency scheduling heuristic">;

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,9 @@ def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1",
590590
FeatureStdExtZicboz,
591591
FeatureVendorXVentanaCondOps],
592592
[TuneVentanaVeyron,
593+
TuneDisableMISchedLoadClustering,
594+
TuneDisablePostMISchedLoadClustering,
595+
TuneDisablePostMISchedStoreClustering,
593596
TuneLUIADDIFusion,
594597
TuneAUIPCADDIFusion,
595598
TuneZExtHFusion,

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,16 +94,6 @@ static cl::opt<bool>
9494
cl::desc("Enable the loop data prefetch pass"),
9595
cl::init(true));
9696

97-
static cl::opt<bool> EnableMISchedLoadStoreClustering(
98-
"riscv-misched-load-store-clustering", cl::Hidden,
99-
cl::desc("Enable load and store clustering in the machine scheduler"),
100-
cl::init(true));
101-
102-
static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
103-
"riscv-postmisched-load-store-clustering", cl::Hidden,
104-
cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
105-
cl::init(true));
106-
10797
static cl::opt<bool> DisableVectorMaskMutation(
10898
"riscv-disable-vector-mask-mutation",
10999
cl::desc("Disable the vector mask scheduling mutation"), cl::init(false),
@@ -294,15 +284,17 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
294284

295285
ScheduleDAGInstrs *
296286
RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
287+
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
297288
ScheduleDAGMILive *DAG = createSchedLive(C);
298-
if (EnableMISchedLoadStoreClustering) {
289+
290+
if (ST.enableMISchedLoadClustering())
299291
DAG->addMutation(createLoadClusterDAGMutation(
300292
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
293+
294+
if (ST.enableMISchedStoreClustering())
301295
DAG->addMutation(createStoreClusterDAGMutation(
302296
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
303-
}
304297

305-
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
306298
if (!DisableVectorMaskMutation && ST.hasVInstructions())
307299
DAG->addMutation(createRISCVVectorMaskDAGMutation(DAG->TRI));
308300

@@ -311,13 +303,16 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
311303

312304
ScheduleDAGInstrs *
313305
RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
306+
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
314307
ScheduleDAGMI *DAG = createSchedPostRA(C);
315-
if (EnablePostMISchedLoadStoreClustering) {
308+
309+
if (ST.enablePostMISchedLoadClustering())
316310
DAG->addMutation(createLoadClusterDAGMutation(
317311
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
312+
313+
if (ST.enablePostMISchedStoreClustering())
318314
DAG->addMutation(createStoreClusterDAGMutation(
319315
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
320-
}
321316

322317
return DAG;
323318
}

llvm/test/CodeGen/RISCV/features-info.ll

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
1414
; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
1515
; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic.
16+
; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler.
17+
; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler.
18+
; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler.
19+
; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
1620
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
1721
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
1822
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).

llvm/test/CodeGen/RISCV/misched-load-clustering.ll

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,42 @@
11
; REQUIRES: asserts
2-
; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
2+
;
3+
; Disable all misched clustering
4+
; RUN: llc -mtriple=riscv32 -verify-misched \
5+
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
36
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
47
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
5-
; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
8+
; RUN: llc -mtriple=riscv64 -verify-misched \
9+
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
610
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
711
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
12+
;
13+
; ST misched clustering only
14+
; RUN: llc -mtriple=riscv32 -verify-misched \
15+
; RUN: -mattr=+disable-misched-load-clustering \
16+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
17+
; RUN: | FileCheck -check-prefix=STCLUSTER %s
18+
; RUN: llc -mtriple=riscv64 -verify-misched \
19+
; RUN: -mattr=+disable-misched-load-clustering \
20+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
21+
; RUN: | FileCheck -check-prefix=STCLUSTER %s
22+
;
23+
; LD misched clustering only
824
; RUN: llc -mtriple=riscv32 -verify-misched \
25+
; RUN: -mattr=+disable-misched-store-clustering \
926
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
1027
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
1128
; RUN: llc -mtriple=riscv64 -verify-misched \
29+
; RUN: -mattr=+disable-misched-store-clustering \
1230
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
1331
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
14-
32+
;
33+
; Default misched cluster settings (i.e. both LD and ST clustering)
34+
; RUN: llc -mtriple=riscv32 -verify-misched \
35+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
36+
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
37+
; RUN: llc -mtriple=riscv64 -verify-misched \
38+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
39+
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
1540

1641
define i32 @load_clustering_1(ptr nocapture %p) {
1742
; NOCLUSTER: ********** MI Scheduling **********
@@ -22,13 +47,29 @@ define i32 @load_clustering_1(ptr nocapture %p) {
2247
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
2348
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
2449
;
50+
; STCLUSTER: ********** MI Scheduling **********
51+
; STCLUSTER-LABEL: load_clustering_1:%bb.0
52+
; STCLUSTER: *** Final schedule for %bb.0 ***
53+
; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
54+
; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
55+
; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
56+
; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
57+
;
2558
; LDCLUSTER: ********** MI Scheduling **********
2659
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
2760
; LDCLUSTER: *** Final schedule for %bb.0 ***
2861
; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
2962
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
3063
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
3164
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
65+
;
66+
; DEFAULTCLUSTER: ********** MI Scheduling **********
67+
; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
68+
; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
69+
; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
70+
; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
71+
; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
72+
; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
3273
entry:
3374
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
3475
%val0 = load i32, ptr %arrayidx0

llvm/test/CodeGen/RISCV/misched-mem-clustering.mir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
22
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
3-
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
3+
# RUN: -mattr=+disable-postmisched-load-clustering \
4+
# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
45
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
56
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
67
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
7-
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
8+
# RUN: -mattr=+disable-postmisched-load-clustering \
9+
# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \
810
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
911
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
1012
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; REQUIRES: asserts
2+
;
3+
; Disable all misched clustering
4+
; RUN: llc -mtriple=riscv32 -verify-misched \
5+
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
6+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
7+
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
8+
; RUN: llc -mtriple=riscv64 -verify-misched \
9+
; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \
10+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
11+
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
12+
;
13+
; ST misched clustering only
14+
; RUN: llc -mtriple=riscv32 -verify-misched \
15+
; RUN: -mattr=+disable-misched-load-clustering \
16+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
17+
; RUN: | FileCheck -check-prefix=STCLUSTER %s
18+
; RUN: llc -mtriple=riscv64 -verify-misched \
19+
; RUN: -mattr=+disable-misched-load-clustering \
20+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
21+
; RUN: | FileCheck -check-prefix=STCLUSTER %s
22+
;
23+
; LD misched clustering only
24+
; RUN: llc -mtriple=riscv32 -verify-misched \
25+
; RUN: -mattr=+disable-misched-store-clustering \
26+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
27+
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
28+
; RUN: llc -mtriple=riscv64 -verify-misched \
29+
; RUN: -mattr=+disable-misched-store-clustering \
30+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
31+
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
32+
;
33+
; Default misched cluster settings (i.e. both LD and ST clustering)
34+
; RUN: llc -mtriple=riscv32 -verify-misched \
35+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
36+
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
37+
; RUN: llc -mtriple=riscv64 -verify-misched \
38+
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
39+
; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
40+
41+
define i32 @store_clustering_1(ptr nocapture %p, i32 %v) {
42+
; NOCLUSTER: ********** MI Scheduling **********
43+
; NOCLUSTER-LABEL: store_clustering_1:%bb.0
44+
; NOCLUSTER: *** Final schedule for %bb.0 ***
45+
; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
46+
; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
47+
; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
48+
; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
49+
;
50+
; STCLUSTER: ********** MI Scheduling **********
51+
; STCLUSTER-LABEL: store_clustering_1:%bb.0
52+
; STCLUSTER: *** Final schedule for %bb.0 ***
53+
; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
54+
; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
55+
; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
56+
; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
57+
;
58+
; LDCLUSTER: ********** MI Scheduling **********
59+
; LDCLUSTER-LABEL: store_clustering_1:%bb.0
60+
; LDCLUSTER: *** Final schedule for %bb.0 ***
61+
; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
62+
; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
63+
; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
64+
; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
65+
;
66+
; DEFAULTCLUSTER: ********** MI Scheduling **********
67+
; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0
68+
; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
69+
; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2)
70+
; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1)
71+
; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0)
72+
; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3)
73+
entry:
74+
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
75+
store i32 %v, ptr %arrayidx0
76+
%arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
77+
store i32 %v, ptr %arrayidx1
78+
%arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
79+
store i32 %v, ptr %arrayidx2
80+
%arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
81+
store i32 %v, ptr %arrayidx3
82+
ret i32 %v
83+
}

0 commit comments

Comments
 (0)