Skip to content

Commit 79cbb7b

Browse files
mahesh-attarderlavaee
authored and committed
[X86][CodeGen] Add Scheduling Model for LNL-P (llvm#139446)
This patch introduces a scheduling model for the Lunarlake P-core. The scheduling model was generated by a tool developed by Intel engineer @HaohaiWen, available at https://github.com/HaohaiWen/schedtool. Data sources: - Intel SDM: https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html - The Alderlake scheduling model and the uops.info table, for entries unchanged from previous generations.
1 parent 7fe43f6 commit 79cbb7b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+15626
-1
lines changed

llvm/lib/Target/X86/X86.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ include "X86SchedSkylakeClient.td"
811811
include "X86SchedSkylakeServer.td"
812812
include "X86SchedIceLake.td"
813813
include "X86SchedAlderlakeP.td"
814+
include "X86SchedLunarlakeP.td"
814815
include "X86SchedSapphireRapids.td"
815816

816817
//===----------------------------------------------------------------------===//
@@ -1870,10 +1871,12 @@ def : ProcModel<"meteorlake", AlderlakePModel,
18701871
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
18711872
def : ProcModel<"arrowlake", AlderlakePModel,
18721873
ProcessorFeatures.ARLFeatures, ProcessorFeatures.ADLTuning>;
1873-
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
1874+
foreach P = ["arrowlake-s", "arrowlake_s"] in {
18741875
def : ProcModel<P, AlderlakePModel,
18751876
ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
18761877
}
1878+
def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
1879+
ProcessorFeatures.ADLTuning>;
18771880
def : ProcModel<"pantherlake", AlderlakePModel,
18781881
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
18791882
def : ProcModel<"clearwaterforest", AlderlakePModel,

llvm/lib/Target/X86/X86PfmCounters.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,26 @@ def SapphireRapidsPfmCounters : ProcPfmCounters {
236236
}
237237
def : PfmCountersBinding<"sapphirerapids", SapphireRapidsPfmCounters>;
238238

239+
def LunarLakePfmCounters : ProcPfmCounters {
240+
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
241+
let UopsCounter = UopsIssuedPfmCounter;
242+
let IssueCounters = [
243+
// Refer: https://perfmon-events.intel.com/ section Lunar Lake Hybrid Event
244+
// ALU Dispatch - Any of ALUs with latency 1 cycle that is not jmp or Shift.
245+
PfmIssueCounter<"LNLPVPort02_03", "uops_dispatched:alu">,
246+
PfmIssueCounter<"LNLPPort00_01_02_03_04_05", "uops_dispatched:int_eu_all">,
247+
PfmIssueCounter<"LNLPPort00_02_04", "uops_dispatched:jmp">,
248+
PfmIssueCounter<"LNLPPort20_21_22", "uops_dispatched:load">,
249+
PfmIssueCounter<"LNLPPort01_03_05", "uops_dispatched:shift">,
250+
// Slow Dispatch - If uops latency > 1, counted as slow. TBD
251+
// PfmIssueCounter<"LNLPPort01_03_05", "uops_dispatched:slow">,
252+
PfmIssueCounter<"LNLPPort25_26_27", "uops_dispatched:sta">,
253+
PfmIssueCounter<"LNLPPort10_11", "uops_dispatched:std">
254+
];
255+
let ValidationCounters = DefaultIntelPfmValidationCounters;
256+
}
257+
def : PfmCountersBinding<"lunarlake", LunarLakePfmCounters>; // CPU name spelled as in the "lunarlake" ProcModel (lowercase, like "sapphirerapids").
258+
239259
// AMD X86 Counters.
240260
defvar DefaultAMDPfmValidationCounters = [
241261
PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">,

llvm/lib/Target/X86/X86SchedLunarlakeP.td

Lines changed: 2409 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
3+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS
4+
5+
addq $44, 64(%r14)
6+
addq $44, 128(%r14)
7+
addq $44, 192(%r14)
8+
addq $44, 256(%r14)
9+
addq $44, 320(%r14)
10+
addq $44, 384(%r14)
11+
addq $44, 448(%r14)
12+
addq $44, 512(%r14)
13+
addq $44, 576(%r14)
14+
addq $44, 640(%r14)
15+
16+
# ALL: Iterations: 100
17+
# ALL-NEXT: Instructions: 1000
18+
19+
# NOALIAS-NEXT: Total Cycles: 681
20+
# YESALIAS-NEXT: Total Cycles: 12003
21+
22+
# ALL-NEXT: Total uOps: 4000
23+
24+
# ALL: Dispatch Width: 8
25+
26+
# NOALIAS-NEXT: uOps Per Cycle: 5.87
27+
# NOALIAS-NEXT: IPC: 1.47
28+
29+
# YESALIAS-NEXT: uOps Per Cycle: 0.33
30+
# YESALIAS-NEXT: IPC: 0.08
31+
32+
# ALL-NEXT: Block RThroughput: 6.7
33+
34+
# ALL: Instruction Info:
35+
# ALL-NEXT: [1]: #uOps
36+
# ALL-NEXT: [2]: Latency
37+
# ALL-NEXT: [3]: RThroughput
38+
# ALL-NEXT: [4]: MayLoad
39+
# ALL-NEXT: [5]: MayStore
40+
# ALL-NEXT: [6]: HasSideEffects (U)
41+
42+
# ALL: [1] [2] [3] [4] [5] [6] Instructions:
43+
# ALL-NEXT: 4 12 0.67 * * addq $44, 64(%r14)
44+
# ALL-NEXT: 4 12 0.67 * * addq $44, 128(%r14)
45+
# ALL-NEXT: 4 12 0.67 * * addq $44, 192(%r14)
46+
# ALL-NEXT: 4 12 0.67 * * addq $44, 256(%r14)
47+
# ALL-NEXT: 4 12 0.67 * * addq $44, 320(%r14)
48+
# ALL-NEXT: 4 12 0.67 * * addq $44, 384(%r14)
49+
# ALL-NEXT: 4 12 0.67 * * addq $44, 448(%r14)
50+
# ALL-NEXT: 4 12 0.67 * * addq $44, 512(%r14)
51+
# ALL-NEXT: 4 12 0.67 * * addq $44, 576(%r14)
52+
# ALL-NEXT: 4 12 0.67 * * addq $44, 640(%r14)
53+
54+
# ALL: Resources:
55+
# ALL-NEXT: [0] - LNLPPort00
56+
# ALL-NEXT: [1] - LNLPPort01
57+
# ALL-NEXT: [2] - LNLPPort02
58+
# ALL-NEXT: [3] - LNLPPort03
59+
# ALL-NEXT: [4] - LNLPPort04
60+
# ALL-NEXT: [5] - LNLPPort05
61+
# ALL-NEXT: [6] - LNLPPort10
62+
# ALL-NEXT: [7] - LNLPPort11
63+
# ALL-NEXT: [8] - LNLPPort20
64+
# ALL-NEXT: [9] - LNLPPort21
65+
# ALL-NEXT: [10] - LNLPPort22
66+
# ALL-NEXT: [11] - LNLPPort25
67+
# ALL-NEXT: [12] - LNLPPort26
68+
# ALL-NEXT: [13] - LNLPPort27
69+
# ALL-NEXT: [14] - LNLPPortInvalid
70+
# ALL-NEXT: [15] - LNLPVPort00
71+
# ALL-NEXT: [16] - LNLPVPort01
72+
# ALL-NEXT: [17] - LNLPVPort02
73+
# ALL-NEXT: [18] - LNLPVPort03
74+
75+
# ALL: Resource pressure per iteration:
76+
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18]
77+
# ALL-NEXT: - 3.33 - 3.33 - 3.34 5.00 5.00 6.66 6.66 6.68 3.33 3.33 3.34 - - - - -
78+
79+
# ALL: Resource pressure by instruction:
80+
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] Instructions:
81+
# ALL-NEXT: - 0.33 - 0.33 - 0.34 - 1.00 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 64(%r14)
82+
# ALL-NEXT: - 0.33 - 0.34 - 0.33 1.00 - 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 128(%r14)
83+
# ALL-NEXT: - 0.34 - 0.33 - 0.33 - 1.00 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 192(%r14)
84+
# ALL-NEXT: - 0.33 - 0.33 - 0.34 1.00 - 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 256(%r14)
85+
# ALL-NEXT: - 0.33 - 0.34 - 0.33 - 1.00 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 320(%r14)
86+
# ALL-NEXT: - 0.34 - 0.33 - 0.33 1.00 - 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 384(%r14)
87+
# ALL-NEXT: - 0.33 - 0.33 - 0.34 - 1.00 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 448(%r14)
88+
# ALL-NEXT: - 0.33 - 0.34 - 0.33 1.00 - 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 512(%r14)
89+
# ALL-NEXT: - 0.34 - 0.33 - 0.33 - 1.00 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 576(%r14)
90+
# ALL-NEXT: - 0.33 - 0.33 - 0.34 1.00 - 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 640(%r14)
91+
92+
# ALL: Timeline view:
93+
94+
# NOALIAS-NEXT: 0123456789
95+
# NOALIAS-NEXT: Index 0123456789 0
96+
97+
# YESALIAS-NEXT: 0123456789 0123456789 0123456789 01234
98+
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789
99+
100+
# NOALIAS: [0,0] DeeeeeeeeeeeeER. . addq $44, 64(%r14)
101+
# NOALIAS-NEXT: [0,1] DeeeeeeeeeeeeER. . addq $44, 128(%r14)
102+
# NOALIAS-NEXT: [0,2] .DeeeeeeeeeeeeER . addq $44, 192(%r14)
103+
# NOALIAS-NEXT: [0,3] .D=eeeeeeeeeeeeER . addq $44, 256(%r14)
104+
# NOALIAS-NEXT: [0,4] . DeeeeeeeeeeeeER . addq $44, 320(%r14)
105+
# NOALIAS-NEXT: [0,5] . D=eeeeeeeeeeeeER . addq $44, 384(%r14)
106+
# NOALIAS-NEXT: [0,6] . D=eeeeeeeeeeeeER . addq $44, 448(%r14)
107+
# NOALIAS-NEXT: [0,7] . D=eeeeeeeeeeeeER . addq $44, 512(%r14)
108+
# NOALIAS-NEXT: [0,8] . D=eeeeeeeeeeeeER. addq $44, 576(%r14)
109+
# NOALIAS-NEXT: [0,9] . D==eeeeeeeeeeeeER addq $44, 640(%r14)
110+
111+
# YESALIAS: [0,0] DeeeeeeeeeeeeER. . . . . . . . . . . . . addq $44, 64(%r14)
112+
# YESALIAS-NEXT: [0,1] D============eeeeeeeeeeeeER . . . . . . . . . . addq $44, 128(%r14)
113+
# YESALIAS-NEXT: [0,2] .D=======================eeeeeeeeeeeeER . . . . . . . . addq $44, 192(%r14)
114+
# YESALIAS-NEXT: [0,3] .D===================================eeeeeeeeeeeeER . . . . . addq $44, 256(%r14)
115+
# YESALIAS-NEXT: [0,4] . D==============================================eeeeeeeeeeeeER . . . addq $44, 320(%r14)
116+
# YESALIAS-NEXT: [0,5] . D==========================================================eeeeeeeeeeeeER addq $44, 384(%r14)
117+
# YESALIAS-NEXT: Truncated display due to cycle limit
118+
119+
# ALL: Average Wait times (based on the timeline view):
120+
# ALL-NEXT: [0]: Executions
121+
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
122+
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
123+
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
124+
125+
# ALL: [0] [1] [2] [3]
126+
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)
127+
128+
# NOALIAS-NEXT: 1. 1 1.0 0.0 0.0 addq $44, 128(%r14)
129+
# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14)
130+
# NOALIAS-NEXT: 3. 1 2.0 1.0 0.0 addq $44, 256(%r14)
131+
# NOALIAS-NEXT: 4. 1 1.0 0.0 0.0 addq $44, 320(%r14)
132+
# NOALIAS-NEXT: 5. 1 2.0 1.0 0.0 addq $44, 384(%r14)
133+
# NOALIAS-NEXT: 6. 1 2.0 1.0 0.0 addq $44, 448(%r14)
134+
# NOALIAS-NEXT: 7. 1 2.0 0.0 0.0 addq $44, 512(%r14)
135+
# NOALIAS-NEXT: 8. 1 2.0 1.0 0.0 addq $44, 576(%r14)
136+
# NOALIAS-NEXT: 9. 1 3.0 1.0 0.0 addq $44, 640(%r14)
137+
# NOALIAS-NEXT: 1 1.7 0.7 0.0 <total>
138+
139+
# YESALIAS-NEXT: 1. 1 13.0 0.0 0.0 addq $44, 128(%r14)
140+
# YESALIAS-NEXT: 2. 1 24.0 0.0 0.0 addq $44, 192(%r14)
141+
# YESALIAS-NEXT: 3. 1 36.0 0.0 0.0 addq $44, 256(%r14)
142+
# YESALIAS-NEXT: 4. 1 47.0 0.0 0.0 addq $44, 320(%r14)
143+
# YESALIAS-NEXT: 5. 1 59.0 0.0 0.0 addq $44, 384(%r14)
144+
# YESALIAS-NEXT: 6. 1 70.0 0.0 0.0 addq $44, 448(%r14)
145+
# YESALIAS-NEXT: 7. 1 82.0 0.0 0.0 addq $44, 512(%r14)
146+
# YESALIAS-NEXT: 8. 1 93.0 0.0 0.0 addq $44, 576(%r14)
147+
# YESALIAS-NEXT: 9. 1 105.0 0.0 0.0 addq $44, 640(%r14)
148+
# YESALIAS-NEXT: 1 53.0 0.1 0.0 <total>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -all-views=false -summary-view < %s | FileCheck %s
3+
4+
# Issue #57548
5+
6+
# Do not crash when simulating instructions that consume partially overlapping
7+
# resource groups.
8+
9+
vpsllw %xmm1, %ymm0, %ymm0
10+
vpsllw %xmm1, %xmm2, %xmm1
11+
vpand %ymm1, %ymm0, %ymm0
12+
13+
# CHECK: Iterations: 100
14+
# CHECK-NEXT: Instructions: 300
15+
# CHECK-NEXT: Total Cycles: 503
16+
# CHECK-NEXT: Total uOps: 500
17+
18+
# CHECK: Dispatch Width: 8
19+
# CHECK-NEXT: uOps Per Cycle: 0.99
20+
# CHECK-NEXT: IPC: 0.60
21+
# CHECK-NEXT: Block RThroughput: 1.0
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
2+
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -instruction-tables < %s | FileCheck %s
3+
4+
aesdec %xmm0, %xmm2
5+
aesdec (%rax), %xmm2
6+
7+
aesdeclast %xmm0, %xmm2
8+
aesdeclast (%rax), %xmm2
9+
10+
aesenc %xmm0, %xmm2
11+
aesenc (%rax), %xmm2
12+
13+
aesenclast %xmm0, %xmm2
14+
aesenclast (%rax), %xmm2
15+
16+
# CHECK: Instruction Info:
17+
# CHECK-NEXT: [1]: #uOps
18+
# CHECK-NEXT: [2]: Latency
19+
# CHECK-NEXT: [3]: RThroughput
20+
# CHECK-NEXT: [4]: MayLoad
21+
# CHECK-NEXT: [5]: MayStore
22+
# CHECK-NEXT: [6]: HasSideEffects (U)
23+
24+
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
25+
# CHECK-NEXT: 1 3 2.00 aesdec %xmm0, %xmm2
26+
# CHECK-NEXT: 2 11 2.33 * aesdec (%rax), %xmm2
27+
# CHECK-NEXT: 1 3 2.00 aesdeclast %xmm0, %xmm2
28+
# CHECK-NEXT: 2 11 2.33 * aesdeclast (%rax), %xmm2
29+
# CHECK-NEXT: 1 3 2.00 aesenc %xmm0, %xmm2
30+
# CHECK-NEXT: 2 11 2.33 * aesenc (%rax), %xmm2
31+
# CHECK-NEXT: 1 3 2.00 aesenclast %xmm0, %xmm2
32+
# CHECK-NEXT: 2 11 2.33 * aesenclast (%rax), %xmm2
33+
34+
# CHECK: Resources:
35+
# CHECK-NEXT: [0] - LNLPPort00
36+
# CHECK-NEXT: [1] - LNLPPort01
37+
# CHECK-NEXT: [2] - LNLPPort02
38+
# CHECK-NEXT: [3] - LNLPPort03
39+
# CHECK-NEXT: [4] - LNLPPort04
40+
# CHECK-NEXT: [5] - LNLPPort05
41+
# CHECK-NEXT: [6] - LNLPPort10
42+
# CHECK-NEXT: [7] - LNLPPort11
43+
# CHECK-NEXT: [8] - LNLPPort20
44+
# CHECK-NEXT: [9] - LNLPPort21
45+
# CHECK-NEXT: [10] - LNLPPort22
46+
# CHECK-NEXT: [11] - LNLPPort25
47+
# CHECK-NEXT: [12] - LNLPPort26
48+
# CHECK-NEXT: [13] - LNLPPort27
49+
# CHECK-NEXT: [14] - LNLPPortInvalid
50+
# CHECK-NEXT: [15] - LNLPVPort00
51+
# CHECK-NEXT: [16] - LNLPVPort01
52+
# CHECK-NEXT: [17] - LNLPVPort02
53+
# CHECK-NEXT: [18] - LNLPVPort03
54+
55+
# CHECK: Resource pressure per iteration:
56+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18]
57+
# CHECK-NEXT: - - - - - - - - 9.33 9.33 9.33 - - - - 16.00 16.00 - -
58+
59+
# CHECK: Resource pressure by instruction:
60+
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] Instructions:
61+
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 2.00 - - aesdec %xmm0, %xmm2
62+
# CHECK-NEXT: - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesdec (%rax), %xmm2
63+
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 2.00 - - aesdeclast %xmm0, %xmm2
64+
# CHECK-NEXT: - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesdeclast (%rax), %xmm2
65+
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 2.00 - - aesenc %xmm0, %xmm2
66+
# CHECK-NEXT: - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesenc (%rax), %xmm2
67+
# CHECK-NEXT: - - - - - - - - - - - - - - - 2.00 2.00 - - aesenclast %xmm0, %xmm2
68+
# CHECK-NEXT: - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesenclast (%rax), %xmm2

0 commit comments

Comments
 (0)