Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,7 @@ include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
include "X86SchedLunarlakeP.td"
include "X86SchedSapphireRapids.td"

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1862,10 +1863,12 @@ def : ProcModel<"meteorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"arrowlake", AlderlakePModel,
ProcessorFeatures.SRFFeatures, ProcessorFeatures.ADLTuning>;
foreach P = ["arrowlake-s", "arrowlake_s", "lunarlake"] in {
foreach P = ["arrowlake-s", "arrowlake_s"] in {
def : ProcModel<P, AlderlakePModel,
ProcessorFeatures.ARLSFeatures, ProcessorFeatures.ADLTuning>;
}
// "lunarlake" is scheduled with its own LunarlakePModel, but keeps the same
// ProcessorFeatures.ARLSFeatures / ProcessorFeatures.ADLTuning lists that the
// "arrowlake-s" entries use.
def : ProcModel<"lunarlake", LunarlakePModel, ProcessorFeatures.ARLSFeatures,
ProcessorFeatures.ADLTuning>;
def : ProcModel<"pantherlake", AlderlakePModel,
ProcessorFeatures.PTLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"clearwaterforest", AlderlakePModel,
Expand Down
20 changes: 20 additions & 0 deletions llvm/lib/Target/X86/X86PfmCounters.td
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,26 @@ def SapphireRapidsPfmCounters : ProcPfmCounters {
}
def : PfmCountersBinding<"sapphirerapids", SapphireRapidsPfmCounters>;

def LunarLakePfmCounters : ProcPfmCounters {
let CycleCounter = UnhaltedCoreCyclesPfmCounter;
let UopsCounter = UopsIssuedPfmCounter;
let IssueCounters = [
// Refer: https://perfmon-events.intel.com/ section Lunar Lake Hybrid Event
// ALU Dispatch - Any of ALUs with latency 1 cycle that is not jmp or Shift.
PfmIssueCounter<"LNLPVPort02_03", "uops_dispatched:alu">,
PfmIssueCounter<"LNLPPort00_01_02_03_04_05", "uops_dispatched:int_eu_all">,
PfmIssueCounter<"LNLPPort00_02_04", "uops_dispatched:jmp">,
PfmIssueCounter<"LNLPPort20_21_22", "uops_dispatched:load">,
PfmIssueCounter<"LNLPPort01_03_05", "uops_dispatched:shift">,
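// Slow Dispatch - uops with latency > 1 are counted as slow.
// TBD: left disabled because it would reuse LNLPPort01_03_05, which is
// already bound to uops_dispatched:shift.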
Copy link
Contributor Author

@mahesh-attarde mahesh-attarde May 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uops_dispatched:shift and uops_dispatched:slow use the same resource group, LNLPPort01_03_05. We don't allow duplicates in ProcPfmCounters, and we don't yet have a resource group with finer granularity around latency. Hence this entry is commented out and marked TBD, since there is no immediate use case for it.

// PfmIssueCounter<"LNLPPort01_03_05", "uops_dispatched:slow">,
PfmIssueCounter<"LNLPPort25_26_27", "uops_dispatched:sta">,
PfmIssueCounter<"LNLPPort10_11", "uops_dispatched:std">
];
let ValidationCounters = DefaultIntelPfmValidationCounters;
}
// Bind the counters under the same lowercase CPU name used by
// ProcModel<"lunarlake", ...> and by the other bindings in this file; the
// previous mixed-case "Lunarlake" key did not match the registered CPU name.
def : PfmCountersBinding<"lunarlake", LunarLakePfmCounters>;

// AMD X86 Counters.
defvar DefaultAMDPfmValidationCounters = [
PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">,
Expand Down
2,409 changes: 2,409 additions & 0 deletions llvm/lib/Target/X86/X86SchedLunarlakeP.td

Large diffs are not rendered by default.

149 changes: 149 additions & 0 deletions llvm/test/tools/llvm-mca/X86/LunarlakeP/independent-load-stores.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -timeline -timeline-max-iterations=1 < %s | FileCheck %s -check-prefixes=ALL,NOALIAS
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s -check-prefixes=ALL,YESALIAS

addq $44, 64(%r14)
addq $44, 128(%r14)
addq $44, 192(%r14)
addq $44, 256(%r14)
addq $44, 320(%r14)
addq $44, 384(%r14)
addq $44, 448(%r14)
addq $44, 512(%r14)
addq $44, 576(%r14)
addq $44, 640(%r14)

# ALL: Iterations: 100
# ALL-NEXT: Instructions: 1000

# NOALIAS-NEXT: Total Cycles: 681
# YESALIAS-NEXT: Total Cycles: 12003

# ALL-NEXT: Total uOps: 4000

# ALL: Dispatch Width: 8

# NOALIAS-NEXT: uOps Per Cycle: 5.87
# NOALIAS-NEXT: IPC: 1.47

# YESALIAS-NEXT: uOps Per Cycle: 0.33
# YESALIAS-NEXT: IPC: 0.08

# ALL-NEXT: Block RThroughput: 6.7

# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)

# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 4 12 0.67 * * addq $44, 64(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 128(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 192(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 256(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 320(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 384(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 448(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 512(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 576(%r14)
# ALL-NEXT: 4 12 0.67 * * addq $44, 640(%r14)

# ALL: Resources:
# ALL-NEXT: [0] - ADLPPort00
# ALL-NEXT: [1] - LNLPPort00
# ALL-NEXT: [2] - LNLPPort01
# ALL-NEXT: [3] - LNLPPort02
# ALL-NEXT: [4] - LNLPPort03
# ALL-NEXT: [5] - LNLPPort04
# ALL-NEXT: [6] - LNLPPort05
# ALL-NEXT: [7] - LNLPPort10
# ALL-NEXT: [8] - LNLPPort11
# ALL-NEXT: [9] - LNLPPort20
# ALL-NEXT: [10] - LNLPPort21
# ALL-NEXT: [11] - LNLPPort22
# ALL-NEXT: [12] - LNLPPort25
# ALL-NEXT: [13] - LNLPPort26
# ALL-NEXT: [14] - LNLPPort27
# ALL-NEXT: [15] - LNLPPortInvalid
# ALL-NEXT: [16] - LNLPVPort00
# ALL-NEXT: [17] - LNLPVPort01
# ALL-NEXT: [18] - LNLPVPort02
# ALL-NEXT: [19] - LNLPVPort03

# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19]
# ALL-NEXT: - - 3.33 - 3.33 - 3.34 5.00 5.00 6.66 6.66 6.68 3.33 3.33 3.34 - - - - -

# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19] Instructions:
# ALL-NEXT: - - 0.33 - 0.33 - 0.34 - 1.00 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 64(%r14)
# ALL-NEXT: - - 0.33 - 0.34 - 0.33 1.00 - 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 128(%r14)
# ALL-NEXT: - - 0.34 - 0.33 - 0.33 - 1.00 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 192(%r14)
# ALL-NEXT: - - 0.33 - 0.33 - 0.34 1.00 - 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 256(%r14)
# ALL-NEXT: - - 0.33 - 0.34 - 0.33 - 1.00 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 320(%r14)
# ALL-NEXT: - - 0.34 - 0.33 - 0.33 1.00 - 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 384(%r14)
# ALL-NEXT: - - 0.33 - 0.33 - 0.34 - 1.00 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 448(%r14)
# ALL-NEXT: - - 0.33 - 0.34 - 0.33 1.00 - 0.66 0.68 0.66 0.33 0.34 0.33 - - - - - addq $44, 512(%r14)
# ALL-NEXT: - - 0.34 - 0.33 - 0.33 - 1.00 0.68 0.66 0.66 0.34 0.33 0.33 - - - - - addq $44, 576(%r14)
# ALL-NEXT: - - 0.33 - 0.33 - 0.34 1.00 - 0.66 0.66 0.68 0.33 0.33 0.34 - - - - - addq $44, 640(%r14)

# ALL: Timeline view:

# NOALIAS-NEXT: 0123456789
# NOALIAS-NEXT: Index 0123456789 0

# YESALIAS-NEXT: 0123456789 0123456789 0123456789 01234
# YESALIAS-NEXT: Index 0123456789 0123456789 0123456789 0123456789

# NOALIAS: [0,0] DeeeeeeeeeeeeER. . addq $44, 64(%r14)
# NOALIAS-NEXT: [0,1] DeeeeeeeeeeeeER. . addq $44, 128(%r14)
# NOALIAS-NEXT: [0,2] .DeeeeeeeeeeeeER . addq $44, 192(%r14)
# NOALIAS-NEXT: [0,3] .D=eeeeeeeeeeeeER . addq $44, 256(%r14)
# NOALIAS-NEXT: [0,4] . DeeeeeeeeeeeeER . addq $44, 320(%r14)
# NOALIAS-NEXT: [0,5] . D=eeeeeeeeeeeeER . addq $44, 384(%r14)
# NOALIAS-NEXT: [0,6] . D=eeeeeeeeeeeeER . addq $44, 448(%r14)
# NOALIAS-NEXT: [0,7] . D=eeeeeeeeeeeeER . addq $44, 512(%r14)
# NOALIAS-NEXT: [0,8] . D=eeeeeeeeeeeeER. addq $44, 576(%r14)
# NOALIAS-NEXT: [0,9] . D==eeeeeeeeeeeeER addq $44, 640(%r14)

# YESALIAS: [0,0] DeeeeeeeeeeeeER. . . . . . . . . . . . . addq $44, 64(%r14)
# YESALIAS-NEXT: [0,1] D============eeeeeeeeeeeeER . . . . . . . . . . addq $44, 128(%r14)
# YESALIAS-NEXT: [0,2] .D=======================eeeeeeeeeeeeER . . . . . . . . addq $44, 192(%r14)
# YESALIAS-NEXT: [0,3] .D===================================eeeeeeeeeeeeER . . . . . addq $44, 256(%r14)
# YESALIAS-NEXT: [0,4] . D==============================================eeeeeeeeeeeeER . . . addq $44, 320(%r14)
# YESALIAS-NEXT: [0,5] . D==========================================================eeeeeeeeeeeeER addq $44, 384(%r14)
# YESALIAS-NEXT: Truncated display due to cycle limit

# ALL: Average Wait times (based on the timeline view):
# ALL-NEXT: [0]: Executions
# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# ALL-NEXT: [3]: Average time elapsed from WB until retire stage

# ALL: [0] [1] [2] [3]
# ALL-NEXT: 0. 1 1.0 1.0 0.0 addq $44, 64(%r14)

# NOALIAS-NEXT: 1. 1 1.0 0.0 0.0 addq $44, 128(%r14)
# NOALIAS-NEXT: 2. 1 1.0 1.0 0.0 addq $44, 192(%r14)
# NOALIAS-NEXT: 3. 1 2.0 1.0 0.0 addq $44, 256(%r14)
# NOALIAS-NEXT: 4. 1 1.0 0.0 0.0 addq $44, 320(%r14)
# NOALIAS-NEXT: 5. 1 2.0 1.0 0.0 addq $44, 384(%r14)
# NOALIAS-NEXT: 6. 1 2.0 1.0 0.0 addq $44, 448(%r14)
# NOALIAS-NEXT: 7. 1 2.0 0.0 0.0 addq $44, 512(%r14)
# NOALIAS-NEXT: 8. 1 2.0 1.0 0.0 addq $44, 576(%r14)
# NOALIAS-NEXT: 9. 1 3.0 1.0 0.0 addq $44, 640(%r14)
# NOALIAS-NEXT: 1 1.7 0.7 0.0 <total>

# YESALIAS-NEXT: 1. 1 13.0 0.0 0.0 addq $44, 128(%r14)
# YESALIAS-NEXT: 2. 1 24.0 0.0 0.0 addq $44, 192(%r14)
# YESALIAS-NEXT: 3. 1 36.0 0.0 0.0 addq $44, 256(%r14)
# YESALIAS-NEXT: 4. 1 47.0 0.0 0.0 addq $44, 320(%r14)
# YESALIAS-NEXT: 5. 1 59.0 0.0 0.0 addq $44, 384(%r14)
# YESALIAS-NEXT: 6. 1 70.0 0.0 0.0 addq $44, 448(%r14)
# YESALIAS-NEXT: 7. 1 82.0 0.0 0.0 addq $44, 512(%r14)
# YESALIAS-NEXT: 8. 1 93.0 0.0 0.0 addq $44, 576(%r14)
# YESALIAS-NEXT: 9. 1 105.0 0.0 0.0 addq $44, 640(%r14)
# YESALIAS-NEXT: 1 53.0 0.1 0.0 <total>
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -all-views=false -summary-view < %s | FileCheck %s

# Issue #57548

# Do not crash when simulating instructions that consume partially overlapping
# resource groups.

vpsllw %xmm1, %ymm0, %ymm0
vpsllw %xmm1, %xmm2, %xmm1
vpand %ymm1, %ymm0, %ymm0

# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 300
# CHECK-NEXT: Total Cycles: 503
# CHECK-NEXT: Total uOps: 500

# CHECK: Dispatch Width: 8
# CHECK-NEXT: uOps Per Cycle: 0.99
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 1.0
69 changes: 69 additions & 0 deletions llvm/test/tools/llvm-mca/X86/LunarlakeP/resources-aes.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=lunarlake -instruction-tables < %s | FileCheck %s

aesdec %xmm0, %xmm2
aesdec (%rax), %xmm2

aesdeclast %xmm0, %xmm2
aesdeclast (%rax), %xmm2

aesenc %xmm0, %xmm2
aesenc (%rax), %xmm2

aesenclast %xmm0, %xmm2
aesenclast (%rax), %xmm2

# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)

# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 2.00 aesdec %xmm0, %xmm2
# CHECK-NEXT: 2 11 2.33 * aesdec (%rax), %xmm2
# CHECK-NEXT: 1 3 2.00 aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 2 11 2.33 * aesdeclast (%rax), %xmm2
# CHECK-NEXT: 1 3 2.00 aesenc %xmm0, %xmm2
# CHECK-NEXT: 2 11 2.33 * aesenc (%rax), %xmm2
# CHECK-NEXT: 1 3 2.00 aesenclast %xmm0, %xmm2
# CHECK-NEXT: 2 11 2.33 * aesenclast (%rax), %xmm2

# CHECK: Resources:
# CHECK-NEXT: [0] - ADLPPort00
# CHECK-NEXT: [1] - LNLPPort00
# CHECK-NEXT: [2] - LNLPPort01
# CHECK-NEXT: [3] - LNLPPort02
# CHECK-NEXT: [4] - LNLPPort03
# CHECK-NEXT: [5] - LNLPPort04
# CHECK-NEXT: [6] - LNLPPort05
# CHECK-NEXT: [7] - LNLPPort10
# CHECK-NEXT: [8] - LNLPPort11
# CHECK-NEXT: [9] - LNLPPort20
# CHECK-NEXT: [10] - LNLPPort21
# CHECK-NEXT: [11] - LNLPPort22
# CHECK-NEXT: [12] - LNLPPort25
# CHECK-NEXT: [13] - LNLPPort26
# CHECK-NEXT: [14] - LNLPPort27
# CHECK-NEXT: [15] - LNLPPortInvalid
# CHECK-NEXT: [16] - LNLPVPort00
# CHECK-NEXT: [17] - LNLPVPort01
# CHECK-NEXT: [18] - LNLPVPort02
# CHECK-NEXT: [19] - LNLPVPort03

# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19]
# CHECK-NEXT: - - - - - - - - - 9.33 9.33 9.33 - - - - 16.00 16.00 - -

# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] [14] [15] [16] [17] [18] [19] Instructions:
# CHECK-NEXT: - - - - - - - - - - - - - - - - 2.00 2.00 - - aesdec %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesdec (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - - 2.00 2.00 - - aesdeclast %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesdeclast (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - - 2.00 2.00 - - aesenc %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesenc (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - - - - - - - - 2.00 2.00 - - aesenclast %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - - - 2.33 2.33 2.33 - - - - 2.00 2.00 - - aesenclast (%rax), %xmm2
Loading