-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[X86] Ensure models use vector load latency for vector loads #157631
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Noticed while addressing llvm#146564: some of the znver3/4 overrides for vector ops were using the scalar load latencies by mistake.
Member
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon). Changes: Noticed while addressing #146564 - some of the znver3/4 overrides for vector ops were using the scalar load latencies by mistake. Patch is 23.73 KiB, truncated to 20.00 KiB below (full version: https://github.com/llvm/llvm-project/pull/157631.diff). 8 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 9e271c1ee3709..044b77f7aacf4 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -992,14 +992,14 @@ def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
@@ -1221,7 +1221,7 @@ def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
}
@@ -1235,7 +1235,7 @@ def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
@@ -1249,7 +1249,7 @@ def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
}
@@ -1263,7 +1263,7 @@ def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG2rr.Latency);
let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
}
@@ -1338,14 +1338,14 @@ def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rmi)>;
def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, 7);
+ let Latency = !add(Znver3Model.VecLoadLatency, 7);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 3;
}
@@ -1359,14 +1359,14 @@ def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
+ let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERMYri.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
}
def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
- let Latency = !add(Znver3Model.LoadLatency, 5);
+ let Latency = !add(Znver3Model.VecLoadLatency, 5);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = 2;
}
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index 74d916d41f831..f4b8f8927b1b5 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -1005,14 +1005,14 @@ def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
}
def : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
}
@@ -1262,7 +1262,7 @@ def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
}
@@ -1276,7 +1276,7 @@ def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
}
@@ -1290,7 +1290,7 @@ def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG1rr.Latency);
let ReleaseAtCycles = [1, 1, 3];
let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
}
@@ -1304,7 +1304,7 @@ def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG2rr.Latency);
let ReleaseAtCycles = [1, 1, 8];
let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
}
@@ -1379,7 +1379,7 @@ def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
let ReleaseAtCycles = [1, 1, 1];
let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
}
@@ -1393,7 +1393,7 @@ def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
}
@@ -1407,7 +1407,7 @@ def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMYri.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
}
@@ -1421,7 +1421,7 @@ def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
- let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
+ let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMDYrr.Latency);
let ReleaseAtCycles = [1, 1, 2];
let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
}
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
index 4f0b4843d1704..0abf8ad61a4a0 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s
@@ -1193,7 +1193,7 @@ vzeroupper
# CHECK-NEXT: 7 15 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 8 22 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 4 6 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 13 2.00 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
index 1a8b9e2de1d8e..bc504285a5814 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s
@@ -464,7 +464,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextracti128 $1, %ymm0, (%rax)
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -561,13 +561,13 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 1 9 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 2 9 2.00 * vpermd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 12 2.00 * vpermd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2
-# CHECK-NEXT: 3 10 2.00 * vpermpd $1, (%rax), %ymm2
+# CHECK-NEXT: 3 13 2.00 * vpermpd $1, (%rax), %ymm2
# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 3 11 2.00 * vpermps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 3 14 2.00 * vpermps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2
-# CHECK-NEXT: 2 9 2.00 * vpermq $1, (%rax), %ymm2
+# CHECK-NEXT: 2 12 2.00 * vpermq $1, (%rax), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
index e6d5ab90a2acc..a9827788de39a 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s
@@ -32,17 +32,17 @@ sha256rnds2 (%rax), %xmm2
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 0.50 sha1msg1 %xmm0, %xmm2
-# CHECK-NEXT: 2 6 0.50 * sha1msg1 (%rax), %xmm2
+# CHECK-NEXT: 2 9 0.50 * sha1msg1 (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 sha1msg2 %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * sha1msg2 (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * sha1msg2 (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 sha1nexte %xmm0, %xmm2
-# CHECK-NEXT: 1 5 0.50 * sha1nexte (%rax), %xmm2
+# CHECK-NEXT: 1 8 0.50 * sha1nexte (%rax), %xmm2
# CHECK-NEXT: 1 6 2.00 sha1rnds4 $3, %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * sha1rnds4 $3, (%rax), %xmm2
# CHECK-NEXT: 2 2 0.75 sha256msg1 %xmm0, %xmm2
-# CHECK-NEXT: 2 6 0.75 * sha256msg1 (%rax), %xmm2
+# CHECK-NEXT: 2 9 0.75 * sha256msg1 (%rax), %xmm2
# CHECK-NEXT: 4 3 2.00 sha256msg2 %xmm0, %xmm2
-# CHECK-NEXT: 5 7 2.00 * sha256msg2 (%rax), %xmm2
+# CHECK-NEXT: 5 10 2.00 * sha256msg2 (%rax), %xmm2
# CHECK-NEXT: 1 4 2.00 sha256rnds2 %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 1 10 0.50 * sha256rnds2 %xmm0, (%rax), %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
index 1b2735a9cdde8..9b721c933ab51 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s
@@ -1193,7 +1193,7 @@ vzeroupper
# CHECK-NEXT: 7 11 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 8 18 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
# CHECK-NEXT: 3 4 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
# CHECK-NEXT: 3 4 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 4 11 2.00 * vhsubps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
index 0ad14688d0b7f..25e367c96e44b 100644
--- a/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
+++ b/llvm/test/tools/llvm-mca/X86/Znver4/resources-avx2.s
@@ -464,7 +464,7 @@ vpxor (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0
# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0
# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2
-# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax)
+# CHECK-NEXT: 2 11 1.00 * ...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Noticed while addressing #146564: some of the znver3/4 overrides for vector ops were using the scalar load latencies by mistake.