Skip to content

Commit 89365b1

Browse files
committed
[X86] IceLakeServer - PACKS instructions take latency 3cy
This appears to be a slowdown vs Skylake (which the model was copied from) - confirmed with uops.info / instlatx64. Noticed because D138359 was reporting that many of the PACKS overrides were redundant, when they were in fact incorrect.
1 parent 4346318 commit 89365b1

File tree

7 files changed

+99
-99
lines changed

7 files changed

+99
-99
lines changed

llvm/lib/Target/X86/X86SchedIceLake.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,6 @@ def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
672672
"(V?)PALIGNR(Y|Z128|Z256)?rri",
673673
"(V?)PERMIL(PD|PS)(Y|Z128|Z256)?ri",
674674
"(V?)PERMIL(PD|PS)(Y|Z128|Z256)?rr",
675-
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
676675
"(V?)UNPCK(L|H)(PD|PS)(Y|Z128|Z256)?rr")>;
677676

678677
def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
@@ -848,6 +847,7 @@ def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0
848847
"VALIGND(Z|Z128|Z256)rri",
849848
"VALIGNQ(Z|Z128|Z256)rri",
850849
"VPBROADCAST(B|W)rr",
850+
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z128|Z256)?rr",
851851
"VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
852852

853853
def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
@@ -1303,7 +1303,6 @@ def: InstRW<[ICXWriteResGroup92], (instregex "VMOV(SD|SS)Zrm(b?)",
13031303
"(V?)PALIGNR(Z128)?rmi",
13041304
"(V?)PERMIL(PD|PS)(Z128)?m(b?)i",
13051305
"(V?)PERMIL(PD|PS)(Z128)?rm",
1306-
"(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
13071306
"(V?)UNPCK(L|H)(PD|PS)(Z128)?rm")>;
13081307

13091308
def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort01]> {
@@ -1542,7 +1541,6 @@ def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
15421541
"(V?)PALIGNR(Y|Z256)rmi",
15431542
"(V?)PERMIL(PD|PS)(Y|Z256)m(b?)i",
15441543
"(V?)PERMIL(PD|PS)(Y|Z256)rm",
1545-
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
15461544
"(V?)UNPCK(L|H)(PD|PS)(Y|Z256)rm")>;
15471545
def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
15481546
VPBROADCASTWYrm,
@@ -1724,6 +1722,7 @@ def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
17241722
"VPCMPQZ128rmi(b?)",
17251723
"VPCMPU(B|D|Q|W)Z128rmi(b?)",
17261724
"VPCMPWZ128rmi(b?)",
1725+
"(V?)PACK(U|S)S(DW|WB)(Z128)?rm",
17271726
"VPTESTMBZ128rm(b?)",
17281727
"VPTESTMDZ128rm(b?)",
17291728
"VPTESTMQZ128rm(b?)",
@@ -1795,6 +1794,7 @@ def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
17951794
"VPCMPU(B|D|Q|W)Z256rmi(b?)",
17961795
"VPCMPU(B|D|Q|W)Zrmi(b?)",
17971796
"VPCMPW(Z|Z256)rmi(b?)",
1797+
"(V?)PACK(U|S)S(DW|WB)(Y|Z|Z256)rm",
17981798
"VPTESTM(B|D|Q|W)Z256rm(b?)",
17991799
"VPTESTM(B|D|Q|W)Zrm(b?)",
18001800
"VPTESTNM(B|D|Q|W)Z256rm(b?)",

llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx1.s

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,14 +1365,14 @@ vzeroupper
13651365
# CHECK-NEXT: 2 7 0.50 * vpabsd (%rax), %xmm2
13661366
# CHECK-NEXT: 1 1 0.50 vpabsw %xmm0, %xmm2
13671367
# CHECK-NEXT: 2 7 0.50 * vpabsw (%rax), %xmm2
1368-
# CHECK-NEXT: 1 1 1.00 vpackssdw %xmm0, %xmm1, %xmm2
1369-
# CHECK-NEXT: 2 7 1.00 * vpackssdw (%rax), %xmm1, %xmm2
1370-
# CHECK-NEXT: 1 1 1.00 vpacksswb %xmm0, %xmm1, %xmm2
1371-
# CHECK-NEXT: 2 7 1.00 * vpacksswb (%rax), %xmm1, %xmm2
1372-
# CHECK-NEXT: 1 1 1.00 vpackusdw %xmm0, %xmm1, %xmm2
1373-
# CHECK-NEXT: 2 7 1.00 * vpackusdw (%rax), %xmm1, %xmm2
1374-
# CHECK-NEXT: 1 1 1.00 vpackuswb %xmm0, %xmm1, %xmm2
1375-
# CHECK-NEXT: 2 7 1.00 * vpackuswb (%rax), %xmm1, %xmm2
1368+
# CHECK-NEXT: 1 3 1.00 vpackssdw %xmm0, %xmm1, %xmm2
1369+
# CHECK-NEXT: 2 10 1.00 * vpackssdw (%rax), %xmm1, %xmm2
1370+
# CHECK-NEXT: 1 3 1.00 vpacksswb %xmm0, %xmm1, %xmm2
1371+
# CHECK-NEXT: 2 10 1.00 * vpacksswb (%rax), %xmm1, %xmm2
1372+
# CHECK-NEXT: 1 3 1.00 vpackusdw %xmm0, %xmm1, %xmm2
1373+
# CHECK-NEXT: 2 10 1.00 * vpackusdw (%rax), %xmm1, %xmm2
1374+
# CHECK-NEXT: 1 3 1.00 vpackuswb %xmm0, %xmm1, %xmm2
1375+
# CHECK-NEXT: 2 10 1.00 * vpackuswb (%rax), %xmm1, %xmm2
13761376
# CHECK-NEXT: 1 1 0.33 vpaddb %xmm0, %xmm1, %xmm2
13771377
# CHECK-NEXT: 2 7 0.50 * vpaddb (%rax), %xmm1, %xmm2
13781378
# CHECK-NEXT: 1 1 0.33 vpaddd %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx2.s

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -484,14 +484,14 @@ vpxor (%rax), %ymm1, %ymm2
484484
# CHECK-NEXT: 2 8 0.50 * vpabsd (%rax), %ymm2
485485
# CHECK-NEXT: 1 1 0.50 vpabsw %ymm0, %ymm2
486486
# CHECK-NEXT: 2 8 0.50 * vpabsw (%rax), %ymm2
487-
# CHECK-NEXT: 1 1 1.00 vpackssdw %ymm0, %ymm1, %ymm2
488-
# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %ymm1, %ymm2
489-
# CHECK-NEXT: 1 1 1.00 vpacksswb %ymm0, %ymm1, %ymm2
490-
# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %ymm1, %ymm2
491-
# CHECK-NEXT: 1 1 1.00 vpackusdw %ymm0, %ymm1, %ymm2
492-
# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %ymm1, %ymm2
493-
# CHECK-NEXT: 1 1 1.00 vpackuswb %ymm0, %ymm1, %ymm2
494-
# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %ymm1, %ymm2
487+
# CHECK-NEXT: 1 3 1.00 vpackssdw %ymm0, %ymm1, %ymm2
488+
# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %ymm1, %ymm2
489+
# CHECK-NEXT: 1 3 1.00 vpacksswb %ymm0, %ymm1, %ymm2
490+
# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %ymm1, %ymm2
491+
# CHECK-NEXT: 1 3 1.00 vpackusdw %ymm0, %ymm1, %ymm2
492+
# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %ymm1, %ymm2
493+
# CHECK-NEXT: 1 3 1.00 vpackuswb %ymm0, %ymm1, %ymm2
494+
# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %ymm1, %ymm2
495495
# CHECK-NEXT: 1 1 0.33 vpaddb %ymm0, %ymm1, %ymm2
496496
# CHECK-NEXT: 2 8 0.50 * vpaddb (%rax), %ymm1, %ymm2
497497
# CHECK-NEXT: 1 1 0.33 vpaddd %ymm0, %ymm1, %ymm2

llvm/test/tools/llvm-mca/X86/IceLakeServer/resources-avx512bw.s

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -679,30 +679,30 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1}
679679
# CHECK-NEXT: 2 8 1.00 * vpabsw (%rax), %zmm19 {%k1}
680680
# CHECK-NEXT: 1 1 1.00 vpabsw %zmm16, %zmm19 {%k1} {z}
681681
# CHECK-NEXT: 2 8 1.00 * vpabsw (%rax), %zmm19 {%k1} {z}
682-
# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19
683-
# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19
684-
# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1}
685-
# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1}
686-
# CHECK-NEXT: 1 1 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1} {z}
687-
# CHECK-NEXT: 2 8 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z}
688-
# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19
689-
# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19
690-
# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1}
691-
# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1}
692-
# CHECK-NEXT: 1 1 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z}
693-
# CHECK-NEXT: 2 8 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z}
694-
# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19
695-
# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19
696-
# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1}
697-
# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1}
698-
# CHECK-NEXT: 1 1 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z}
699-
# CHECK-NEXT: 2 8 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z}
700-
# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19
701-
# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19
702-
# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1}
703-
# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1}
704-
# CHECK-NEXT: 1 1 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z}
705-
# CHECK-NEXT: 2 8 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z}
682+
# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19
683+
# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19
684+
# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1}
685+
# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1}
686+
# CHECK-NEXT: 1 3 1.00 vpackssdw %zmm16, %zmm17, %zmm19 {%k1} {z}
687+
# CHECK-NEXT: 2 11 1.00 * vpackssdw (%rax), %zmm17, %zmm19 {%k1} {z}
688+
# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19
689+
# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19
690+
# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1}
691+
# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1}
692+
# CHECK-NEXT: 1 3 1.00 vpacksswb %zmm16, %zmm17, %zmm19 {%k1} {z}
693+
# CHECK-NEXT: 2 11 1.00 * vpacksswb (%rax), %zmm17, %zmm19 {%k1} {z}
694+
# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19
695+
# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19
696+
# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1}
697+
# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1}
698+
# CHECK-NEXT: 1 3 1.00 vpackusdw %zmm16, %zmm17, %zmm19 {%k1} {z}
699+
# CHECK-NEXT: 2 11 1.00 * vpackusdw (%rax), %zmm17, %zmm19 {%k1} {z}
700+
# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19
701+
# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19
702+
# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1}
703+
# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1}
704+
# CHECK-NEXT: 1 3 1.00 vpackuswb %zmm16, %zmm17, %zmm19 {%k1} {z}
705+
# CHECK-NEXT: 2 11 1.00 * vpackuswb (%rax), %zmm17, %zmm19 {%k1} {z}
706706
# CHECK-NEXT: 1 1 0.33 vpaddb %zmm16, %zmm17, %zmm19
707707
# CHECK-NEXT: 2 8 0.50 * vpaddb (%rax), %zmm17, %zmm19
708708
# CHECK-NEXT: 1 1 0.33 vpaddb %zmm16, %zmm17, %zmm19 {%k1}

0 commit comments

Comments
 (0)