Skip to content

Commit b0bc467

Browse files
committed
[X86] Fix bad instregex in VPMOVSX/ZX znver4 512-bit patterns.
The Z (512-bit) suffix was optional in the regex, so these patterns also matched the 128-bit SSE/AVX instructions. Noticed while triaging the strange perf numbers on #110308.
1 parent ed6749a commit b0bc467

File tree

3 files changed

+52
-52
lines changed

3 files changed

+52
-52
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1658,7 +1658,7 @@ def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> {
16581658
let NumMicroOps = 1;
16591659
}
16601660
def : InstRW<[Zn4MOVSZ], (instregex
1661-
"(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z?)(rr|rrk|rrkz)"
1661+
"(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)Z(rr|rrk|rrkz)"
16621662
)>;
16631663

16641664
def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> {
@@ -1667,7 +1667,7 @@ def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> {
16671667
let NumMicroOps = 1;
16681668
}
16691669
def : InstRW<[Zn4MOVSrr], (instregex
1670-
"(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)(Z?)(rr|rrk|rrkz)"
1670+
"(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)Z(rr|rrk|rrkz)"
16711671
)>;
16721672

16731673

llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1506,29 +1506,29 @@ vzeroupper
15061506
# CHECK-NEXT: 1 1 0.25 vpminuw %xmm0, %xmm1, %xmm2
15071507
# CHECK-NEXT: 1 8 0.50 * vpminuw (%rax), %xmm1, %xmm2
15081508
# CHECK-NEXT: 1 1 1.00 vpmovmskb %xmm0, %ecx
1509-
# CHECK-NEXT: 1 4 2.00 vpmovsxbd %xmm0, %xmm2
1509+
# CHECK-NEXT: 1 1 0.50 vpmovsxbd %xmm0, %xmm2
15101510
# CHECK-NEXT: 1 8 0.50 * vpmovsxbd (%rax), %xmm2
1511-
# CHECK-NEXT: 1 4 2.00 vpmovsxbq %xmm0, %xmm2
1511+
# CHECK-NEXT: 1 1 0.50 vpmovsxbq %xmm0, %xmm2
15121512
# CHECK-NEXT: 1 8 0.50 * vpmovsxbq (%rax), %xmm2
1513-
# CHECK-NEXT: 1 4 2.00 vpmovsxbw %xmm0, %xmm2
1513+
# CHECK-NEXT: 1 1 0.50 vpmovsxbw %xmm0, %xmm2
15141514
# CHECK-NEXT: 1 8 0.50 * vpmovsxbw (%rax), %xmm2
1515-
# CHECK-NEXT: 1 4 2.00 vpmovsxdq %xmm0, %xmm2
1515+
# CHECK-NEXT: 1 1 0.50 vpmovsxdq %xmm0, %xmm2
15161516
# CHECK-NEXT: 1 8 0.50 * vpmovsxdq (%rax), %xmm2
1517-
# CHECK-NEXT: 1 4 2.00 vpmovsxwd %xmm0, %xmm2
1517+
# CHECK-NEXT: 1 1 0.50 vpmovsxwd %xmm0, %xmm2
15181518
# CHECK-NEXT: 1 8 0.50 * vpmovsxwd (%rax), %xmm2
1519-
# CHECK-NEXT: 1 4 2.00 vpmovsxwq %xmm0, %xmm2
1519+
# CHECK-NEXT: 1 1 0.50 vpmovsxwq %xmm0, %xmm2
15201520
# CHECK-NEXT: 1 8 0.50 * vpmovsxwq (%rax), %xmm2
1521-
# CHECK-NEXT: 1 4 2.00 vpmovzxbd %xmm0, %xmm2
1521+
# CHECK-NEXT: 1 1 0.50 vpmovzxbd %xmm0, %xmm2
15221522
# CHECK-NEXT: 1 8 0.50 * vpmovzxbd (%rax), %xmm2
1523-
# CHECK-NEXT: 1 4 2.00 vpmovzxbq %xmm0, %xmm2
1523+
# CHECK-NEXT: 1 1 0.50 vpmovzxbq %xmm0, %xmm2
15241524
# CHECK-NEXT: 1 8 0.50 * vpmovzxbq (%rax), %xmm2
1525-
# CHECK-NEXT: 1 4 2.00 vpmovzxbw %xmm0, %xmm2
1525+
# CHECK-NEXT: 1 1 0.50 vpmovzxbw %xmm0, %xmm2
15261526
# CHECK-NEXT: 1 8 0.50 * vpmovzxbw (%rax), %xmm2
1527-
# CHECK-NEXT: 1 4 2.00 vpmovzxdq %xmm0, %xmm2
1527+
# CHECK-NEXT: 1 1 0.50 vpmovzxdq %xmm0, %xmm2
15281528
# CHECK-NEXT: 1 8 0.50 * vpmovzxdq (%rax), %xmm2
1529-
# CHECK-NEXT: 1 4 2.00 vpmovzxwd %xmm0, %xmm2
1529+
# CHECK-NEXT: 1 1 0.50 vpmovzxwd %xmm0, %xmm2
15301530
# CHECK-NEXT: 1 8 0.50 * vpmovzxwd (%rax), %xmm2
1531-
# CHECK-NEXT: 1 4 2.00 vpmovzxwq %xmm0, %xmm2
1531+
# CHECK-NEXT: 1 1 0.50 vpmovzxwq %xmm0, %xmm2
15321532
# CHECK-NEXT: 1 8 0.50 * vpmovzxwq (%rax), %xmm2
15331533
# CHECK-NEXT: 1 3 0.50 vpmuldq %xmm0, %xmm1, %xmm2
15341534
# CHECK-NEXT: 1 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2
@@ -1749,7 +1749,7 @@ vzeroupper
17491749

17501750
# CHECK: Resource pressure per iteration:
17511751
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1]
1752-
# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 414.08 288.58 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00
1752+
# CHECK-NEXT: 1.33 1.33 1.33 16.50 16.50 16.50 16.50 - 205.25 396.08 270.58 158.08 208.50 208.50 65.00 119.67 119.67 119.67 107.00 107.00 107.00 19.00 19.00
17531753

17541754
# CHECK: Resource pressure by instruction:
17551755
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12.0] [12.1] [13] [14.0] [14.1] [14.2] [15.0] [15.1] [15.2] [16.0] [16.1] Instructions:
@@ -2229,29 +2229,29 @@ vzeroupper
22292229
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 - - - - - - - - - - - vpminuw %xmm0, %xmm1, %xmm2
22302230
# CHECK-NEXT: - - - - - - - - 0.25 0.25 0.25 0.25 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpminuw (%rax), %xmm1, %xmm2
22312231
# CHECK-NEXT: - - - - - - - - - - 1.00 - - - - - - - - - - - - vpmovmskb %xmm0, %ecx
2232-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbd %xmm0, %xmm2
2232+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbd %xmm0, %xmm2
22332233
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbd (%rax), %xmm2
2234-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbq %xmm0, %xmm2
2234+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbq %xmm0, %xmm2
22352235
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbq (%rax), %xmm2
2236-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxbw %xmm0, %xmm2
2236+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxbw %xmm0, %xmm2
22372237
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxbw (%rax), %xmm2
2238-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxdq %xmm0, %xmm2
2238+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxdq %xmm0, %xmm2
22392239
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxdq (%rax), %xmm2
2240-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwd %xmm0, %xmm2
2240+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwd %xmm0, %xmm2
22412241
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwd (%rax), %xmm2
2242-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovsxwq %xmm0, %xmm2
2242+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovsxwq %xmm0, %xmm2
22432243
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovsxwq (%rax), %xmm2
2244-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbd %xmm0, %xmm2
2244+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbd %xmm0, %xmm2
22452245
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbd (%rax), %xmm2
2246-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbq %xmm0, %xmm2
2246+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbq %xmm0, %xmm2
22472247
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbq (%rax), %xmm2
2248-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxbw %xmm0, %xmm2
2248+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxbw %xmm0, %xmm2
22492249
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxbw (%rax), %xmm2
2250-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxdq %xmm0, %xmm2
2250+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxdq %xmm0, %xmm2
22512251
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxdq (%rax), %xmm2
2252-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwd %xmm0, %xmm2
2252+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwd %xmm0, %xmm2
22532253
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwd (%rax), %xmm2
2254-
# CHECK-NEXT: - - - - - - - - - 2.00 2.00 - - - - - - - - - - - - vpmovzxwq %xmm0, %xmm2
2254+
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - - - - - - - - - - - - vpmovzxwq %xmm0, %xmm2
22552255
# CHECK-NEXT: - - - - - - - - - 0.50 0.50 - 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmovzxwq (%rax), %xmm2
22562256
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 - - - - - - - - - - - vpmuldq %xmm0, %xmm1, %xmm2
22572257
# CHECK-NEXT: - - - - - - - - 0.50 - - 0.50 0.50 0.50 - 0.33 0.33 0.33 0.33 0.33 0.33 - - vpmuldq (%rax), %xmm1, %xmm2

0 commit comments

Comments
 (0)