Skip to content

Commit 8aa9e1e

Browse files
authored
[X86] Ensure models use vector load latency for vector loads (llvm#157631)
Noticed while addressing llvm#146564: some of the znver3/4 overrides for vector ops were mistakenly using the scalar load latency (`LoadLatency`) instead of the vector load latency (`VecLoadLatency`).
1 parent 204917e commit 8aa9e1e

File tree

8 files changed

+46
-46
lines changed

8 files changed

+46
-46
lines changed

llvm/lib/Target/X86/X86ScheduleZnver3.td

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -992,14 +992,14 @@ def Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn3FPFMisc0]> {
992992
def : InstRW<[Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
993993

994994
def Zn3WriteVEXTRACTI128mr : SchedWriteRes<[Zn3FPFMisc0, Zn3FPSt, Zn3Store]> {
995-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
995+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
996996
let ReleaseAtCycles = [1, 1, 1];
997997
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
998998
}
999999
def : InstRW<[Zn3WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
10001000

10011001
def Zn3WriteVINSERTF128rmr : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPFMisc0]> {
1002-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
1002+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
10031003
let ReleaseAtCycles = [1, 1, 1];
10041004
let NumMicroOps = !add(Zn3WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
10051005
}
@@ -1221,7 +1221,7 @@ def Zn3WriteSHA1MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
12211221
def : InstRW<[Zn3WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
12221222

12231223
def Zn3WriteSHA1MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
1224-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG1rr.Latency);
1224+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG1rr.Latency);
12251225
let ReleaseAtCycles = [1, 1, 2];
12261226
let NumMicroOps = !add(Zn3WriteSHA1MSG1rr.NumMicroOps, 0);
12271227
}
@@ -1235,7 +1235,7 @@ def Zn3WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn3FPU0123]> {
12351235
def : InstRW<[Zn3WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
12361236

12371237
def Zn3Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
1238-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
1238+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
12391239
let ReleaseAtCycles = [1, 1, 2];
12401240
let NumMicroOps = !add(Zn3WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
12411241
}
@@ -1249,7 +1249,7 @@ def Zn3WriteSHA256MSG1rr : SchedWriteRes<[Zn3FPU0123]> {
12491249
def : InstRW<[Zn3WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
12501250

12511251
def Zn3Writerm_SHA256MSG1rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
1252-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG1rr.Latency);
1252+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG1rr.Latency);
12531253
let ReleaseAtCycles = [1, 1, 3];
12541254
let NumMicroOps = !add(Zn3WriteSHA256MSG1rr.NumMicroOps, 0);
12551255
}
@@ -1263,7 +1263,7 @@ def Zn3WriteSHA256MSG2rr : SchedWriteRes<[Zn3FPU0123]> {
12631263
def : InstRW<[Zn3WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
12641264

12651265
def Zn3WriteSHA256MSG2rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPU0123]> {
1266-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteSHA256MSG2rr.Latency);
1266+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteSHA256MSG2rr.Latency);
12671267
let ReleaseAtCycles = [1, 1, 8];
12681268
let NumMicroOps = !add(Zn3WriteSHA256MSG2rr.NumMicroOps, 1);
12691269
}
@@ -1338,14 +1338,14 @@ def Zn3WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn3FPVShuf]> {
13381338
def : InstRW<[Zn3WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
13391339

13401340
def Zn3WriteVPERM2F128rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1341-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
1341+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERM2I128rr_VPERM2F128rr.Latency);
13421342
let ReleaseAtCycles = [1, 1, 1];
13431343
let NumMicroOps = !add(Zn3WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
13441344
}
13451345
def : InstRW<[Zn3WriteVPERM2F128rm], (instrs VPERM2F128rmi)>;
13461346

13471347
def Zn3WriteVPERMPSYrm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1348-
let Latency = !add(Znver3Model.LoadLatency, 7);
1348+
let Latency = !add(Znver3Model.VecLoadLatency, 7);
13491349
let ReleaseAtCycles = [1, 1, 2];
13501350
let NumMicroOps = 3;
13511351
}
@@ -1359,14 +1359,14 @@ def Zn3WriteVPERMYri : SchedWriteRes<[Zn3FPVShuf]> {
13591359
def : InstRW<[Zn3WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
13601360

13611361
def Zn3WriteVPERMPDYmi : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1362-
let Latency = !add(Znver3Model.LoadLatency, Zn3WriteVPERMYri.Latency);
1362+
let Latency = !add(Znver3Model.VecLoadLatency, Zn3WriteVPERMYri.Latency);
13631363
let ReleaseAtCycles = [1, 1, 2];
13641364
let NumMicroOps = !add(Zn3WriteVPERMYri.NumMicroOps, 1);
13651365
}
13661366
def : InstRW<[Zn3WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
13671367

13681368
def Zn3WriteVPERMDYm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3FPVShuf]> {
1369-
let Latency = !add(Znver3Model.LoadLatency, 5);
1369+
let Latency = !add(Znver3Model.VecLoadLatency, 5);
13701370
let ReleaseAtCycles = [1, 1, 2];
13711371
let NumMicroOps = 2;
13721372
}

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,14 +1005,14 @@ def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
10051005
def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rri, VEXTRACTI128rri)>;
10061006

10071007
def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
1008-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
1008+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
10091009
let ReleaseAtCycles = [1, 1, 1];
10101010
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
10111011
}
10121012
def : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mri, VEXTRACTF128mri)>;
10131013

10141014
def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
1015-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
1015+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
10161016
let ReleaseAtCycles = [1, 1, 1];
10171017
let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
10181018
}
@@ -1262,7 +1262,7 @@ def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
12621262
def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
12631263

12641264
def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
1265-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
1265+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG1rr.Latency);
12661266
let ReleaseAtCycles = [1, 1, 2];
12671267
let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
12681268
}
@@ -1276,7 +1276,7 @@ def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
12761276
def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
12771277

12781278
def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
1279-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
1279+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
12801280
let ReleaseAtCycles = [1, 1, 2];
12811281
let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
12821282
}
@@ -1290,7 +1290,7 @@ def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
12901290
def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
12911291

12921292
def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
1293-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
1293+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG1rr.Latency);
12941294
let ReleaseAtCycles = [1, 1, 3];
12951295
let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
12961296
}
@@ -1304,7 +1304,7 @@ def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
13041304
def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
13051305

13061306
def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
1307-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
1307+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteSHA256MSG2rr.Latency);
13081308
let ReleaseAtCycles = [1, 1, 8];
13091309
let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
13101310
}
@@ -1379,7 +1379,7 @@ def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
13791379
def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rri, VPERM2F128rri)>;
13801380

13811381
def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
1382-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
1382+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
13831383
let ReleaseAtCycles = [1, 1, 1];
13841384
let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
13851385
}
@@ -1393,7 +1393,7 @@ def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
13931393
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
13941394

13951395
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
1396-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
1396+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
13971397
let ReleaseAtCycles = [1, 1, 2];
13981398
let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
13991399
}
@@ -1407,7 +1407,7 @@ def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
14071407
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
14081408

14091409
def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
1410-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
1410+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMYri.Latency);
14111411
let ReleaseAtCycles = [1, 1, 2];
14121412
let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
14131413
}
@@ -1421,7 +1421,7 @@ def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
14211421
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
14221422

14231423
def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
1424-
let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
1424+
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMDYrr.Latency);
14251425
let ReleaseAtCycles = [1, 1, 2];
14261426
let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
14271427
}

llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ vzeroupper
11931193
# CHECK-NEXT: 7 15 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
11941194
# CHECK-NEXT: 8 22 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
11951195
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
1196-
# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
1196+
# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
11971197
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
11981198
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
11991199
# CHECK-NEXT: 4 6 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
12131213
# CHECK-NEXT: 3 6 2.00 vhsubps %ymm0, %ymm1, %ymm2
12141214
# CHECK-NEXT: 4 13 2.00 * vhsubps (%rax), %ymm1, %ymm2
12151215
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
1216-
# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
1216+
# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
12171217
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
12181218
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
12191219
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
14301430
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
14311431
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
14321432
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
1433-
# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
1433+
# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
14341434
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
14351435
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
14361436
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2

llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ vpxor (%rax), %ymm1, %ymm2
464464
# CHECK-NEXT: 1 2 1.00 vbroadcastsd %xmm0, %ymm0
465465
# CHECK-NEXT: 1 2 1.00 vbroadcastss %xmm0, %ymm0
466466
# CHECK-NEXT: 1 4 1.00 vextracti128 $1, %ymm0, %xmm2
467-
# CHECK-NEXT: 2 8 1.00 * vextracti128 $1, %ymm0, (%rax)
467+
# CHECK-NEXT: 2 11 1.00 * vextracti128 $1, %ymm0, (%rax)
468468
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %xmm0, (%rax,%xmm1,2), %xmm2
469469
# CHECK-NEXT: 1 5 0.33 * vgatherdpd %ymm0, (%rax,%xmm1,2), %ymm2
470470
# CHECK-NEXT: 1 5 0.33 * vgatherdps %xmm0, (%rax,%xmm1,2), %xmm2
@@ -561,13 +561,13 @@ vpxor (%rax), %ymm1, %ymm2
561561
# CHECK-NEXT: 1 3 1.00 vperm2i128 $1, %ymm0, %ymm1, %ymm2
562562
# CHECK-NEXT: 1 9 1.00 * vperm2i128 $1, (%rax), %ymm1, %ymm2
563563
# CHECK-NEXT: 2 5 1.00 vpermd %ymm0, %ymm1, %ymm2
564-
# CHECK-NEXT: 2 9 2.00 * vpermd (%rax), %ymm1, %ymm2
564+
# CHECK-NEXT: 2 12 2.00 * vpermd (%rax), %ymm1, %ymm2
565565
# CHECK-NEXT: 2 6 1.00 vpermpd $1, %ymm0, %ymm2
566-
# CHECK-NEXT: 3 10 2.00 * vpermpd $1, (%rax), %ymm2
566+
# CHECK-NEXT: 3 13 2.00 * vpermpd $1, (%rax), %ymm2
567567
# CHECK-NEXT: 2 7 1.00 vpermps %ymm0, %ymm1, %ymm2
568-
# CHECK-NEXT: 3 11 2.00 * vpermps (%rax), %ymm1, %ymm2
568+
# CHECK-NEXT: 3 14 2.00 * vpermps (%rax), %ymm1, %ymm2
569569
# CHECK-NEXT: 2 6 1.00 vpermq $1, %ymm0, %ymm2
570-
# CHECK-NEXT: 2 9 2.00 * vpermq $1, (%rax), %ymm2
570+
# CHECK-NEXT: 2 12 2.00 * vpermq $1, (%rax), %ymm2
571571
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %xmm0, (%rax,%xmm1,2), %xmm2
572572
# CHECK-NEXT: 1 5 0.33 * vpgatherdd %ymm0, (%rax,%ymm1,2), %ymm2
573573
# CHECK-NEXT: 1 5 0.33 * vpgatherdq %xmm0, (%rax,%xmm1,2), %xmm2

llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,17 @@ sha256rnds2 (%rax), %xmm2
3232

3333
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
3434
# CHECK-NEXT: 2 2 0.50 sha1msg1 %xmm0, %xmm2
35-
# CHECK-NEXT: 2 6 0.50 * sha1msg1 (%rax), %xmm2
35+
# CHECK-NEXT: 2 9 0.50 * sha1msg1 (%rax), %xmm2
3636
# CHECK-NEXT: 1 1 0.50 sha1msg2 %xmm0, %xmm2
37-
# CHECK-NEXT: 1 5 0.50 * sha1msg2 (%rax), %xmm2
37+
# CHECK-NEXT: 1 8 0.50 * sha1msg2 (%rax), %xmm2
3838
# CHECK-NEXT: 1 1 0.50 sha1nexte %xmm0, %xmm2
39-
# CHECK-NEXT: 1 5 0.50 * sha1nexte (%rax), %xmm2
39+
# CHECK-NEXT: 1 8 0.50 * sha1nexte (%rax), %xmm2
4040
# CHECK-NEXT: 1 6 2.00 sha1rnds4 $3, %xmm0, %xmm2
4141
# CHECK-NEXT: 1 10 0.50 * sha1rnds4 $3, (%rax), %xmm2
4242
# CHECK-NEXT: 2 2 0.75 sha256msg1 %xmm0, %xmm2
43-
# CHECK-NEXT: 2 6 0.75 * sha256msg1 (%rax), %xmm2
43+
# CHECK-NEXT: 2 9 0.75 * sha256msg1 (%rax), %xmm2
4444
# CHECK-NEXT: 4 3 2.00 sha256msg2 %xmm0, %xmm2
45-
# CHECK-NEXT: 5 7 2.00 * sha256msg2 (%rax), %xmm2
45+
# CHECK-NEXT: 5 10 2.00 * sha256msg2 (%rax), %xmm2
4646
# CHECK-NEXT: 1 4 2.00 sha256rnds2 %xmm0, %xmm0, %xmm2
4747
# CHECK-NEXT: 1 10 0.50 * sha256rnds2 %xmm0, (%rax), %xmm2
4848

llvm/test/tools/llvm-mca/X86/Znver4/resources-avx1.s

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1193,7 +1193,7 @@ vzeroupper
11931193
# CHECK-NEXT: 7 11 4.00 vdpps $22, %ymm0, %ymm1, %ymm2
11941194
# CHECK-NEXT: 8 18 4.00 * vdpps $22, (%rax), %ymm1, %ymm2
11951195
# CHECK-NEXT: 1 4 1.00 vextractf128 $1, %ymm0, %xmm2
1196-
# CHECK-NEXT: 2 8 1.00 * vextractf128 $1, %ymm0, (%rax)
1196+
# CHECK-NEXT: 2 11 1.00 * vextractf128 $1, %ymm0, (%rax)
11971197
# CHECK-NEXT: 2 1 1.00 vextractps $1, %xmm0, %ecx
11981198
# CHECK-NEXT: 2 2 1.00 * vextractps $1, %xmm0, (%rax)
11991199
# CHECK-NEXT: 3 4 2.00 vhaddpd %xmm0, %xmm1, %xmm2
@@ -1213,7 +1213,7 @@ vzeroupper
12131213
# CHECK-NEXT: 3 4 2.00 vhsubps %ymm0, %ymm1, %ymm2
12141214
# CHECK-NEXT: 4 11 2.00 * vhsubps (%rax), %ymm1, %ymm2
12151215
# CHECK-NEXT: 1 2 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
1216-
# CHECK-NEXT: 1 8 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
1216+
# CHECK-NEXT: 1 11 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
12171217
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
12181218
# CHECK-NEXT: 1 8 0.50 * vinsertps $1, (%rax), %xmm1, %xmm2
12191219
# CHECK-NEXT: 1 8 0.50 * vlddqu (%rax), %xmm2
@@ -1430,7 +1430,7 @@ vzeroupper
14301430
# CHECK-NEXT: 3 6 2.00 vpcmpistrm $1, %xmm0, %xmm2
14311431
# CHECK-NEXT: 4 13 2.00 * vpcmpistrm $1, (%rax), %xmm2
14321432
# CHECK-NEXT: 1 3 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
1433-
# CHECK-NEXT: 1 7 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
1433+
# CHECK-NEXT: 1 10 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
14341434
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
14351435
# CHECK-NEXT: 1 8 0.50 * vpermilpd $1, (%rax), %xmm2
14361436
# CHECK-NEXT: 1 3 0.50 vpermilpd %xmm0, %xmm1, %xmm2

0 commit comments

Comments
 (0)