Skip to content

Commit 8d345dd

Browse files
committed
use Zen4 CLMUL/VPERM(S/D) values
1 parent 48d84d2 commit 8d345dd

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1341,7 +1341,7 @@ defm : Zn4WriteResXMMPair<WriteAESIMC, [Zn4FPAES01], 4, [1], 1>; // InvMixColumn
13411341
defm : Zn4WriteResXMMPair<WriteAESKeyGen, [Zn4FPAES01], 4, [1], 1>; // Key Generation.
13421342

13431343
// Carry-less multiplication instructions.
1344-
defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [4], 4>;
1344+
defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [3], 4>;
13451345

13461346
// EMMS/FEMMS
13471347
defm : Zn4WriteResInt<WriteEMMS, [Zn4ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis
@@ -1387,23 +1387,23 @@ def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
13871387
def : InstRW<[Zn4WriteVPERM2F128rm], (instrs VPERM2F128rmi)>;
13881388

13891389
// Scheduling write resource that the following InstRW applies to VPERMPSYrr.
// It uses the single resource Zn4FPVShuf, so ReleaseAtCycles has one entry.
// Zn4WriteVPERMPSYrm computes its Latency from this def's Latency, so a
// change to Latency here also shifts the memory form.
def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
1390-
let Latency = 7;
1390+
let Latency = 4;
13911391
let ReleaseAtCycles = [1];
1392-
let NumMicroOps = 2;
1392+
let NumMicroOps = 1;
13931393
}
13941394
def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
13951395

13961396
// Scheduling write resource that the following InstRW applies to VPERMPSYrm.
// It lists three resources (Zn4AGU012, Zn4Load, Zn4FPVShuf); Latency is the
// model's vector load latency plus the Latency of Zn4WriteVPERMPSYrr.
def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
13971397
let Latency = !add(Znver4Model.VecLoadLatency, Zn4WriteVPERMPSYrr.Latency);
1398-
let ReleaseAtCycles = [1, 1, 2];
1399-
let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
1398+
// One ReleaseAtCycles entry per resource listed above, in the same order;
// a shorter non-empty list does not match the three-entry resource list.
let ReleaseAtCycles = [1, 1, 1];
1399+
let NumMicroOps = 1;
14001400
}
14011401
def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
14021402

14031403
// Scheduling write resource that the following InstRW applies to both
// VPERMPDYri and VPERMQYri. It uses the single resource Zn4FPVShuf, so
// ReleaseAtCycles has one entry.
def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
1404-
let Latency = 6;
1404+
let Latency = 4;
14051405
let ReleaseAtCycles = [1];
1406-
let NumMicroOps = 2;
1406+
let NumMicroOps = 1;
14071407
}
14081408
def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
14091409

@@ -1415,9 +1415,9 @@ def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
14151415
def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
14161416

14171417
// Scheduling write resource that the following InstRW applies to VPERMDYrr.
// It uses the single resource Zn4FPVShuf, so ReleaseAtCycles has one entry.
def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
1418-
let Latency = 5;
1418+
let Latency = 4;
14191419
let ReleaseAtCycles = [1];
1420-
let NumMicroOps = 2;
1420+
let NumMicroOps = 1;
14211421
}
14221422
def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
14231423

0 commit comments

Comments
 (0)