Skip to content

Commit e4c7852

Browse files
committed
VALIGN has different latency depending on width
1 parent 02d554c commit e4c7852

File tree

1 file changed

+22
-6
lines changed

1 file changed

+22
-6
lines changed

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1109,15 +1109,31 @@ def Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
11091109
}
11101110
def : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;
11111111

1112-
def Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
1113-
// TODO: All align instructions are expected to be of 4 cycle latency
1114-
let Latency = 4;
1112+
// 128-bit (XMM) VALIGN: 2-cycle latency, one micro-op, issued on the Zn4FPVAdd12 resource.
1113+
def Zn4WriteXMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
1114+
let Latency = 2;
1115+
let ReleaseAtCycles = [1];
1116+
let NumMicroOps = 1;
1117+
}
1118+
1119+
// 256-bit (YMM) VALIGN: 3-cycle latency, one micro-op, issued on the Zn4FPVAdd12 resource.
1120+
def Zn4WriteYMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
1121+
let Latency = 3;
11151122
let ReleaseAtCycles = [1];
11161123
let NumMicroOps = 1;
11171124
}
1118-
def : InstRW<[Zn4WriteVecALU2Slow], (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
1119-
VALIGNQZrri, VALIGNQZ128rri, VALIGNQZ256rri)
1120-
>;
1125+
1126+
// 512-bit (ZMM) VALIGN: 4-cycle latency, one micro-op; unlike the narrower forms, ReleaseAtCycles for Zn4FPVAdd12 is 2.
1127+
def Zn4WriteZMMVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
1128+
let Latency = 4;
1129+
let ReleaseAtCycles = [2];
1130+
let NumMicroOps = 1;
1131+
}
1132+
1133+
def : InstRW<[Zn4WriteZMMVecALU2Slow], (instrs VALIGNDZrri, VALIGNQZrri)>; // Bare "Z" suffix = 512-bit (ZMM) forms.
1134+
def : InstRW<[Zn4WriteXMMVecALU2Slow], (instrs VALIGNDZ128rri, VALIGNQZ128rri)>; // "Z128" suffix = 128-bit (XMM) forms.
1135+
def : InstRW<[Zn4WriteYMMVecALU2Slow], (instrs VALIGNDZ256rri, VALIGNQZ256rri)>; // "Z256" suffix = 256-bit (YMM) forms.
1136+
11211137
defm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
11221138

11231139
def Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {

0 commit comments

Comments
 (0)