From 3b35d0e9472677e5d676b1002bea644906a4867d Mon Sep 17 00:00:00 2001 From: "Wang, Phoebe" Date: Thu, 2 Jan 2025 11:31:17 +0800 Subject: [PATCH] [X86][NFC] Move "_Int" after "k"/"kz" --- .../X86/MCTargetDesc/X86ATTInstPrinter.cpp | 12 +- .../X86/MCTargetDesc/X86InstComments.cpp | 19 +- .../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 12 +- .../X86/MCTargetDesc/X86IntelInstPrinter.cpp | 12 +- llvm/lib/Target/X86/X86InstrAVX10.td | 35 ++- llvm/lib/Target/X86/X86InstrAVX512.td | 246 ++++++++------- llvm/lib/Target/X86/X86InstrFMA3Info.cpp | 15 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 294 +++++++++--------- llvm/lib/Target/X86/X86SchedSapphireRapids.td | 52 ++-- llvm/lib/Target/X86/X86ScheduleZnver4.td | 4 +- llvm/test/TableGen/x86-fold-tables.inc | 282 ++++++++--------- 11 files changed, 505 insertions(+), 478 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index b67c573e217ba..abe0cc6365dd4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -140,8 +140,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI, case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik: case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik: case X86::VCMPPSZrmik: case X86::VCMPPSZrrik: - case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk: - case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk: + case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int: + case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int: case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik: case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik: case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik: @@ -150,8 +150,8 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI, case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik: case X86::VCMPPDZrrib: case X86::VCMPPDZrribk: case X86::VCMPPSZrrib: case X86::VCMPPSZrribk: - case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk: - case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk: + case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int: + case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int: case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri: case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri: case X86::VCMPPHZrmi: case X86::VCMPPHZrri: @@ -160,12 +160,12 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI, case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik: case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik: case X86::VCMPPHZrmik: case X86::VCMPPHZrrik: - case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk: + case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int: case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik: case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik: case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik: case X86::VCMPPHZrrib: case X86::VCMPPHZrribk: - case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk: + case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int: case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri: case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 9f8bc57fbc76d..681d0dab37d09 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -40,6 +40,17 @@ using namespace llvm; CASE_MASK_INS_COMMON(Inst, Suffix, src) \ CASE_MASKZ_INS_COMMON(Inst, Suffix, src) +#define CASE_MASK_INS_COMMON_INT(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src##k_Int: + +#define CASE_MASKZ_INS_COMMON_INT(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src##kz_Int: + +#define CASE_AVX512_INS_COMMON_INT(Inst, Suffix, src) \ + CASE_AVX_INS_COMMON(Inst, Suffix, src##_Int) \ + CASE_MASK_INS_COMMON_INT(Inst, Suffix, src) \ + CASE_MASKZ_INS_COMMON_INT(Inst, Suffix, src) + #define CASE_FPCLASS_PACKED(Inst, src) \ CASE_AVX_INS_COMMON(Inst, Z, src##i) \ CASE_AVX_INS_COMMON(Inst, Z256, src##i) \ @@ -196,8 +207,8 @@ using namespace llvm; CASE_AVX_INS_COMMON(Inst##SS, , r_Int) \ CASE_AVX_INS_COMMON(Inst##SD, Z, r) \ CASE_AVX_INS_COMMON(Inst##SS, Z, r) \ - CASE_AVX512_INS_COMMON(Inst##SD, Z, r_Int) \ - CASE_AVX512_INS_COMMON(Inst##SS, Z, r_Int) + CASE_AVX512_INS_COMMON_INT(Inst##SD, Z, r) \ + CASE_AVX512_INS_COMMON_INT(Inst##SS, Z, r) #define CASE_FMA_SCALAR_MEM(Inst) \ CASE_AVX_INS_COMMON(Inst##SD, , m) \ @@ -206,8 +217,8 @@ using namespace llvm; CASE_AVX_INS_COMMON(Inst##SS, , m_Int) \ CASE_AVX_INS_COMMON(Inst##SD, Z, m) \ CASE_AVX_INS_COMMON(Inst##SS, Z, m) \ - CASE_AVX512_INS_COMMON(Inst##SD, Z, m_Int) \ - CASE_AVX512_INS_COMMON(Inst##SS, Z, m_Int) + CASE_AVX512_INS_COMMON_INT(Inst##SD, Z, m) \ + CASE_AVX512_INS_COMMON_INT(Inst##SS, Z, m) #define CASE_FMA4(Inst, suf) \ CASE_AVX_INS_COMMON(Inst, 4, suf) \ diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index fafcc737ff983..01e2d4ace9773 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -277,8 +277,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp, case X86::VCMPSDrmi_Int: case X86::VCMPSDrri_Int: case X86::VCMPSDZrmi: case X86::VCMPSDZrri: case X86::VCMPSDZrmi_Int: case X86::VCMPSDZrri_Int: - case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk: - case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk: + case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int: + case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int: OS << "sd\t"; break; case X86::CMPSSrmi: case X86::CMPSSrri: @@ -287,8 +287,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp, case X86::VCMPSSrmi_Int: case X86::VCMPSSrri_Int: case X86::VCMPSSZrmi: case X86::VCMPSSZrri: case X86::VCMPSSZrmi_Int: case X86::VCMPSSZrri_Int: - case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk: - case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk: + case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int: + case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int: OS << "ss\t"; break; case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri: @@ -305,8 +305,8 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp, break; case X86::VCMPSHZrmi: case X86::VCMPSHZrri: case X86::VCMPSHZrmi_Int: case X86::VCMPSHZrri_Int: - case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk: - case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk: + case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int: + case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int: OS << "sh\t"; break; case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index 680092679c903..c26dc2ca5a7a4 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -119,8 +119,8 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS case X86::VCMPPSZ128rmik: case X86::VCMPPSZ128rrik: case X86::VCMPPSZ256rmik: case X86::VCMPPSZ256rrik: case X86::VCMPPSZrmik: case X86::VCMPPSZrrik: - case X86::VCMPSDZrmi_Intk: case X86::VCMPSDZrri_Intk: - case X86::VCMPSSZrmi_Intk: case X86::VCMPSSZrri_Intk: + case X86::VCMPSDZrmik_Int: case X86::VCMPSDZrrik_Int: + case X86::VCMPSSZrmik_Int: case X86::VCMPSSZrrik_Int: case X86::VCMPPDZ128rmbi: case X86::VCMPPDZ128rmbik: case X86::VCMPPDZ256rmbi: case X86::VCMPPDZ256rmbik: case X86::VCMPPDZrmbi: case X86::VCMPPDZrmbik: @@ -129,8 +129,8 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS case X86::VCMPPSZrmbi: case X86::VCMPPSZrmbik: case X86::VCMPPDZrrib: case X86::VCMPPDZrribk: case X86::VCMPPSZrrib: case X86::VCMPPSZrribk: - case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrrib_Intk: - case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrrib_Intk: + case X86::VCMPSDZrrib_Int: case X86::VCMPSDZrribk_Int: + case X86::VCMPSSZrrib_Int: case X86::VCMPSSZrribk_Int: case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri: case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri: case X86::VCMPPHZrmi: case X86::VCMPPHZrri: @@ -139,12 +139,12 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik: case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik: case X86::VCMPPHZrmik: case X86::VCMPPHZrrik: - case X86::VCMPSHZrmi_Intk: case X86::VCMPSHZrri_Intk: + case X86::VCMPSHZrmik_Int: case X86::VCMPSHZrrik_Int: case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik: case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik: case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik: case X86::VCMPPHZrrib: case X86::VCMPPHZrribk: - case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrrib_Intk: + case X86::VCMPSHZrrib_Int: case X86::VCMPSHZrribk_Int: case X86::VCMPPBF16Z128rmi: case X86::VCMPPBF16Z128rri: case X86::VCMPPBF16Z256rmi: case X86::VCMPPBF16Z256rri: case X86::VCMPPBF16Zrmi: case X86::VCMPPBF16Zrri: diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 3bc64eda01a9c..cda6998778bc4 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -417,27 +417,30 @@ multiclass avx10_minmax_scalar, Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; } - defm rri_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), - OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 timm:$src3)))>, + defm rri : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), + OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (_.VT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 timm:$src3))), + 0, 0, 0, vselect_mask, "", "_Int">, Sched<[WriteFMAX]>; - defm rmi_Int : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst), - (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), - OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", - (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), - (i32 timm:$src3)))>, + defm rmi : AVX512_maskable<0x53, MRMSrcMem, _, (outs VR128X:$dst), + (ins VR128X:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), + OpStr, "$src3, $src2, $src1", "$src1, $src2, $src3", + (_.VT (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), + (i32 timm:$src3))), + 0, 0, 0, vselect_mask, "", "_Int">, Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>; } let Uses = [], mayRaiseFPException = 0 in - defm rrib_Int : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), - (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), - OpStr, "$src3, {sae}, $src2, $src1", - "$src1, $src2, {sae}, $src3", - (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), - (i32 timm:$src3)))>, + defm rrib : AVX512_maskable<0x53, MRMSrcReg, _, (outs VR128X:$dst), + (ins VR128X:$src1, VR128X:$src2, i32u8imm:$src3), + OpStr, "$src3, {sae}, $src2, $src1", + "$src1, $src2, {sae}, $src3", + (_.VT (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (i32 timm:$src3))), + 0, 0, 0, vselect_mask, "", "_Int">, Sched<[WriteFMAX]>, EVEX_B; } } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e899807cd1b7c..d6ca4b142afe0 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -28,19 +28,20 @@ multiclass AVX512_maskable_custom O, Format F, bit IsCommutable = 0, bit IsKCommutable = 0, bit IsKZCommutable = IsCommutable, - string ClobberConstraint = ""> { + string ClobberConstraint = "", + string Suffix = ""> { let isCommutable = IsCommutable, Constraints = ClobberConstraint in - def NAME: AVX512; + def Suffix: AVX512; // Prefer over VMOV*rrk Pat<> let isCommutable = IsKCommutable in - def NAME#k: AVX512, + def k#Suffix: AVX512, EVEX_K { // In case of the 3src subclass this is overridden with a let. string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint, @@ -52,10 +53,10 @@ multiclass AVX512_maskable_custom O, Format F, // So, it is Ok to use IsCommutable instead of IsKCommutable. let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<> Constraints = ClobberConstraint in - def NAME#kz: AVX512, + def kz#Suffix: AVX512, EVEX_KZ; } @@ -72,7 +73,8 @@ multiclass AVX512_maskable_common O, Format F, X86VectorVTInfo _, bit IsCommutable = 0, bit IsKCommutable = 0, bit IsKZCommutable = IsCommutable, - string ClobberConstraint = ""> : + string ClobberConstraint = "", + string Suffix = ""> : AVX512_maskable_custom O, Format F, X86VectorVTInfo _, [(set _.RC:$dst, (Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))], MaskingConstraint, IsCommutable, - IsKCommutable, IsKZCommutable, ClobberConstraint>; + IsKCommutable, IsKZCommutable, ClobberConstraint, + Suffix>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the vector instruction. In the masking case, the @@ -115,23 +118,24 @@ multiclass AVX512_maskable O, Format F, X86VectorVTInfo _, bit IsCommutable = 0, bit IsKCommutable = 0, bit IsKZCommutable = IsCommutable, SDPatternOperator Select = vselect_mask, - string ClobberConstraint = ""> : + string ClobberConstraint = "", + string Suffix = ""> : AVX512_maskable_common; + IsKZCommutable, ClobberConstraint, Suffix>; // This multiclass generates the unconditional/non-masking, the masking and // the zero-masking variant of the scalar instruction. multiclass AVX512_maskable_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS> : + dag RHS, string Suffix = ""> : AVX512_maskable; + RHS, 0, 0, 0, X86selects_mask, "", Suffix>; // Similar to AVX512_maskable but in this case one of the source operands // ($src1) is already tied to $dst so we just use that for the preserved @@ -144,7 +148,7 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, bit IsCommutable = 0, bit IsKCommutable = 0, SDPatternOperator Select = vselect_mask, - bit MaskOnly = 0> : + bit MaskOnly = 0, string Suffix = ""> : AVX512_maskable_common O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, !if(MaskOnly, (null_frag), RHS), (Select _.KRCWM:$mask, RHS, _.RC:$src1), - Select, "", IsCommutable, IsKCommutable>; + Select, "", IsCommutable, IsKCommutable, + IsCommutable, "", Suffix>; // Similar to AVX512_maskable_3src but in this case the input VT for the tied // operand differs from the output VT. This requires a bitconvert on @@ -178,10 +183,10 @@ multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag RHS, bit IsCommutable = 0, bit IsKCommutable = 0, - bit MaskOnly = 0> : + bit MaskOnly = 0, string Suffix = ""> : AVX512_maskable_3src; + X86selects_mask, MaskOnly, Suffix>; multiclass AVX512_maskable_in_asm O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, @@ -215,17 +220,18 @@ multiclass AVX512_maskable_custom_cmp O, Format F, string AttSrcAsm, string IntelSrcAsm, list Pattern, list MaskingPattern, - bit IsCommutable = 0> { + bit IsCommutable = 0, + string Suffix = ""> { let isCommutable = IsCommutable in { - def NAME: AVX512; - def NAME#k: AVX512, EVEX_K; + def k#Suffix: AVX512, EVEX_K; } } @@ -235,20 +241,22 @@ multiclass AVX512_maskable_common_cmp O, Format F, X86VectorVTInfo _, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, dag RHS, dag MaskingRHS, - bit IsCommutable = 0> : + bit IsCommutable = 0, + string Suffix = ""> : AVX512_maskable_custom_cmp; + [(set _.KRC:$dst, MaskingRHS)], IsCommutable, Suffix>; multiclass AVX512_maskable_cmp O, Format F, X86VectorVTInfo _, dag Outs, dag Ins, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, - dag RHS, dag RHS_su, bit IsCommutable = 0> : + dag RHS, dag RHS_su, bit IsCommutable = 0, + string Suffix = ""> : AVX512_maskable_common_cmp; + (and _.KRCWM:$mask, RHS_su), IsCommutable, Suffix>; // Used by conversion instructions. multiclass AVX512_maskable_cvt O, Format F, X86VectorVTInfo _, @@ -1937,37 +1945,37 @@ defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend, multiclass avx512_cmp_scalar { - defm rri_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, - (outs _.KRC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), - "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc", - (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), - (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc)>, - EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; + defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc", + (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), + (OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc), 0, "_Int">, + EVEX, VVVV, VEX_LIG, Sched<[sched]>, SIMD_EXC; let mayLoad = 1 in - defm rmi_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, - (outs _.KRC:$dst), - (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), - "vcmp"#_.Suffix, - "$cc, $src2, $src1", "$src1, $src2, $cc", - (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), - timm:$cc), - (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), - timm:$cc)>, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.IntScalarMemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc", + (OpNode (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), + timm:$cc), + (OpNode_su (_.VT _.RC:$src1), (_.ScalarIntMemFrags addr:$src2), + timm:$cc), 0, "_Int">, EVEX, VVVV, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>, + Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in - defm rrib_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, - (outs _.KRC:$dst), - (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), - "vcmp"#_.Suffix, - "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", - (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), - timm:$cc), - (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), - timm:$cc)>, - EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; + defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc", + (OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), + timm:$cc), + (OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), + timm:$cc), 0, "_Int">, + EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; let isCodeGenOnly = 1 in { let isCommutable = 1 in @@ -5354,17 +5362,17 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, SDPatternOperator OpNode, SDNode VecNode, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1 in { - defm rr_Int : AVX512_maskable_scalar, + (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">, Sched<[sched]>; - defm rm_Int : AVX512_maskable_scalar, + (_.ScalarIntMemFrags addr:$src2))), "_Int">, Sched<[sched.Folded, sched.ReadAfterFold]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), @@ -5387,28 +5395,28 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, multiclass avx512_fp_scalar_round opc, string OpcodeStr,X86VectorVTInfo _, SDNode VecNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain, Uses = [MXCSR] in - defm rrb_Int : AVX512_maskable_scalar, + (i32 timm:$rc)), "_Int">, EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, SDPatternOperator OpNode, SDNode VecNode, SDNode SaeNode, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { - defm rr_Int : AVX512_maskable_scalar, + (_.VT (VecNode _.RC:$src1, _.RC:$src2)), "_Int">, Sched<[sched]>, SIMD_EXC; - defm rm_Int : AVX512_maskable_scalar, + (_.ScalarIntMemFrags addr:$src2))), "_Int">, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let isCodeGenOnly = 1, Predicates = [HasAVX512], @@ -5429,10 +5437,10 @@ multiclass avx512_fp_scalar_sae opc, string OpcodeStr,X86VectorVTInfo _, } let Uses = [MXCSR] in - defm rrb_Int : AVX512_maskable_scalar, + (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), "_Int">, EVEX_B, Sched<[sched]>; } } @@ -6835,22 +6843,22 @@ defm VFNMSUB132 : avx512_fma3p_132_f<0x9E, "vfnmsub132", X86any_Fnmsub, multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, dag RHS_r, dag RHS_m, dag RHS_b, bit MaskOnlyReg> { let Constraints = "$src1 = $dst", hasSideEffects = 0 in { - defm r_Int: AVX512_maskable_3src_scalar, + "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">, EVEX, VVVV, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC; let mayLoad = 1 in - defm m_Int: AVX512_maskable_3src_scalar, + "$src3, $src2", "$src2, $src3", (null_frag), 1, 1, 0, "_Int">, EVEX, VVVV, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in - defm rb_Int: AVX512_maskable_3src_scalar, + OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1, 0, "_Int">, EVEX, VVVV, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>; let isCodeGenOnly = 1, isCommutable = 1 in { @@ -6982,7 +6990,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zr_Intk") + (!cast(Prefix#"213"#Suffix#"Zrk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; @@ -6993,7 +7001,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zm_Intk") + (!cast(Prefix#"213"#Suffix#"Zmk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7002,7 +7010,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"132"#Suffix#"Zm_Intk") + (!cast(Prefix#"132"#Suffix#"Zmk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7011,7 +7019,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zr_Intk") + (!cast(Prefix#"231"#Suffix#"Zrk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; @@ -7021,7 +7029,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zm_Intk") + (!cast(Prefix#"231"#Suffix#"Zmk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7031,7 +7039,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zr_Intkz") + (!cast(Prefix#"213"#Suffix#"Zrkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; @@ -7041,7 +7049,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zr_Intkz") + (!cast(Prefix#"231"#Suffix#"Zrkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)))>; @@ -7052,7 +7060,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zm_Intkz") + (!cast(Prefix#"213"#Suffix#"Zmkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7061,7 +7069,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"132"#Suffix#"Zm_Intkz") + (!cast(Prefix#"132"#Suffix#"Zmkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7070,7 +7078,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zm_Intkz") + (!cast(Prefix#"231"#Suffix#"Zmkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), addr:$src3)>; @@ -7097,7 +7105,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zrb_Intk") + (!cast(Prefix#"213"#Suffix#"Zrbk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; @@ -7108,7 +7116,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zrb_Intk") + (!cast(Prefix#"231"#Suffix#"Zrbk_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; @@ -7119,7 +7127,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"213"#Suffix#"Zrb_Intkz") + (!cast(Prefix#"213"#Suffix#"Zrbkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; @@ -7130,7 +7138,7 @@ multiclass avx512_scalar_fma_patterns(Prefix#"231"#Suffix#"Zrb_Intkz") + (!cast(Prefix#"231"#Suffix#"Zrbkz_Int") VR128X:$src1, VK1WM:$mask, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)), (_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>; @@ -7628,17 +7636,17 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in multiclass avx512_cvt_fp_scalar opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNode, X86FoldableSchedWrite sched> { - defm rr_Int : AVX512_maskable_scalar, + (_Src.VT _Src.RC:$src2))), "_Int">, EVEX, VVVV, VEX_LIG, Sched<[sched]>; - defm rm_Int : AVX512_maskable_scalar, + (_Src.ScalarIntMemFrags addr:$src2))), "_Int">, EVEX, VVVV, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>; @@ -7660,11 +7668,11 @@ multiclass avx512_cvt_fp_sae_scalar opc, string OpcodeStr, X86VectorVTIn X86VectorVTInfo _Src, SDNode OpNodeSAE, X86FoldableSchedWrite sched> { let Uses = [MXCSR] in - defm rrb_Int : AVX512_maskable_scalar, + (_Src.VT _Src.RC:$src2))), "_Int">, EVEX, VVVV, VEX_LIG, EVEX_B, Sched<[sched]>; } @@ -7673,11 +7681,11 @@ multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInf X86VectorVTInfo _Src, SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Uses = [MXCSR] in - defm rrb_Int : AVX512_maskable_scalar, + (_Src.VT _Src.RC:$src2), (i32 timm:$rc))), "_Int">, EVEX, VVVV, VEX_LIG, Sched<[sched]>, EVEX_B, EVEX_RC; } @@ -9531,25 +9539,25 @@ multiclass avx512_sqrt_packed_all_round opc, string OpcodeStr, multiclass avx512_sqrt_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> { let ExeDomain = _.ExeDomain, Predicates = [prd] in { - defm r_Int : AVX512_maskable_scalar, + (_.VT _.RC:$src2)), "_Int">, Sched<[sched]>, SIMD_EXC; - defm m_Int : AVX512_maskable_scalar, + (_.ScalarIntMemFrags addr:$src2)), "_Int">, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let Uses = [MXCSR] in - defm rb_Int : AVX512_maskable_scalar, + (i32 timm:$rc)), "_Int">, EVEX_B, EVEX_RC, Sched<[sched]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -9596,27 +9604,27 @@ defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt", SchedWriteFSqrtSizes>, VEX_LI multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { - defm rri_Int : AVX512_maskable_scalar, + (i32 timm:$src3))), "_Int">, Sched<[sched]>, SIMD_EXC; let Uses = [MXCSR] in - defm rrib_Int : AVX512_maskable_scalar, EVEX_B, + (i32 timm:$src3))), "_Int">, EVEX_B, Sched<[sched]>; - defm rmi_Int : AVX512_maskable_scalar, + (_.ScalarIntMemFrags addr:$src2), (i32 timm:$src3))), "_Int">, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { @@ -9669,13 +9677,13 @@ multiclass avx512_masked_scalar("V"#OpcPrefix#r_Intk) + (!cast("V"#OpcPrefix#rk_Int) _.VT:$dst, OutMask, _.VT:$src2, _.VT:$src1)>; def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects_mask Mask, (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))), - (!cast("V"#OpcPrefix#r_Intkz) + (!cast("V"#OpcPrefix#rkz_Int) OutMask, _.VT:$src2, _.VT:$src1)>; } } @@ -12174,7 +12182,7 @@ multiclass AVX512_scalar_math_fp_patterns("V"#OpcPrefix#"Zrr_Intk") + (!cast("V"#OpcPrefix#"Zrrk_Int") (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), VK1WM:$mask, _.VT:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; @@ -12185,7 +12193,7 @@ multiclass AVX512_scalar_math_fp_patterns("V"#OpcPrefix#"Zrm_Intk") + (!cast("V"#OpcPrefix#"Zrmk_Int") (_.VT (COPY_TO_REGCLASS _.FRC:$src0, VR128X)), VK1WM:$mask, _.VT:$src1, addr:$src2)>; @@ -12196,7 +12204,7 @@ multiclass AVX512_scalar_math_fp_patterns("V"#OpcPrefix#"Zrr_Intkz") + (!cast("V"#OpcPrefix#"Zrrkz_Int") VK1WM:$mask, _.VT:$src1, (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>; def : Pat<(MoveNode (_.VT VR128X:$src1), @@ -12205,7 +12213,7 @@ multiclass AVX512_scalar_math_fp_patterns("V"#OpcPrefix#"Zrm_Intkz") VK1WM:$mask, _.VT:$src1, addr:$src2)>; + (!cast("V"#OpcPrefix#"Zrmkz_Int") VK1WM:$mask, _.VT:$src1, addr:$src2)>; } } diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp index 090ec687d28c4..0da4857d66748 100644 --- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp +++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp @@ -27,6 +27,11 @@ using namespace llvm; FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \ FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked) +#define FMA3GROUP_MASKED_INT(Name, Suf, Attrs) \ + FMA3GROUP(Name, Suf##_Int, Attrs) \ + FMA3GROUP(Name, Suf##k_Int, Attrs | X86InstrFMA3Group::KMergeMasked) \ + FMA3GROUP(Name, Suf##kz_Int, Attrs | X86InstrFMA3Group::KZeroMasked) + #define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \ FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \ FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \ @@ -52,9 +57,9 @@ using namespace llvm; #define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \ FMA3GROUP(Name, Suf##Zm, Attrs) \ - FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \ + FMA3GROUP_MASKED_INT(Name, Suf##Zm, Attrs | X86InstrFMA3Group::Intrinsic) \ FMA3GROUP(Name, Suf##Zr, Attrs) \ - FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \ + FMA3GROUP_MASKED_INT(Name, Suf##Zr, Attrs | X86InstrFMA3Group::Intrinsic) \ #define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \ FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \ @@ -108,11 +113,11 @@ static const X86InstrFMA3Group Groups[] = { #define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \ FMA3GROUP(Name, SDZ##Suf, Attrs) \ - FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \ + FMA3GROUP_MASKED_INT(Name, SDZ##Suf, Attrs) \ FMA3GROUP(Name, SHZ##Suf, Attrs) \ - FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \ + FMA3GROUP_MASKED_INT(Name, SHZ##Suf, Attrs) \ FMA3GROUP(Name, SSZ##Suf, Attrs) \ - FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs) + FMA3GROUP_MASKED_INT(Name, SSZ##Suf, Attrs) static const X86InstrFMA3Group BroadcastGroups[] = { FMA3GROUP_PACKED_AVX512_ALL(VFMADD, mb, 0) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 5a6ea1182ccb8..30a5161bbcc50 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -7646,8 +7646,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::CVTSS2SDrr_Int: case X86::VCVTSS2SDrr_Int: case X86::VCVTSS2SDZrr_Int: - case X86::VCVTSS2SDZrr_Intk: - case X86::VCVTSS2SDZrr_Intkz: + case X86::VCVTSS2SDZrrk_Int: + case X86::VCVTSS2SDZrrkz_Int: case X86::CVTSS2SIrr_Int: case X86::CVTSS2SI64rr_Int: case X86::VCVTSS2SIrr_Int: @@ -7700,21 +7700,21 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int: - case X86::VADDSSZrr_Intk: - case X86::VADDSSZrr_Intkz: - case X86::VCMPSSZrri_Intk: - case X86::VDIVSSZrr_Intk: - case X86::VDIVSSZrr_Intkz: - case X86::VMAXSSZrr_Intk: - case X86::VMAXSSZrr_Intkz: - case X86::VMINSSZrr_Intk: - case X86::VMINSSZrr_Intkz: - case X86::VMULSSZrr_Intk: - case X86::VMULSSZrr_Intkz: - case X86::VSQRTSSZr_Intk: - case X86::VSQRTSSZr_Intkz: - case X86::VSUBSSZrr_Intk: - case X86::VSUBSSZrr_Intkz: + case X86::VADDSSZrrk_Int: + case X86::VADDSSZrrkz_Int: + case X86::VCMPSSZrrik_Int: + case X86::VDIVSSZrrk_Int: + case X86::VDIVSSZrrkz_Int: + case X86::VMAXSSZrrk_Int: + case X86::VMAXSSZrrkz_Int: + case X86::VMINSSZrrk_Int: + case X86::VMINSSZrrkz_Int: + case X86::VMULSSZrrk_Int: + case X86::VMULSSZrrkz_Int: + case X86::VSQRTSSZrk_Int: + case X86::VSQRTSSZrkz_Int: + case X86::VSUBSSZrrk_Int: + case X86::VSUBSSZrrkz_Int: case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int: case X86::VFMSUBSS4rr_Int: @@ -7743,30 +7743,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VFNMSUB213SSZr_Int: case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int: - case X86::VFMADD132SSZr_Intk: - case X86::VFNMADD132SSZr_Intk: - case X86::VFMADD213SSZr_Intk: - case X86::VFNMADD213SSZr_Intk: - case X86::VFMADD231SSZr_Intk: - case X86::VFNMADD231SSZr_Intk: - case X86::VFMSUB132SSZr_Intk: - case X86::VFNMSUB132SSZr_Intk: - case X86::VFMSUB213SSZr_Intk: - case X86::VFNMSUB213SSZr_Intk: - case X86::VFMSUB231SSZr_Intk: - case X86::VFNMSUB231SSZr_Intk: - case X86::VFMADD132SSZr_Intkz: - case X86::VFNMADD132SSZr_Intkz: - case X86::VFMADD213SSZr_Intkz: - case X86::VFNMADD213SSZr_Intkz: - case X86::VFMADD231SSZr_Intkz: - case X86::VFNMADD231SSZr_Intkz: - case X86::VFMSUB132SSZr_Intkz: - case X86::VFNMSUB132SSZr_Intkz: - case X86::VFMSUB213SSZr_Intkz: - case X86::VFNMSUB213SSZr_Intkz: - case X86::VFMSUB231SSZr_Intkz: - case X86::VFNMSUB231SSZr_Intkz: + case X86::VFMADD132SSZrk_Int: + case X86::VFNMADD132SSZrk_Int: + case X86::VFMADD213SSZrk_Int: + case X86::VFNMADD213SSZrk_Int: + case X86::VFMADD231SSZrk_Int: + case X86::VFNMADD231SSZrk_Int: + case X86::VFMSUB132SSZrk_Int: + case X86::VFNMSUB132SSZrk_Int: + case X86::VFMSUB213SSZrk_Int: + case X86::VFNMSUB213SSZrk_Int: + case X86::VFMSUB231SSZrk_Int: + case X86::VFNMSUB231SSZrk_Int: + case X86::VFMADD132SSZrkz_Int: + case X86::VFNMADD132SSZrkz_Int: + case X86::VFMADD213SSZrkz_Int: + case X86::VFNMADD213SSZrkz_Int: + case X86::VFMADD231SSZrkz_Int: + case X86::VFNMADD231SSZrkz_Int: + case X86::VFMSUB132SSZrkz_Int: + case X86::VFNMSUB132SSZrkz_Int: + case X86::VFMSUB213SSZrkz_Int: + case X86::VFNMSUB213SSZrkz_Int: + case X86::VFMSUB231SSZrkz_Int: + case X86::VFNMSUB231SSZrkz_Int: case X86::VFIXUPIMMSSZrri: case X86::VFIXUPIMMSSZrrik: case X86::VFIXUPIMMSSZrrikz: @@ -7791,8 +7791,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VREDUCESSZrrik: case X86::VREDUCESSZrrikz: case X86::VRNDSCALESSZrri_Int: - case X86::VRNDSCALESSZrri_Intk: - case X86::VRNDSCALESSZrri_Intkz: + case X86::VRNDSCALESSZrrik_Int: + case X86::VRNDSCALESSZrrikz_Int: case X86::VRSQRT14SSZrr: case X86::VRSQRT14SSZrrk: case X86::VRSQRT14SSZrrkz: @@ -7819,8 +7819,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::CVTSD2SSrr_Int: case X86::VCVTSD2SSrr_Int: case X86::VCVTSD2SSZrr_Int: - case X86::VCVTSD2SSZrr_Intk: - case X86::VCVTSD2SSZrr_Intkz: + case X86::VCVTSD2SSZrrk_Int: + case X86::VCVTSD2SSZrrkz_Int: case X86::CVTSD2SIrr_Int: case X86::CVTSD2SI64rr_Int: case X86::VCVTSD2SIrr_Int: @@ -7869,21 +7869,21 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int: - case X86::VADDSDZrr_Intk: - case X86::VADDSDZrr_Intkz: - case X86::VCMPSDZrri_Intk: - case X86::VDIVSDZrr_Intk: - case X86::VDIVSDZrr_Intkz: - case X86::VMAXSDZrr_Intk: - case X86::VMAXSDZrr_Intkz: - case X86::VMINSDZrr_Intk: - case X86::VMINSDZrr_Intkz: - case X86::VMULSDZrr_Intk: - case X86::VMULSDZrr_Intkz: - case X86::VSQRTSDZr_Intk: - case X86::VSQRTSDZr_Intkz: - case X86::VSUBSDZrr_Intk: - case X86::VSUBSDZrr_Intkz: + case X86::VADDSDZrrk_Int: + case X86::VADDSDZrrkz_Int: + case X86::VCMPSDZrrik_Int: + case X86::VDIVSDZrrk_Int: + case X86::VDIVSDZrrkz_Int: + case X86::VMAXSDZrrk_Int: + case X86::VMAXSDZrrkz_Int: + case X86::VMINSDZrrk_Int: + case X86::VMINSDZrrkz_Int: + case X86::VMULSDZrrk_Int: + case X86::VMULSDZrrkz_Int: + case X86::VSQRTSDZrk_Int: + case X86::VSQRTSDZrkz_Int: + case X86::VSUBSDZrrk_Int: + case X86::VSUBSDZrrkz_Int: case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int: case X86::VFMSUBSD4rr_Int: @@ -7912,30 +7912,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VFNMSUB213SDZr_Int: case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int: - case X86::VFMADD132SDZr_Intk: - case X86::VFNMADD132SDZr_Intk: - case X86::VFMADD213SDZr_Intk: - case X86::VFNMADD213SDZr_Intk: - case X86::VFMADD231SDZr_Intk: - case X86::VFNMADD231SDZr_Intk: - case X86::VFMSUB132SDZr_Intk: - case X86::VFNMSUB132SDZr_Intk: - case X86::VFMSUB213SDZr_Intk: - case X86::VFNMSUB213SDZr_Intk: - case X86::VFMSUB231SDZr_Intk: - case X86::VFNMSUB231SDZr_Intk: - case X86::VFMADD132SDZr_Intkz: - case X86::VFNMADD132SDZr_Intkz: - case X86::VFMADD213SDZr_Intkz: - case X86::VFNMADD213SDZr_Intkz: - case X86::VFMADD231SDZr_Intkz: - case X86::VFNMADD231SDZr_Intkz: - case X86::VFMSUB132SDZr_Intkz: - case X86::VFNMSUB132SDZr_Intkz: - case X86::VFMSUB213SDZr_Intkz: - case X86::VFNMSUB213SDZr_Intkz: - case X86::VFMSUB231SDZr_Intkz: - case X86::VFNMSUB231SDZr_Intkz: + case X86::VFMADD132SDZrk_Int: + case X86::VFNMADD132SDZrk_Int: + case X86::VFMADD213SDZrk_Int: + case X86::VFNMADD213SDZrk_Int: + case X86::VFMADD231SDZrk_Int: + case X86::VFNMADD231SDZrk_Int: + case X86::VFMSUB132SDZrk_Int: + case X86::VFNMSUB132SDZrk_Int: + case X86::VFMSUB213SDZrk_Int: + case X86::VFNMSUB213SDZrk_Int: + case X86::VFMSUB231SDZrk_Int: + case X86::VFNMSUB231SDZrk_Int: + case X86::VFMADD132SDZrkz_Int: + case X86::VFNMADD132SDZrkz_Int: + case X86::VFMADD213SDZrkz_Int: + case X86::VFNMADD213SDZrkz_Int: + case X86::VFMADD231SDZrkz_Int: + case X86::VFNMADD231SDZrkz_Int: + case X86::VFMSUB132SDZrkz_Int: + case X86::VFNMSUB132SDZrkz_Int: + case X86::VFMSUB213SDZrkz_Int: + case X86::VFNMSUB213SDZrkz_Int: + case X86::VFMSUB231SDZrkz_Int: + case X86::VFNMSUB231SDZrkz_Int: case X86::VFIXUPIMMSDZrri: case X86::VFIXUPIMMSDZrrik: case X86::VFIXUPIMMSDZrrikz: @@ -7960,8 +7960,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VREDUCESDZrrik: case X86::VREDUCESDZrrikz: case X86::VRNDSCALESDZrri_Int: - case X86::VRNDSCALESDZrri_Intk: - case X86::VRNDSCALESDZrri_Intkz: + case X86::VRNDSCALESDZrrik_Int: + case X86::VRNDSCALESDZrrikz_Int: case X86::VRSQRT14SDZrr: case X86::VRSQRT14SDZrrk: case X86::VRSQRT14SDZrrkz: @@ -7989,19 +7989,19 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VMINSHZrr_Int: case X86::VMULSHZrr_Int: case X86::VSUBSHZrr_Int: - case X86::VADDSHZrr_Intk: - case X86::VADDSHZrr_Intkz: - case X86::VCMPSHZrri_Intk: - case X86::VDIVSHZrr_Intk: - case X86::VDIVSHZrr_Intkz: - case X86::VMAXSHZrr_Intk: - case X86::VMAXSHZrr_Intkz: - case X86::VMINSHZrr_Intk: - case X86::VMINSHZrr_Intkz: - case X86::VMULSHZrr_Intk: - case X86::VMULSHZrr_Intkz: - case X86::VSUBSHZrr_Intk: - case X86::VSUBSHZrr_Intkz: + case X86::VADDSHZrrk_Int: + case X86::VADDSHZrrkz_Int: + case X86::VCMPSHZrrik_Int: + case X86::VDIVSHZrrk_Int: + case X86::VDIVSHZrrkz_Int: + case X86::VMAXSHZrrk_Int: + case X86::VMAXSHZrrkz_Int: + case X86::VMINSHZrrk_Int: + case X86::VMINSHZrrkz_Int: + case X86::VMULSHZrrk_Int: + case X86::VMULSHZrrkz_Int: + case X86::VSUBSHZrrk_Int: + case X86::VSUBSHZrrkz_Int: case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int: case X86::VFMADD213SHZr_Int: @@ -8014,30 +8014,30 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VFNMSUB213SHZr_Int: case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int: - case X86::VFMADD132SHZr_Intk: - case X86::VFNMADD132SHZr_Intk: - case X86::VFMADD213SHZr_Intk: - case X86::VFNMADD213SHZr_Intk: - case X86::VFMADD231SHZr_Intk: - case X86::VFNMADD231SHZr_Intk: - case X86::VFMSUB132SHZr_Intk: - case X86::VFNMSUB132SHZr_Intk: - case X86::VFMSUB213SHZr_Intk: - case X86::VFNMSUB213SHZr_Intk: - case X86::VFMSUB231SHZr_Intk: - case X86::VFNMSUB231SHZr_Intk: - case X86::VFMADD132SHZr_Intkz: - case X86::VFNMADD132SHZr_Intkz: - case X86::VFMADD213SHZr_Intkz: - case X86::VFNMADD213SHZr_Intkz: - case X86::VFMADD231SHZr_Intkz: - case X86::VFNMADD231SHZr_Intkz: - case X86::VFMSUB132SHZr_Intkz: - case X86::VFNMSUB132SHZr_Intkz: - case X86::VFMSUB213SHZr_Intkz: - case X86::VFNMSUB213SHZr_Intkz: - case X86::VFMSUB231SHZr_Intkz: - case X86::VFNMSUB231SHZr_Intkz: + case X86::VFMADD132SHZrk_Int: + case X86::VFNMADD132SHZrk_Int: + case X86::VFMADD213SHZrk_Int: + case X86::VFNMADD213SHZrk_Int: + case X86::VFMADD231SHZrk_Int: + case X86::VFNMADD231SHZrk_Int: + case X86::VFMSUB132SHZrk_Int: + case X86::VFNMSUB132SHZrk_Int: + case X86::VFMSUB213SHZrk_Int: + case X86::VFNMSUB213SHZrk_Int: + case X86::VFMSUB231SHZrk_Int: + case X86::VFNMSUB231SHZrk_Int: + case X86::VFMADD132SHZrkz_Int: + case X86::VFNMADD132SHZrkz_Int: + case X86::VFMADD213SHZrkz_Int: + case X86::VFNMADD213SHZrkz_Int: + case X86::VFMADD231SHZrkz_Int: + case X86::VFNMADD231SHZrkz_Int: + case X86::VFMSUB132SHZrkz_Int: + case X86::VFNMSUB132SHZrkz_Int: + case X86::VFMSUB213SHZrkz_Int: + case X86::VFNMSUB213SHZrkz_Int: + case X86::VFMSUB231SHZrkz_Int: + case X86::VFNMSUB231SHZrkz_Int: return false; default: return true; @@ -9489,25 +9489,25 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VDIVSDZrm: case X86::VDIVSDZrr: case X86::VDIVSDZrm_Int: - case X86::VDIVSDZrm_Intk: - case X86::VDIVSDZrm_Intkz: + case X86::VDIVSDZrmk_Int: + case X86::VDIVSDZrmkz_Int: case X86::VDIVSDZrr_Int: - case X86::VDIVSDZrr_Intk: - case X86::VDIVSDZrr_Intkz: + case X86::VDIVSDZrrk_Int: + case X86::VDIVSDZrrkz_Int: case X86::VDIVSDZrrb_Int: - case X86::VDIVSDZrrb_Intk: - case X86::VDIVSDZrrb_Intkz: + case X86::VDIVSDZrrbk_Int: + case X86::VDIVSDZrrbkz_Int: case X86::VDIVSSZrm: case X86::VDIVSSZrr: case X86::VDIVSSZrm_Int: - case X86::VDIVSSZrm_Intk: - case X86::VDIVSSZrm_Intkz: + case X86::VDIVSSZrmk_Int: + case X86::VDIVSSZrmkz_Int: case X86::VDIVSSZrr_Int: - case X86::VDIVSSZrr_Intk: - case X86::VDIVSSZrr_Intkz: + case X86::VDIVSSZrrk_Int: + case X86::VDIVSSZrrkz_Int: case X86::VDIVSSZrrb_Int: - case X86::VDIVSSZrrb_Intk: - case X86::VDIVSSZrrb_Intkz: + case X86::VDIVSSZrrbk_Int: + case X86::VDIVSSZrrbkz_Int: case X86::VSQRTPDZ128m: case X86::VSQRTPDZ128mb: case X86::VSQRTPDZ128mbk: @@ -9570,26 +9570,26 @@ bool X86InstrInfo::isHighLatencyDef(int opc) const { case X86::VSQRTPSZrkz: case X86::VSQRTSDZm: case X86::VSQRTSDZm_Int: - case X86::VSQRTSDZm_Intk: - case X86::VSQRTSDZm_Intkz: + case X86::VSQRTSDZmk_Int: + case X86::VSQRTSDZmkz_Int: case X86::VSQRTSDZr: case X86::VSQRTSDZr_Int: - case X86::VSQRTSDZr_Intk: - case X86::VSQRTSDZr_Intkz: + case X86::VSQRTSDZrk_Int: + case X86::VSQRTSDZrkz_Int: case X86::VSQRTSDZrb_Int: - case X86::VSQRTSDZrb_Intk: - case X86::VSQRTSDZrb_Intkz: + case X86::VSQRTSDZrbk_Int: + case X86::VSQRTSDZrbkz_Int: case X86::VSQRTSSZm: case X86::VSQRTSSZm_Int: - case X86::VSQRTSSZm_Intk: - case X86::VSQRTSSZm_Intkz: + case X86::VSQRTSSZmk_Int: + case X86::VSQRTSSZmkz_Int: case X86::VSQRTSSZr: case X86::VSQRTSSZr_Int: - case X86::VSQRTSSZr_Intk: - case X86::VSQRTSSZr_Intkz: + case X86::VSQRTSSZrk_Int: + case X86::VSQRTSSZrkz_Int: case X86::VSQRTSSZrb_Int: - case X86::VSQRTSSZrb_Intk: - case X86::VSQRTSSZrb_Intkz: + case X86::VSQRTSSZrbk_Int: + case X86::VSQRTSSZrbkz_Int: case X86::VGATHERDPDYrm: case X86::VGATHERDPDZ128rm: diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index e04ff68d278b2..4f0d3669a311d 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -669,7 +669,7 @@ def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$", "^VALIGN(D|Q)Z256rri((k|kz)?)$", "^VCMPP(D|H|S)Z(128|256)rri(k?)$", "^VCMPS(D|H|S)Zrri$", - "^VCMPS(D|H|S)Zrr(b?)i_Int(k?)$", + "^VCMPS(D|H|S)Zrr(b?)i(k?)_Int$", "^VFPCLASSP(D|H|S)Z(128|256)ri(k?)$", "^VFPCLASSS(D|H|S)Zri(k?)$", "^VPACK(S|U)S(DW|WB)Yrr$", @@ -977,7 +977,7 @@ def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup49], (instregex "^DIV_F(32|64)m$")>; -def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm_Int((k|kz)?)$")>; +def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm((k|kz)?)_Int$")>; def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instrs VSQRTSHZm)>; def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_10, SPRPort05]> { @@ -1166,11 +1166,11 @@ def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instregex "^(V?)GF2P8AFFINE def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instrs VGETEXPPHZ128mbkz, VGF2P8MULBZ128rm)>; def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd], (instregex "^V(ADD|SUB)SHZrm$", - "^V(ADD|SUB)SHZrm_Int((k|kz)?)$", + "^V(ADD|SUB)SHZrm((k|kz)?)_Int$", "^VCVTSH2SSZrm((_Int)?)$", "^VM(AX|IN)CSHZrm$", "^VM(AX|IN|UL)SHZrm$", - "^VM(AX|IN|UL)SHZrm_Int((k|kz)?)$")>; + "^VM(AX|IN|UL)SHZrm((k|kz)?)_Int$")>; def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBYrmi$", "^VGF2P8AFFINE((INV)?)QBZ256rm(b?)i$", "^VGF2P8MULB(Y|Z256)rm$")>; @@ -1181,7 +1181,7 @@ def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd, ReadAfterVecXLd], (instregex "^VFMSUBADD(132|213|231)PHZ128m((b|k|bk|kz)?)$", "^VFMSUBADD(132|213|231)PHZ128mbkz$")>; def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd, ReadAfterVecLd], (instregex "^VF(N?)M(ADD|SUB)(132|213|231)SHZm$", - "^VF(N?)M(ADD|SUB)(132|213|231)SHZm_Int((k|kz)?)$")>; + "^VF(N?)M(ADD|SUB)(132|213|231)SHZm((k|kz)?)_Int$")>; def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZ256m((b|k|bk|kz)?)$", "^VPMADD52(H|L)UQZ256mbkz$")>; @@ -2301,7 +2301,7 @@ def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S "^VRNDSCALEP(D|S)Z128rmbik(z?)$", "^VRNDSCALEP(D|S)Z128rmi((kz)?)$", "^VRNDSCALES(D|S)Zrmi$", - "^VRNDSCALES(D|S)Zrmi_Int((k|kz)?)$")>; + "^VRNDSCALES(D|S)Zrmi((k|kz)?)_Int$")>; def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> { let ReleaseAtCycles = [2]; @@ -2313,7 +2313,7 @@ def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)ri$", "^(V?)ROUNDS(D|S)ri_Int$", "^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$", "^VRNDSCALES(D|S)Zrri$", - "^VRNDSCALES(D|S)Zrri(b?)_Int((k|kz)?)$", + "^VRNDSCALES(D|S)Zrri(b?)((k|kz)?)_Int$", "^VROUNDP(D|S)Yri$")>; def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> { @@ -2530,7 +2530,7 @@ def SPRWriteResGroup249 : SchedWriteRes<[SPRPort01_05]> { let Latency = 4; } def : InstRW<[SPRWriteResGroup249], (instregex "^V(ADD|SUB)P(D|S)Z(128|256)rrkz$", - "^V(ADD|SUB)S(D|S)Zrr(b?)_Intkz$")>; + "^V(ADD|SUB)S(D|S)Zrr(b?)kz_Int$")>; def SPRWriteResGroup250 : SchedWriteRes<[SPRPort00_05]> { let Latency = 3; @@ -2545,11 +2545,11 @@ def SPRWriteResGroup251 : SchedWriteRes<[SPRPort00_01]> { let Latency = 6; } def : InstRW<[SPRWriteResGroup251], (instregex "^V(ADD|SUB)PHZ(128|256)rrk(z?)$", - "^V(ADD|SUB)SHZrr(b?)_Intk(z?)$", + "^V(ADD|SUB)SHZrr(b?)k(z?)_Int$", "^VCVT(T?)PH2(U?)WZ(128|256)rrk(z?)$", "^VCVT(U?)W2PHZ(128|256)rrk(z?)$", "^VF(N?)M(ADD|SUB)(132|213|231)PHZ(128|256)rk(z?)$", - "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)_Intk(z?)$", + "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)k(z?)_Int$", "^VFMADDSUB(132|213|231)PHZ(128|256)rk(z?)$", "^VFMSUBADD(132|213|231)PHZ(128|256)rk(z?)$", "^VGETEXPPHZ(128|256)rk(z?)$", @@ -2560,7 +2560,7 @@ def : InstRW<[SPRWriteResGroup251], (instregex "^V(ADD|SUB)PHZ(128|256)rrk(z?)$" "^VGETMANTSHZrri(k|bkz)$", "^VM(AX|IN)CPHZ(128|256)rrk(z?)$", "^VM(AX|IN|UL)PHZ(128|256)rrk(z?)$", - "^VM(AX|IN|UL)SHZrr(b?)_Intk(z?)$")>; + "^VM(AX|IN|UL)SHZrr(b?)k(z?)_Int$")>; def SPRWriteResGroup252 : SchedWriteRes<[SPRPort00]> { let Latency = 5; @@ -2745,7 +2745,7 @@ def : InstRW<[SPRWriteResGroup263, ReadAfterVecYLd], (instregex "^VCMPP(D|H|S)Z( "^VPTEST(N?)M(B|D|Q|W)Z((256)?)rm(k?)$", "^VPTEST(N?)M(D|Q)Z((256)?)rmb(k?)$")>; def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrmi$", - "^VCMPS(D|H|S)Zrmi_Int(k?)$", + "^VCMPS(D|H|S)Zrmi(k?)_Int$", "^VFPCLASSS(D|H|S)Zmik$")>; def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { @@ -3171,7 +3171,7 @@ def : InstRW<[SPRWriteResGroup314], (instregex "^VCVT(T?)PD2(U?)QQZ(128|256)rr(( "^VPLZCNT(D|Q)Z(128|256)rr((k|kz)?)$", "^VPMADD52(H|L)UQZ(128|256)r((k|kz)?)$", "^VSCALEFS(D|S)Zrr((k|kz)?)$", - "^VSCALEFS(D|S)Zrrb_Int((k|kz)?)$")>; + "^VSCALEFS(D|S)Zrrb((k|kz)?)_Int$")>; def : InstRW<[SPRWriteResGroup314, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrrib((k|kz)?)$")>; def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { @@ -3300,7 +3300,7 @@ def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup331], (instregex "^VCVTPH2PSZ(128|256)rmk(z?)$")>; -def : InstRW<[SPRWriteResGroup331, ReadAfterVecLd], (instregex "^VCVTSH2SSZrm_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup331, ReadAfterVecLd], (instregex "^VCVTSH2SSZrmk(z?)_Int$")>; def : InstRW<[SPRWriteResGroup331, ReadAfterVecXLd], (instregex "^VPMADDUBSWZ128rmk(z?)$", "^VPMULH((U|RS)?)WZ128rmk(z?)$", "^VPMULLWZ128rmk(z?)$")>; @@ -3460,7 +3460,7 @@ def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 let Latency = 21; let NumMicroOps = 7; } -def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrmk(z?)_Int$")>; def SPRWriteResGroup354 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; @@ -3475,7 +3475,7 @@ def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 let Latency = 14; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)k(z?)_Int$")>; def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort05]> { let ReleaseAtCycles = [2, 1, 1]; @@ -3489,7 +3489,7 @@ def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10, SPRPort0 let Latency = 20; let NumMicroOps = 4; } -def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrmk(z?)_Int$")>; def SPRWriteResGroup358 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { let ReleaseAtCycles = [2, 1]; @@ -3504,7 +3504,7 @@ def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> { let Latency = 13; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)k(z?)_Int$")>; def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_10]> { let Latency = 13; @@ -3523,7 +3523,7 @@ def : InstRW<[SPRWriteResGroup361], (instregex "^VCVT(T?)SH2(U?)SI((64)?)Zrr(b?) def SPRWriteResGroup362 : SchedWriteRes<[SPRPort00_01]> { let Latency = 8; } -def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)k(z?)_Int$")>; def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_10]> { let Latency = 14; @@ -3536,7 +3536,7 @@ def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort0 let Latency = 16; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup364, ReadAfterVecLd], (instregex "^VCVTSS2SHZrm_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup364, ReadAfterVecLd], (instregex "^VCVTSS2SHZrmk(z?)_Int$")>; def SPRWriteResGroup365 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { let Latency = 6; @@ -3549,7 +3549,7 @@ def SPRWriteResGroup366 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> { let Latency = 9; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup366], (instregex "^VCVTSS2SHZrr(b?)_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup366], (instregex "^VCVTSS2SHZrr(b?)k(z?)_Int$")>; def SPRWriteResGroup367 : SchedWriteRes<[SPRPort05]> { let Latency = 5; @@ -3667,7 +3667,7 @@ def SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { let Latency = 21; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instregex "^VDIVSHZrm_Int((k|kz)?)$")>; +def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instregex "^VDIVSHZrm((k|kz)?)_Int$")>; def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instrs VDIVSHZrm)>; def SPRWriteResGroup381 : SchedWriteRes<[SPRPort00]> { @@ -4884,7 +4884,7 @@ def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_10]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup534, ReadAfterVecXLd], (instregex "^VRNDSCALEPHZ128rm(b?)ik(z?)$", - "^VRNDSCALESHZrmi_Intk(z?)$", + "^VRNDSCALESHZrmik(z?)_Int$", "^VSCALEFPHZ128rm(bk|kz)$", "^VSCALEFPHZ128rm(k|bkz)$")>; def : InstRW<[SPRWriteResGroup534, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZ256rm(b?)ik(z?)$", @@ -4898,9 +4898,9 @@ def SPRWriteResGroup535 : SchedWriteRes<[SPRPort00_01]> { let NumMicroOps = 2; } def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$", - "^VRNDSCALESHZrri(b?)_Intk(z?)$", + "^VRNDSCALESHZrri(b?)k(z?)_Int$", "^VSCALEFPHZ(128|256)rrk(z?)$", - "^VSCALEFSHZrrb_Intk(z?)$", + "^VSCALEFSHZrrbk(z?)_Int$", "^VSCALEFSHZrrk(z?)$")>; def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { @@ -4944,7 +4944,7 @@ def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_10]> { } def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(bk|kz)$", "^VSQRTPDZ128m(k|bkz)$")>; -def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>; +def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZmk(z?)_Int$")>; def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_10]> { let ReleaseAtCycles = [2, 1, 1]; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td index 38f9b5ef1d80b..c5478dd9fc13d 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver4.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td @@ -1545,7 +1545,7 @@ def Zn4WriteSCALErr: SchedWriteRes<[Zn4FPFMisc23]> { let NumMicroOps = 2; } def : InstRW<[Zn4WriteSCALErr], (instregex - "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?|_Intkz?)", + "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?)", "(V?)REDUCE(PD|PS|SD|SS)(Z?|Z128?)(rri|rrikz|rrib)" )>; @@ -1585,7 +1585,7 @@ def : InstRW<[Zn4WriteSHIFTrr], (instregex "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)", "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)", "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)", - "VPSHUFBITQMBZ128rr", "VFMSUB231SSZr_Intkz" + "VPSHUFBITQMBZ128rr", "VFMSUB231SSZrkz_Int" )>; def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> { diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 8cfaa18a5cfac..954c05bdb2076 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -4239,9 +4239,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0}, {X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0}, {X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0}, - {X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VADDSHZrr_Intkz, X86::VADDSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VADDSDZrrkz_Int, X86::VADDSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VADDSHZrrkz_Int, X86::VADDSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VADDSSZrrkz_Int, X86::VADDSSZrmkz_Int, TB_NO_REVERSE}, {X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0}, {X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0}, {X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0}, @@ -4288,9 +4288,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0}, {X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0}, {X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0}, - {X86::VCMPSDZrri_Intk, X86::VCMPSDZrmi_Intk, TB_NO_REVERSE}, - {X86::VCMPSHZrri_Intk, X86::VCMPSHZrmi_Intk, TB_NO_REVERSE}, - {X86::VCMPSSZrri_Intk, X86::VCMPSSZrmi_Intk, TB_NO_REVERSE}, + {X86::VCMPSDZrrik_Int, X86::VCMPSDZrmik_Int, TB_NO_REVERSE}, + {X86::VCMPSHZrrik_Int, X86::VCMPSHZrmik_Int, TB_NO_REVERSE}, + {X86::VCMPSSZrrik_Int, X86::VCMPSSZrmik_Int, TB_NO_REVERSE}, {X86::VCVT2PS2PHXZ128rrkz, X86::VCVT2PS2PHXZ128rmkz, 0}, {X86::VCVT2PS2PHXZ256rrkz, X86::VCVT2PS2PHXZ256rmkz, 0}, {X86::VCVT2PS2PHXZrrkz, X86::VCVT2PS2PHXZrmkz, 0}, @@ -4438,12 +4438,12 @@ static const X86FoldTableEntry Table3[] = { {X86::VCVTQQ2PSZ128rrk, X86::VCVTQQ2PSZ128rmk, 0}, {X86::VCVTQQ2PSZ256rrk, X86::VCVTQQ2PSZ256rmk, 0}, {X86::VCVTQQ2PSZrrk, X86::VCVTQQ2PSZrmk, 0}, - {X86::VCVTSD2SHZrr_Intkz, X86::VCVTSD2SHZrm_Intkz, TB_NO_REVERSE}, - {X86::VCVTSD2SSZrr_Intkz, X86::VCVTSD2SSZrm_Intkz, TB_NO_REVERSE}, - {X86::VCVTSH2SDZrr_Intkz, X86::VCVTSH2SDZrm_Intkz, TB_NO_REVERSE}, - {X86::VCVTSH2SSZrr_Intkz, X86::VCVTSH2SSZrm_Intkz, TB_NO_REVERSE}, - {X86::VCVTSS2SDZrr_Intkz, X86::VCVTSS2SDZrm_Intkz, TB_NO_REVERSE}, - {X86::VCVTSS2SHZrr_Intkz, X86::VCVTSS2SHZrm_Intkz, TB_NO_REVERSE}, + {X86::VCVTSD2SHZrrkz_Int, X86::VCVTSD2SHZrmkz_Int, TB_NO_REVERSE}, + {X86::VCVTSD2SSZrrkz_Int, X86::VCVTSD2SSZrmkz_Int, TB_NO_REVERSE}, + {X86::VCVTSH2SDZrrkz_Int, X86::VCVTSH2SDZrmkz_Int, TB_NO_REVERSE}, + {X86::VCVTSH2SSZrrkz_Int, X86::VCVTSH2SSZrmkz_Int, TB_NO_REVERSE}, + {X86::VCVTSS2SDZrrkz_Int, X86::VCVTSS2SDZrmkz_Int, TB_NO_REVERSE}, + {X86::VCVTSS2SHZrrkz_Int, X86::VCVTSS2SHZrmkz_Int, TB_NO_REVERSE}, {X86::VCVTTNEBF162IBSZ128rrk, X86::VCVTTNEBF162IBSZ128rmk, 0}, {X86::VCVTTNEBF162IBSZ256rrk, X86::VCVTTNEBF162IBSZ256rmk, 0}, {X86::VCVTTNEBF162IBSZrrk, X86::VCVTTNEBF162IBSZrmk, 0}, @@ -4567,9 +4567,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0}, {X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0}, {X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0}, - {X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VDIVSHZrr_Intkz, X86::VDIVSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VDIVSDZrrkz_Int, X86::VDIVSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VDIVSHZrrkz_Int, X86::VDIVSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VDIVSSZrrkz_Int, X86::VDIVSSZrmkz_Int, TB_NO_REVERSE}, {X86::VDPBF16PSZ128r, X86::VDPBF16PSZ128m, 0}, {X86::VDPBF16PSZ256r, X86::VDPBF16PSZ256m, 0}, {X86::VDPBF16PSZr, X86::VDPBF16PSZm, 0}, @@ -5110,9 +5110,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0}, {X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0}, {X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0}, - {X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VMAXSHZrr_Intkz, X86::VMAXSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VMAXSDZrrkz_Int, X86::VMAXSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VMAXSHZrrkz_Int, X86::VMAXSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VMAXSSZrrkz_Int, X86::VMAXSSZrmkz_Int, TB_NO_REVERSE}, {X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0}, {X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0}, {X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0}, @@ -5134,9 +5134,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMINMAXPSZ128rrikz, X86::VMINMAXPSZ128rmikz, 0}, {X86::VMINMAXPSZ256rrikz, X86::VMINMAXPSZ256rmikz, 0}, {X86::VMINMAXPSZrrikz, X86::VMINMAXPSZrmikz, 0}, - {X86::VMINMAXSDrri_Intkz, X86::VMINMAXSDrmi_Intkz, TB_NO_REVERSE}, - {X86::VMINMAXSHrri_Intkz, X86::VMINMAXSHrmi_Intkz, TB_NO_REVERSE}, - {X86::VMINMAXSSrri_Intkz, X86::VMINMAXSSrmi_Intkz, TB_NO_REVERSE}, + {X86::VMINMAXSDrrikz_Int, X86::VMINMAXSDrmikz_Int, TB_NO_REVERSE}, + {X86::VMINMAXSHrrikz_Int, X86::VMINMAXSHrmikz_Int, TB_NO_REVERSE}, + {X86::VMINMAXSSrrikz_Int, X86::VMINMAXSSrmikz_Int, TB_NO_REVERSE}, {X86::VMINPBF16Z128rrkz, X86::VMINPBF16Z128rmkz, 0}, {X86::VMINPBF16Z256rrkz, X86::VMINPBF16Z256rmkz, 0}, {X86::VMINPBF16Zrrkz, X86::VMINPBF16Zrmkz, 0}, @@ -5149,9 +5149,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0}, {X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0}, {X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0}, - {X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VMINSHZrr_Intkz, X86::VMINSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VMINSDZrrkz_Int, X86::VMINSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VMINSHZrrkz_Int, X86::VMINSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VMINSSZrrkz_Int, X86::VMINSSZrmkz_Int, TB_NO_REVERSE}, {X86::VMOVAPDZ128rrk, X86::VMOVAPDZ128rmk, TB_NO_REVERSE|TB_ALIGN_16}, {X86::VMOVAPDZ256rrk, X86::VMOVAPDZ256rmk, TB_NO_REVERSE|TB_ALIGN_32}, {X86::VMOVAPDZrrk, X86::VMOVAPDZrmk, TB_NO_REVERSE|TB_ALIGN_64}, @@ -5206,9 +5206,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0}, {X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0}, {X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0}, - {X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VMULSHZrr_Intkz, X86::VMULSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VMULSDZrrkz_Int, X86::VMULSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VMULSHZrrkz_Int, X86::VMULSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VMULSSZrrkz_Int, X86::VMULSSZrmkz_Int, TB_NO_REVERSE}, {X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0}, {X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0}, {X86::VORPDZrrkz, X86::VORPDZrmkz, 0}, @@ -5972,9 +5972,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VRNDSCALEPSZ128rrik, X86::VRNDSCALEPSZ128rmik, 0}, {X86::VRNDSCALEPSZ256rrik, X86::VRNDSCALEPSZ256rmik, 0}, {X86::VRNDSCALEPSZrrik, X86::VRNDSCALEPSZrmik, 0}, - {X86::VRNDSCALESDZrri_Intkz, X86::VRNDSCALESDZrmi_Intkz, TB_NO_REVERSE}, - {X86::VRNDSCALESHZrri_Intkz, X86::VRNDSCALESHZrmi_Intkz, TB_NO_REVERSE}, - {X86::VRNDSCALESSZrri_Intkz, X86::VRNDSCALESSZrmi_Intkz, TB_NO_REVERSE}, + {X86::VRNDSCALESDZrrikz_Int, X86::VRNDSCALESDZrmikz_Int, TB_NO_REVERSE}, + {X86::VRNDSCALESHZrrikz_Int, X86::VRNDSCALESHZrmikz_Int, TB_NO_REVERSE}, + {X86::VRNDSCALESSZrrikz_Int, X86::VRNDSCALESSZrmikz_Int, TB_NO_REVERSE}, {X86::VRSQRT14PDZ128rk, X86::VRSQRT14PDZ128mk, 0}, {X86::VRSQRT14PDZ256rk, X86::VRSQRT14PDZ256mk, 0}, {X86::VRSQRT14PDZrk, X86::VRSQRT14PDZmk, 0}, @@ -6038,9 +6038,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mk, 0}, {X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mk, 0}, {X86::VSQRTPSZrk, X86::VSQRTPSZmk, 0}, - {X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE}, - {X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE}, - {X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE}, + {X86::VSQRTSDZrkz_Int, X86::VSQRTSDZmkz_Int, TB_NO_REVERSE}, + {X86::VSQRTSHZrkz_Int, X86::VSQRTSHZmkz_Int, TB_NO_REVERSE}, + {X86::VSQRTSSZrkz_Int, X86::VSQRTSSZmkz_Int, TB_NO_REVERSE}, {X86::VSUBNEPBF16Z128rrkz, X86::VSUBNEPBF16Z128rmkz, 0}, {X86::VSUBNEPBF16Z256rrkz, X86::VSUBNEPBF16Z256rmkz, 0}, {X86::VSUBNEPBF16Zrrkz, X86::VSUBNEPBF16Zrmkz, 0}, @@ -6053,9 +6053,9 @@ static const X86FoldTableEntry Table3[] = { {X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0}, {X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0}, {X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0}, - {X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE}, - {X86::VSUBSHZrr_Intkz, X86::VSUBSHZrm_Intkz, TB_NO_REVERSE}, - {X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE}, + {X86::VSUBSDZrrkz_Int, X86::VSUBSDZrmkz_Int, TB_NO_REVERSE}, + {X86::VSUBSHZrrkz_Int, X86::VSUBSHZrmkz_Int, TB_NO_REVERSE}, + {X86::VSUBSSZrrkz_Int, X86::VSUBSSZrmkz_Int, TB_NO_REVERSE}, {X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0}, {X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0}, {X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0}, @@ -6089,9 +6089,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0}, {X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0}, {X86::VADDPSZrrk, X86::VADDPSZrmk, 0}, - {X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE}, - {X86::VADDSHZrr_Intk, X86::VADDSHZrm_Intk, TB_NO_REVERSE}, - {X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE}, + {X86::VADDSDZrrk_Int, X86::VADDSDZrmk_Int, TB_NO_REVERSE}, + {X86::VADDSHZrrk_Int, X86::VADDSHZrmk_Int, TB_NO_REVERSE}, + {X86::VADDSSZrrk_Int, X86::VADDSSZrmk_Int, TB_NO_REVERSE}, {X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0}, {X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0}, {X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0}, @@ -6140,12 +6140,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0}, {X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0}, {X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0}, - {X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE}, - {X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE}, - {X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE}, - {X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE}, - {X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE}, - {X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE}, + {X86::VCVTSD2SHZrrk_Int, X86::VCVTSD2SHZrmk_Int, TB_NO_REVERSE}, + {X86::VCVTSD2SSZrrk_Int, X86::VCVTSD2SSZrmk_Int, TB_NO_REVERSE}, + {X86::VCVTSH2SDZrrk_Int, X86::VCVTSH2SDZrmk_Int, TB_NO_REVERSE}, + {X86::VCVTSH2SSZrrk_Int, X86::VCVTSH2SSZrmk_Int, TB_NO_REVERSE}, + {X86::VCVTSS2SDZrrk_Int, X86::VCVTSS2SDZrmk_Int, TB_NO_REVERSE}, + {X86::VCVTSS2SHZrrk_Int, X86::VCVTSS2SHZrmk_Int, TB_NO_REVERSE}, {X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0}, {X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0}, {X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0}, @@ -6161,9 +6161,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0}, {X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0}, {X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0}, - {X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE}, - {X86::VDIVSHZrr_Intk, X86::VDIVSHZrm_Intk, TB_NO_REVERSE}, - {X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE}, + {X86::VDIVSDZrrk_Int, X86::VDIVSDZrmk_Int, TB_NO_REVERSE}, + {X86::VDIVSHZrrk_Int, X86::VDIVSHZrmk_Int, TB_NO_REVERSE}, + {X86::VDIVSSZrrk_Int, X86::VDIVSSZrmk_Int, TB_NO_REVERSE}, {X86::VDPBF16PSZ128rk, X86::VDPBF16PSZ128mk, 0}, {X86::VDPBF16PSZ128rkz, X86::VDPBF16PSZ128mkz, 0}, {X86::VDPBF16PSZ256rk, X86::VDPBF16PSZ256mk, 0}, @@ -6228,12 +6228,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADD132PSZ256rkz, X86::VFMADD132PSZ256mkz, 0}, {X86::VFMADD132PSZrk, X86::VFMADD132PSZmk, 0}, {X86::VFMADD132PSZrkz, X86::VFMADD132PSZmkz, 0}, - {X86::VFMADD132SDZr_Intk, X86::VFMADD132SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD132SDZr_Intkz, X86::VFMADD132SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD132SHZr_Intk, X86::VFMADD132SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMADD132SDZrk_Int, X86::VFMADD132SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD132SDZrkz_Int, X86::VFMADD132SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD132SHZrk_Int, X86::VFMADD132SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD132SHZrkz_Int, X86::VFMADD132SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD132SSZrk_Int, X86::VFMADD132SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD132SSZrkz_Int, X86::VFMADD132SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMADD213NEPBF16Z128rk, X86::VFMADD213NEPBF16Z128mk, 0}, {X86::VFMADD213NEPBF16Z128rkz, X86::VFMADD213NEPBF16Z128mkz, 0}, {X86::VFMADD213NEPBF16Z256rk, X86::VFMADD213NEPBF16Z256mk, 0}, @@ -6258,12 +6258,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADD213PSZ256rkz, X86::VFMADD213PSZ256mkz, 0}, {X86::VFMADD213PSZrk, X86::VFMADD213PSZmk, 0}, {X86::VFMADD213PSZrkz, X86::VFMADD213PSZmkz, 0}, - {X86::VFMADD213SDZr_Intk, X86::VFMADD213SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD213SDZr_Intkz, X86::VFMADD213SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD213SHZr_Intk, X86::VFMADD213SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMADD213SDZrk_Int, X86::VFMADD213SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD213SDZrkz_Int, X86::VFMADD213SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD213SHZrk_Int, X86::VFMADD213SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD213SHZrkz_Int, X86::VFMADD213SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD213SSZrk_Int, X86::VFMADD213SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD213SSZrkz_Int, X86::VFMADD213SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMADD231NEPBF16Z128rk, X86::VFMADD231NEPBF16Z128mk, 0}, {X86::VFMADD231NEPBF16Z128rkz, X86::VFMADD231NEPBF16Z128mkz, 0}, {X86::VFMADD231NEPBF16Z256rk, X86::VFMADD231NEPBF16Z256mk, 0}, @@ -6288,12 +6288,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMADD231PSZ256rkz, X86::VFMADD231PSZ256mkz, 0}, {X86::VFMADD231PSZrk, X86::VFMADD231PSZmk, 0}, {X86::VFMADD231PSZrkz, X86::VFMADD231PSZmkz, 0}, - {X86::VFMADD231SDZr_Intk, X86::VFMADD231SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD231SDZr_Intkz, X86::VFMADD231SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD231SHZr_Intk, X86::VFMADD231SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD231SHZr_Intkz, X86::VFMADD231SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMADD231SSZr_Intk, X86::VFMADD231SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMADD231SSZr_Intkz, X86::VFMADD231SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMADD231SDZrk_Int, X86::VFMADD231SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD231SDZrkz_Int, X86::VFMADD231SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD231SHZrk_Int, X86::VFMADD231SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD231SHZrkz_Int, X86::VFMADD231SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMADD231SSZrk_Int, X86::VFMADD231SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMADD231SSZrkz_Int, X86::VFMADD231SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMADDCPHZ128rk, X86::VFMADDCPHZ128mk, 0}, {X86::VFMADDCPHZ128rkz, X86::VFMADDCPHZ128mkz, 0}, {X86::VFMADDCPHZ256rk, X86::VFMADDCPHZ256mk, 0}, @@ -6380,12 +6380,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMSUB132PSZ256rkz, X86::VFMSUB132PSZ256mkz, 0}, {X86::VFMSUB132PSZrk, X86::VFMSUB132PSZmk, 0}, {X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmkz, 0}, - {X86::VFMSUB132SDZr_Intk, X86::VFMSUB132SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB132SDZr_Intkz, X86::VFMSUB132SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB132SHZr_Intk, X86::VFMSUB132SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMSUB132SDZrk_Int, X86::VFMSUB132SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB132SDZrkz_Int, X86::VFMSUB132SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB132SHZrk_Int, X86::VFMSUB132SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB132SHZrkz_Int, X86::VFMSUB132SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB132SSZrk_Int, X86::VFMSUB132SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB132SSZrkz_Int, X86::VFMSUB132SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMSUB213NEPBF16Z128rk, X86::VFMSUB213NEPBF16Z128mk, 0}, {X86::VFMSUB213NEPBF16Z128rkz, X86::VFMSUB213NEPBF16Z128mkz, 0}, {X86::VFMSUB213NEPBF16Z256rk, X86::VFMSUB213NEPBF16Z256mk, 0}, @@ -6410,12 +6410,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMSUB213PSZ256rkz, X86::VFMSUB213PSZ256mkz, 0}, {X86::VFMSUB213PSZrk, X86::VFMSUB213PSZmk, 0}, {X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmkz, 0}, - {X86::VFMSUB213SDZr_Intk, X86::VFMSUB213SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB213SDZr_Intkz, X86::VFMSUB213SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB213SHZr_Intk, X86::VFMSUB213SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMSUB213SDZrk_Int, X86::VFMSUB213SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB213SDZrkz_Int, X86::VFMSUB213SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB213SHZrk_Int, X86::VFMSUB213SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB213SHZrkz_Int, X86::VFMSUB213SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB213SSZrk_Int, X86::VFMSUB213SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB213SSZrkz_Int, X86::VFMSUB213SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMSUB231NEPBF16Z128rk, X86::VFMSUB231NEPBF16Z128mk, 0}, {X86::VFMSUB231NEPBF16Z128rkz, X86::VFMSUB231NEPBF16Z128mkz, 0}, {X86::VFMSUB231NEPBF16Z256rk, X86::VFMSUB231NEPBF16Z256mk, 0}, @@ -6440,12 +6440,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFMSUB231PSZ256rkz, X86::VFMSUB231PSZ256mkz, 0}, {X86::VFMSUB231PSZrk, X86::VFMSUB231PSZmk, 0}, {X86::VFMSUB231PSZrkz, X86::VFMSUB231PSZmkz, 0}, - {X86::VFMSUB231SDZr_Intk, X86::VFMSUB231SDZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB231SDZr_Intkz, X86::VFMSUB231SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB231SHZr_Intk, X86::VFMSUB231SHZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB231SHZr_Intkz, X86::VFMSUB231SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFMSUB231SSZr_Intk, X86::VFMSUB231SSZm_Intk, TB_NO_REVERSE}, - {X86::VFMSUB231SSZr_Intkz, X86::VFMSUB231SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFMSUB231SDZrk_Int, X86::VFMSUB231SDZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB231SDZrkz_Int, X86::VFMSUB231SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB231SHZrk_Int, X86::VFMSUB231SHZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB231SHZrkz_Int, X86::VFMSUB231SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFMSUB231SSZrk_Int, X86::VFMSUB231SSZmk_Int, TB_NO_REVERSE}, + {X86::VFMSUB231SSZrkz_Int, X86::VFMSUB231SSZmkz_Int, TB_NO_REVERSE}, {X86::VFMSUBADD132PDZ128rk, X86::VFMSUBADD132PDZ128mk, 0}, {X86::VFMSUBADD132PDZ128rkz, X86::VFMSUBADD132PDZ128mkz, 0}, {X86::VFMSUBADD132PDZ256rk, X86::VFMSUBADD132PDZ256mk, 0}, @@ -6528,12 +6528,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMADD132PSZ256rkz, X86::VFNMADD132PSZ256mkz, 0}, {X86::VFNMADD132PSZrk, X86::VFNMADD132PSZmk, 0}, {X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmkz, 0}, - {X86::VFNMADD132SDZr_Intk, X86::VFNMADD132SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD132SDZr_Intkz, X86::VFNMADD132SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD132SHZr_Intk, X86::VFNMADD132SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMADD132SDZrk_Int, X86::VFNMADD132SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD132SDZrkz_Int, X86::VFNMADD132SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD132SHZrk_Int, X86::VFNMADD132SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD132SHZrkz_Int, X86::VFNMADD132SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD132SSZrk_Int, X86::VFNMADD132SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD132SSZrkz_Int, X86::VFNMADD132SSZmkz_Int, TB_NO_REVERSE}, {X86::VFNMADD213NEPBF16Z128rk, X86::VFNMADD213NEPBF16Z128mk, 0}, {X86::VFNMADD213NEPBF16Z128rkz, X86::VFNMADD213NEPBF16Z128mkz, 0}, {X86::VFNMADD213NEPBF16Z256rk, X86::VFNMADD213NEPBF16Z256mk, 0}, @@ -6558,12 +6558,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMADD213PSZ256rkz, X86::VFNMADD213PSZ256mkz, 0}, {X86::VFNMADD213PSZrk, X86::VFNMADD213PSZmk, 0}, {X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmkz, 0}, - {X86::VFNMADD213SDZr_Intk, X86::VFNMADD213SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD213SDZr_Intkz, X86::VFNMADD213SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD213SHZr_Intk, X86::VFNMADD213SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMADD213SDZrk_Int, X86::VFNMADD213SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD213SDZrkz_Int, X86::VFNMADD213SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD213SHZrk_Int, X86::VFNMADD213SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD213SHZrkz_Int, X86::VFNMADD213SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD213SSZrk_Int, X86::VFNMADD213SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD213SSZrkz_Int, X86::VFNMADD213SSZmkz_Int, TB_NO_REVERSE}, {X86::VFNMADD231NEPBF16Z128rk, X86::VFNMADD231NEPBF16Z128mk, 0}, {X86::VFNMADD231NEPBF16Z128rkz, X86::VFNMADD231NEPBF16Z128mkz, 0}, {X86::VFNMADD231NEPBF16Z256rk, X86::VFNMADD231NEPBF16Z256mk, 0}, @@ -6588,12 +6588,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMADD231PSZ256rkz, X86::VFNMADD231PSZ256mkz, 0}, {X86::VFNMADD231PSZrk, X86::VFNMADD231PSZmk, 0}, {X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmkz, 0}, - {X86::VFNMADD231SDZr_Intk, X86::VFNMADD231SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD231SDZr_Intkz, X86::VFNMADD231SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD231SHZr_Intk, X86::VFNMADD231SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMADD231SDZrk_Int, X86::VFNMADD231SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD231SDZrkz_Int, X86::VFNMADD231SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD231SHZrk_Int, X86::VFNMADD231SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD231SHZrkz_Int, X86::VFNMADD231SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMADD231SSZrk_Int, X86::VFNMADD231SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMADD231SSZrkz_Int, X86::VFNMADD231SSZmkz_Int, TB_NO_REVERSE}, {X86::VFNMSUB132NEPBF16Z128rk, X86::VFNMSUB132NEPBF16Z128mk, 0}, {X86::VFNMSUB132NEPBF16Z128rkz, X86::VFNMSUB132NEPBF16Z128mkz, 0}, {X86::VFNMSUB132NEPBF16Z256rk, X86::VFNMSUB132NEPBF16Z256mk, 0}, @@ -6618,12 +6618,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMSUB132PSZ256rkz, X86::VFNMSUB132PSZ256mkz, 0}, {X86::VFNMSUB132PSZrk, X86::VFNMSUB132PSZmk, 0}, {X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmkz, 0}, - {X86::VFNMSUB132SDZr_Intk, X86::VFNMSUB132SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB132SDZr_Intkz, X86::VFNMSUB132SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB132SHZr_Intk, X86::VFNMSUB132SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB132SDZrk_Int, X86::VFNMSUB132SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132SDZrkz_Int, X86::VFNMSUB132SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132SHZrk_Int, X86::VFNMSUB132SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132SHZrkz_Int, X86::VFNMSUB132SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132SSZrk_Int, X86::VFNMSUB132SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB132SSZrkz_Int, X86::VFNMSUB132SSZmkz_Int, TB_NO_REVERSE}, {X86::VFNMSUB213NEPBF16Z128rk, X86::VFNMSUB213NEPBF16Z128mk, 0}, {X86::VFNMSUB213NEPBF16Z128rkz, X86::VFNMSUB213NEPBF16Z128mkz, 0}, {X86::VFNMSUB213NEPBF16Z256rk, X86::VFNMSUB213NEPBF16Z256mk, 0}, @@ -6648,12 +6648,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMSUB213PSZ256rkz, X86::VFNMSUB213PSZ256mkz, 0}, {X86::VFNMSUB213PSZrk, X86::VFNMSUB213PSZmk, 0}, {X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmkz, 0}, - {X86::VFNMSUB213SDZr_Intk, X86::VFNMSUB213SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB213SDZr_Intkz, X86::VFNMSUB213SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB213SHZr_Intk, X86::VFNMSUB213SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB213SDZrk_Int, X86::VFNMSUB213SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213SDZrkz_Int, X86::VFNMSUB213SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213SHZrk_Int, X86::VFNMSUB213SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213SHZrkz_Int, X86::VFNMSUB213SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213SSZrk_Int, X86::VFNMSUB213SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB213SSZrkz_Int, X86::VFNMSUB213SSZmkz_Int, TB_NO_REVERSE}, {X86::VFNMSUB231NEPBF16Z128rk, X86::VFNMSUB231NEPBF16Z128mk, 0}, {X86::VFNMSUB231NEPBF16Z128rkz, X86::VFNMSUB231NEPBF16Z128mkz, 0}, {X86::VFNMSUB231NEPBF16Z256rk, X86::VFNMSUB231NEPBF16Z256mk, 0}, @@ -6678,12 +6678,12 @@ static const X86FoldTableEntry Table4[] = { {X86::VFNMSUB231PSZ256rkz, X86::VFNMSUB231PSZ256mkz, 0}, {X86::VFNMSUB231PSZrk, X86::VFNMSUB231PSZmk, 0}, {X86::VFNMSUB231PSZrkz, X86::VFNMSUB231PSZmkz, 0}, - {X86::VFNMSUB231SDZr_Intk, X86::VFNMSUB231SDZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB231SDZr_Intkz, X86::VFNMSUB231SDZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB231SHZr_Intk, X86::VFNMSUB231SHZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB231SHZr_Intkz, X86::VFNMSUB231SHZm_Intkz, TB_NO_REVERSE}, - {X86::VFNMSUB231SSZr_Intk, X86::VFNMSUB231SSZm_Intk, TB_NO_REVERSE}, - {X86::VFNMSUB231SSZr_Intkz, X86::VFNMSUB231SSZm_Intkz, TB_NO_REVERSE}, + {X86::VFNMSUB231SDZrk_Int, X86::VFNMSUB231SDZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231SDZrkz_Int, X86::VFNMSUB231SDZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231SHZrk_Int, X86::VFNMSUB231SHZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231SHZrkz_Int, X86::VFNMSUB231SHZmkz_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231SSZrk_Int, X86::VFNMSUB231SSZmk_Int, TB_NO_REVERSE}, + {X86::VFNMSUB231SSZrkz_Int, X86::VFNMSUB231SSZmkz_Int, TB_NO_REVERSE}, {X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE}, {X86::VGETEXPSHZrk, X86::VGETEXPSHZmk, TB_NO_REVERSE}, {X86::VGETEXPSSZrk, X86::VGETEXPSSZmk, TB_NO_REVERSE}, @@ -6732,9 +6732,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0}, {X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0}, {X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0}, - {X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, TB_NO_REVERSE}, - {X86::VMAXSHZrr_Intk, X86::VMAXSHZrm_Intk, TB_NO_REVERSE}, - {X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, TB_NO_REVERSE}, + {X86::VMAXSDZrrk_Int, X86::VMAXSDZrmk_Int, TB_NO_REVERSE}, + {X86::VMAXSHZrrk_Int, X86::VMAXSHZrmk_Int, TB_NO_REVERSE}, + {X86::VMAXSSZrrk_Int, X86::VMAXSSZrmk_Int, TB_NO_REVERSE}, {X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0}, {X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0}, {X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0}, @@ -6756,9 +6756,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMINMAXPSZ128rrik, X86::VMINMAXPSZ128rmik, 0}, {X86::VMINMAXPSZ256rrik, X86::VMINMAXPSZ256rmik, 0}, {X86::VMINMAXPSZrrik, X86::VMINMAXPSZrmik, 0}, - {X86::VMINMAXSDrri_Intk, X86::VMINMAXSDrmi_Intk, TB_NO_REVERSE}, - {X86::VMINMAXSHrri_Intk, X86::VMINMAXSHrmi_Intk, TB_NO_REVERSE}, - {X86::VMINMAXSSrri_Intk, X86::VMINMAXSSrmi_Intk, TB_NO_REVERSE}, + {X86::VMINMAXSDrrik_Int, X86::VMINMAXSDrmik_Int, TB_NO_REVERSE}, + {X86::VMINMAXSHrrik_Int, X86::VMINMAXSHrmik_Int, TB_NO_REVERSE}, + {X86::VMINMAXSSrrik_Int, X86::VMINMAXSSrmik_Int, TB_NO_REVERSE}, {X86::VMINPBF16Z128rrk, X86::VMINPBF16Z128rmk, 0}, {X86::VMINPBF16Z256rrk, X86::VMINPBF16Z256rmk, 0}, {X86::VMINPBF16Zrrk, X86::VMINPBF16Zrmk, 0}, @@ -6771,9 +6771,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0}, {X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0}, {X86::VMINPSZrrk, X86::VMINPSZrmk, 0}, - {X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE}, - {X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE}, - {X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE}, + {X86::VMINSDZrrk_Int, X86::VMINSDZrmk_Int, TB_NO_REVERSE}, + {X86::VMINSHZrrk_Int, X86::VMINSHZrmk_Int, TB_NO_REVERSE}, + {X86::VMINSSZrrk_Int, X86::VMINSSZrmk_Int, TB_NO_REVERSE}, {X86::VMPSADBWZ128rrik, X86::VMPSADBWZ128rmik, 0}, {X86::VMPSADBWZ256rrik, X86::VMPSADBWZ256rmik, 0}, {X86::VMPSADBWZrrik, X86::VMPSADBWZrmik, 0}, @@ -6789,9 +6789,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0}, {X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0}, {X86::VMULPSZrrk, X86::VMULPSZrmk, 0}, - {X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE}, - {X86::VMULSHZrr_Intk, X86::VMULSHZrm_Intk, TB_NO_REVERSE}, - {X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE}, + {X86::VMULSDZrrk_Int, X86::VMULSDZrmk_Int, TB_NO_REVERSE}, + {X86::VMULSHZrrk_Int, X86::VMULSHZrmk_Int, TB_NO_REVERSE}, + {X86::VMULSSZrrk_Int, X86::VMULSSZrmk_Int, TB_NO_REVERSE}, {X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0}, {X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0}, {X86::VORPDZrrk, X86::VORPDZrmk, 0}, @@ -7347,9 +7347,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VREDUCESDZrrik, X86::VREDUCESDZrmik, TB_NO_REVERSE}, {X86::VREDUCESHZrrik, X86::VREDUCESHZrmik, TB_NO_REVERSE}, {X86::VREDUCESSZrrik, X86::VREDUCESSZrmik, TB_NO_REVERSE}, - {X86::VRNDSCALESDZrri_Intk, X86::VRNDSCALESDZrmi_Intk, TB_NO_REVERSE}, - {X86::VRNDSCALESHZrri_Intk, X86::VRNDSCALESHZrmi_Intk, TB_NO_REVERSE}, - {X86::VRNDSCALESSZrri_Intk, X86::VRNDSCALESSZrmi_Intk, TB_NO_REVERSE}, + {X86::VRNDSCALESDZrrik_Int, X86::VRNDSCALESDZrmik_Int, TB_NO_REVERSE}, + {X86::VRNDSCALESHZrrik_Int, X86::VRNDSCALESHZrmik_Int, TB_NO_REVERSE}, + {X86::VRNDSCALESSZrrik_Int, X86::VRNDSCALESSZrmik_Int, TB_NO_REVERSE}, {X86::VRSQRT14SDZrrk, X86::VRSQRT14SDZrmk, TB_NO_REVERSE}, {X86::VRSQRT14SSZrrk, X86::VRSQRT14SSZrmk, TB_NO_REVERSE}, {X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE}, @@ -7384,9 +7384,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0}, {X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0}, {X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0}, - {X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE}, - {X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE}, - {X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE}, + {X86::VSQRTSDZrk_Int, X86::VSQRTSDZmk_Int, TB_NO_REVERSE}, + {X86::VSQRTSHZrk_Int, X86::VSQRTSHZmk_Int, TB_NO_REVERSE}, + {X86::VSQRTSSZrk_Int, X86::VSQRTSSZmk_Int, TB_NO_REVERSE}, {X86::VSUBNEPBF16Z128rrk, X86::VSUBNEPBF16Z128rmk, 0}, {X86::VSUBNEPBF16Z256rrk, X86::VSUBNEPBF16Z256rmk, 0}, {X86::VSUBNEPBF16Zrrk, X86::VSUBNEPBF16Zrmk, 0}, @@ -7399,9 +7399,9 @@ static const X86FoldTableEntry Table4[] = { {X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0}, {X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0}, {X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0}, - {X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE}, - {X86::VSUBSHZrr_Intk, X86::VSUBSHZrm_Intk, TB_NO_REVERSE}, - {X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE}, + {X86::VSUBSDZrrk_Int, X86::VSUBSDZrmk_Int, TB_NO_REVERSE}, + {X86::VSUBSHZrrk_Int, X86::VSUBSHZrmk_Int, TB_NO_REVERSE}, + {X86::VSUBSSZrrk_Int, X86::VSUBSSZrmk_Int, TB_NO_REVERSE}, {X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0}, {X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0}, {X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0},