Skip to content

Commit ac9626a

Browse files
author
git apple-llvm automerger
committed
Merge commit 'd04b6dadb65f' from llvm.org/main into next
2 parents 20f3f5b + d04b6da commit ac9626a

File tree

10 files changed

+689
-689
lines changed

10 files changed

+689
-689
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -12404,22 +12404,22 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
1240412404
X86FoldableSchedWrite sched, X86VectorVTInfo VTI,
1240512405
bit IsCommutable> {
1240612406
let ExeDomain = VTI.ExeDomain in {
12407-
defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
12407+
defm rr : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
1240812408
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
1240912409
"$src3, $src2", "$src2, $src3",
1241012410
(VTI.VT (OpNode VTI.RC:$src1,
1241112411
VTI.RC:$src2, VTI.RC:$src3)),
1241212412
IsCommutable, IsCommutable>,
1241312413
EVEX, VVVV, T8, Sched<[sched]>;
12414-
defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12414+
defm rm : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1241512415
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
1241612416
"$src3, $src2", "$src2, $src3",
1241712417
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
1241812418
(VTI.VT (VTI.LdFrag addr:$src3))))>,
1241912419
EVEX, VVVV, EVEX_CD8<32, CD8VF>, T8,
1242012420
Sched<[sched.Folded, sched.ReadAfterFold,
1242112421
sched.ReadAfterFold]>;
12422-
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
12422+
defm rmb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
1242312423
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
1242412424
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
1242512425
"$src2, ${src3}"#VTI.BroadcastStr,
@@ -12459,24 +12459,24 @@ defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SchedWriteVecIMul
1245912459
let Predicates = [HasVNNI] in {
1246012460
def : Pat<(v16i32 (add VR512:$src1,
1246112461
(X86vpmaddwd_su VR512:$src2, VR512:$src3))),
12462-
(VPDPWSSDZr VR512:$src1, VR512:$src2, VR512:$src3)>;
12462+
(VPDPWSSDZrr VR512:$src1, VR512:$src2, VR512:$src3)>;
1246312463
def : Pat<(v16i32 (add VR512:$src1,
1246412464
(X86vpmaddwd_su VR512:$src2, (load addr:$src3)))),
12465-
(VPDPWSSDZm VR512:$src1, VR512:$src2, addr:$src3)>;
12465+
(VPDPWSSDZrm VR512:$src1, VR512:$src2, addr:$src3)>;
1246612466
}
1246712467
let Predicates = [HasVNNI,HasVLX] in {
1246812468
def : Pat<(v8i32 (add VR256X:$src1,
1246912469
(X86vpmaddwd_su VR256X:$src2, VR256X:$src3))),
12470-
(VPDPWSSDZ256r VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
12470+
(VPDPWSSDZ256rr VR256X:$src1, VR256X:$src2, VR256X:$src3)>;
1247112471
def : Pat<(v8i32 (add VR256X:$src1,
1247212472
(X86vpmaddwd_su VR256X:$src2, (load addr:$src3)))),
12473-
(VPDPWSSDZ256m VR256X:$src1, VR256X:$src2, addr:$src3)>;
12473+
(VPDPWSSDZ256rm VR256X:$src1, VR256X:$src2, addr:$src3)>;
1247412474
def : Pat<(v4i32 (add VR128X:$src1,
1247512475
(X86vpmaddwd_su VR128X:$src2, VR128X:$src3))),
12476-
(VPDPWSSDZ128r VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
12476+
(VPDPWSSDZ128rr VR128X:$src1, VR128X:$src2, VR128X:$src3)>;
1247712477
def : Pat<(v4i32 (add VR128X:$src1,
1247812478
(X86vpmaddwd_su VR128X:$src2, (load addr:$src3)))),
12479-
(VPDPWSSDZ128m VR128X:$src1, VR128X:$src2, addr:$src3)>;
12479+
(VPDPWSSDZ128rm VR128X:$src1, VR128X:$src2, addr:$src3)>;
1248012480
}
1248112481

1248212482
//===----------------------------------------------------------------------===//

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 87 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,78 +2939,78 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
29392939
case X86::VPDPBUUDSYrr:
29402940
case X86::VPDPBUUDrr:
29412941
case X86::VPDPBUUDYrr:
2942-
case X86::VPDPBSSDSZ128r:
2943-
case X86::VPDPBSSDSZ128rk:
2944-
case X86::VPDPBSSDSZ128rkz:
2945-
case X86::VPDPBSSDSZ256r:
2946-
case X86::VPDPBSSDSZ256rk:
2947-
case X86::VPDPBSSDSZ256rkz:
2948-
case X86::VPDPBSSDSZr:
2949-
case X86::VPDPBSSDSZrk:
2950-
case X86::VPDPBSSDSZrkz:
2951-
case X86::VPDPBSSDZ128r:
2952-
case X86::VPDPBSSDZ128rk:
2953-
case X86::VPDPBSSDZ128rkz:
2954-
case X86::VPDPBSSDZ256r:
2955-
case X86::VPDPBSSDZ256rk:
2956-
case X86::VPDPBSSDZ256rkz:
2957-
case X86::VPDPBSSDZr:
2958-
case X86::VPDPBSSDZrk:
2959-
case X86::VPDPBSSDZrkz:
2960-
case X86::VPDPBUUDSZ128r:
2961-
case X86::VPDPBUUDSZ128rk:
2962-
case X86::VPDPBUUDSZ128rkz:
2963-
case X86::VPDPBUUDSZ256r:
2964-
case X86::VPDPBUUDSZ256rk:
2965-
case X86::VPDPBUUDSZ256rkz:
2966-
case X86::VPDPBUUDSZr:
2967-
case X86::VPDPBUUDSZrk:
2968-
case X86::VPDPBUUDSZrkz:
2969-
case X86::VPDPBUUDZ128r:
2970-
case X86::VPDPBUUDZ128rk:
2971-
case X86::VPDPBUUDZ128rkz:
2972-
case X86::VPDPBUUDZ256r:
2973-
case X86::VPDPBUUDZ256rk:
2974-
case X86::VPDPBUUDZ256rkz:
2975-
case X86::VPDPBUUDZr:
2976-
case X86::VPDPBUUDZrk:
2977-
case X86::VPDPBUUDZrkz:
2978-
case X86::VPDPWSSDZ128r:
2979-
case X86::VPDPWSSDZ128rk:
2980-
case X86::VPDPWSSDZ128rkz:
2981-
case X86::VPDPWSSDZ256r:
2982-
case X86::VPDPWSSDZ256rk:
2983-
case X86::VPDPWSSDZ256rkz:
2984-
case X86::VPDPWSSDZr:
2985-
case X86::VPDPWSSDZrk:
2986-
case X86::VPDPWSSDZrkz:
2987-
case X86::VPDPWSSDSZ128r:
2988-
case X86::VPDPWSSDSZ128rk:
2989-
case X86::VPDPWSSDSZ128rkz:
2990-
case X86::VPDPWSSDSZ256r:
2991-
case X86::VPDPWSSDSZ256rk:
2992-
case X86::VPDPWSSDSZ256rkz:
2993-
case X86::VPDPWSSDSZr:
2994-
case X86::VPDPWSSDSZrk:
2995-
case X86::VPDPWSSDSZrkz:
2996-
case X86::VPDPWUUDZ128r:
2997-
case X86::VPDPWUUDZ128rk:
2998-
case X86::VPDPWUUDZ128rkz:
2999-
case X86::VPDPWUUDZ256r:
3000-
case X86::VPDPWUUDZ256rk:
3001-
case X86::VPDPWUUDZ256rkz:
3002-
case X86::VPDPWUUDZr:
3003-
case X86::VPDPWUUDZrk:
3004-
case X86::VPDPWUUDZrkz:
3005-
case X86::VPDPWUUDSZ128r:
3006-
case X86::VPDPWUUDSZ128rk:
3007-
case X86::VPDPWUUDSZ128rkz:
3008-
case X86::VPDPWUUDSZ256r:
3009-
case X86::VPDPWUUDSZ256rk:
3010-
case X86::VPDPWUUDSZ256rkz:
3011-
case X86::VPDPWUUDSZr:
3012-
case X86::VPDPWUUDSZrk:
3013-
case X86::VPDPWUUDSZrkz:
2942+
case X86::VPDPBSSDSZ128rr:
2943+
case X86::VPDPBSSDSZ128rrk:
2944+
case X86::VPDPBSSDSZ128rrkz:
2945+
case X86::VPDPBSSDSZ256rr:
2946+
case X86::VPDPBSSDSZ256rrk:
2947+
case X86::VPDPBSSDSZ256rrkz:
2948+
case X86::VPDPBSSDSZrr:
2949+
case X86::VPDPBSSDSZrrk:
2950+
case X86::VPDPBSSDSZrrkz:
2951+
case X86::VPDPBSSDZ128rr:
2952+
case X86::VPDPBSSDZ128rrk:
2953+
case X86::VPDPBSSDZ128rrkz:
2954+
case X86::VPDPBSSDZ256rr:
2955+
case X86::VPDPBSSDZ256rrk:
2956+
case X86::VPDPBSSDZ256rrkz:
2957+
case X86::VPDPBSSDZrr:
2958+
case X86::VPDPBSSDZrrk:
2959+
case X86::VPDPBSSDZrrkz:
2960+
case X86::VPDPBUUDSZ128rr:
2961+
case X86::VPDPBUUDSZ128rrk:
2962+
case X86::VPDPBUUDSZ128rrkz:
2963+
case X86::VPDPBUUDSZ256rr:
2964+
case X86::VPDPBUUDSZ256rrk:
2965+
case X86::VPDPBUUDSZ256rrkz:
2966+
case X86::VPDPBUUDSZrr:
2967+
case X86::VPDPBUUDSZrrk:
2968+
case X86::VPDPBUUDSZrrkz:
2969+
case X86::VPDPBUUDZ128rr:
2970+
case X86::VPDPBUUDZ128rrk:
2971+
case X86::VPDPBUUDZ128rrkz:
2972+
case X86::VPDPBUUDZ256rr:
2973+
case X86::VPDPBUUDZ256rrk:
2974+
case X86::VPDPBUUDZ256rrkz:
2975+
case X86::VPDPBUUDZrr:
2976+
case X86::VPDPBUUDZrrk:
2977+
case X86::VPDPBUUDZrrkz:
2978+
case X86::VPDPWSSDZ128rr:
2979+
case X86::VPDPWSSDZ128rrk:
2980+
case X86::VPDPWSSDZ128rrkz:
2981+
case X86::VPDPWSSDZ256rr:
2982+
case X86::VPDPWSSDZ256rrk:
2983+
case X86::VPDPWSSDZ256rrkz:
2984+
case X86::VPDPWSSDZrr:
2985+
case X86::VPDPWSSDZrrk:
2986+
case X86::VPDPWSSDZrrkz:
2987+
case X86::VPDPWSSDSZ128rr:
2988+
case X86::VPDPWSSDSZ128rrk:
2989+
case X86::VPDPWSSDSZ128rrkz:
2990+
case X86::VPDPWSSDSZ256rr:
2991+
case X86::VPDPWSSDSZ256rrk:
2992+
case X86::VPDPWSSDSZ256rrkz:
2993+
case X86::VPDPWSSDSZrr:
2994+
case X86::VPDPWSSDSZrrk:
2995+
case X86::VPDPWSSDSZrrkz:
2996+
case X86::VPDPWUUDZ128rr:
2997+
case X86::VPDPWUUDZ128rrk:
2998+
case X86::VPDPWUUDZ128rrkz:
2999+
case X86::VPDPWUUDZ256rr:
3000+
case X86::VPDPWUUDZ256rrk:
3001+
case X86::VPDPWUUDZ256rrkz:
3002+
case X86::VPDPWUUDZrr:
3003+
case X86::VPDPWUUDZrrk:
3004+
case X86::VPDPWUUDZrrkz:
3005+
case X86::VPDPWUUDSZ128rr:
3006+
case X86::VPDPWUUDSZ128rrk:
3007+
case X86::VPDPWUUDSZ128rrkz:
3008+
case X86::VPDPWUUDSZ256rr:
3009+
case X86::VPDPWUUDSZ256rrk:
3010+
case X86::VPDPWUUDSZ256rrkz:
3011+
case X86::VPDPWUUDSZrr:
3012+
case X86::VPDPWUUDSZrrk:
3013+
case X86::VPDPWUUDSZrrkz:
30143014
case X86::VPMADD52HUQrr:
30153015
case X86::VPMADD52HUQYrr:
30163016
case X86::VPMADD52HUQZ128r:
@@ -10822,15 +10822,15 @@ bool X86InstrInfo::getMachineCombinerPatterns(
1082210822
}
1082310823
break;
1082410824
}
10825-
case X86::VPDPWSSDZ128r:
10826-
case X86::VPDPWSSDZ128m:
10827-
case X86::VPDPWSSDZ256r:
10828-
case X86::VPDPWSSDZ256m:
10829-
case X86::VPDPWSSDZr:
10830-
case X86::VPDPWSSDZm: {
10831-
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10832-
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10833-
return true;
10825+
case X86::VPDPWSSDZ128rr:
10826+
case X86::VPDPWSSDZ128rm:
10827+
case X86::VPDPWSSDZ256rr:
10828+
case X86::VPDPWSSDZ256rm:
10829+
case X86::VPDPWSSDZrr:
10830+
case X86::VPDPWSSDZrm: {
10831+
if (Subtarget.hasBWI() && !Subtarget.hasFastDPWSSD()) {
10832+
Patterns.push_back(X86MachineCombinerPattern::DPWSSD);
10833+
return true;
1083410834
}
1083510835
break;
1083610836
}
@@ -10866,11 +10866,11 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
1086610866
MaddOpc = X86::VPMADDWDrm;
1086710867
AddOpc = X86::VPADDDrr;
1086810868
break;
10869-
case X86::VPDPWSSDZ128r:
10869+
case X86::VPDPWSSDZ128rr:
1087010870
MaddOpc = X86::VPMADDWDZ128rr;
1087110871
AddOpc = X86::VPADDDZ128rr;
1087210872
break;
10873-
case X86::VPDPWSSDZ128m:
10873+
case X86::VPDPWSSDZ128rm:
1087410874
MaddOpc = X86::VPMADDWDZ128rm;
1087510875
AddOpc = X86::VPADDDZ128rr;
1087610876
break;
@@ -10886,23 +10886,23 @@ genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
1088610886
MaddOpc = X86::VPMADDWDYrm;
1088710887
AddOpc = X86::VPADDDYrr;
1088810888
break;
10889-
case X86::VPDPWSSDZ256r:
10889+
case X86::VPDPWSSDZ256rr:
1089010890
MaddOpc = X86::VPMADDWDZ256rr;
1089110891
AddOpc = X86::VPADDDZ256rr;
1089210892
break;
10893-
case X86::VPDPWSSDZ256m:
10893+
case X86::VPDPWSSDZ256rm:
1089410894
MaddOpc = X86::VPMADDWDZ256rm;
1089510895
AddOpc = X86::VPADDDZ256rr;
1089610896
break;
1089710897
// vpdpwssd zmm2,zmm3,zmm1
1089810898
// -->
1089910899
// vpmaddwd zmm3,zmm3,zmm1
1090010900
// vpaddd zmm2,zmm2,zmm3
10901-
case X86::VPDPWSSDZr:
10901+
case X86::VPDPWSSDZrr:
1090210902
MaddOpc = X86::VPMADDWDZrr;
1090310903
AddOpc = X86::VPADDDZrr;
1090410904
break;
10905-
case X86::VPDPWSSDZm:
10905+
case X86::VPDPWSSDZrm:
1090610906
MaddOpc = X86::VPMADDWDZrm;
1090710907
AddOpc = X86::VPADDDZrr;
1090810908
break;

llvm/lib/Target/X86/X86ScheduleZnver4.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1567,7 +1567,7 @@ def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
15671567
let NumMicroOps = 1;
15681568
}
15691569
def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
1570-
"VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
1570+
"VPDP(BU|WS)(S|P)(S|D|DS)(Z?|Z128?|Z256?|Y?)r(r|rk|rkz)",
15711571
"VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
15721572
)>;
15731573

0 commit comments

Comments
 (0)