@@ -397,6 +397,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
397397 {X86::VMOVSDZrm, 1 , 64 , rebuildZeroUpperCst},
398398 {X86::VMOVDDUPZ128rm, 1 , 64 , rebuildSplatCst}},
399399 1 );
400+ case X86::VMOVAPDZ128rmk:
401+ case X86::VMOVUPDZ128rmk:
402+ return FixupConstant ({{X86::VMOVSDZrmk, 1 , 64 , rebuildZeroUpperCst},
403+ {X86::VMOVDDUPZ128rmk, 1 , 64 , rebuildSplatCst}},
404+ 3 );
405+ case X86::VMOVAPDZ128rmkz:
406+ case X86::VMOVUPDZ128rmkz:
407+ return FixupConstant ({{X86::VMOVSDZrmkz, 1 , 64 , rebuildZeroUpperCst},
408+ {X86::VMOVDDUPZ128rmkz, 1 , 64 , rebuildSplatCst}},
409+ 2 );
410+ case X86::VMOVAPSZ128rmk:
411+ case X86::VMOVUPSZ128rmk:
412+ return FixupConstant ({{X86::VMOVSSZrmk, 1 , 32 , rebuildZeroUpperCst},
413+ {X86::VBROADCASTSSZ128rmk, 1 , 32 , rebuildSplatCst}},
414+ 3 );
415+ case X86::VMOVAPSZ128rmkz:
416+ case X86::VMOVUPSZ128rmkz:
417+ return FixupConstant ({{X86::VMOVSSZrmkz, 1 , 32 , rebuildZeroUpperCst},
418+ {X86::VBROADCASTSSZ128rmkz, 1 , 32 , rebuildSplatCst}},
419+ 2 );
400420 case X86::VMOVAPDZ256rm:
401421 case X86::VMOVAPSZ256rm:
402422 case X86::VMOVUPDZ256rm:
@@ -406,6 +426,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
406426 {X86::VBROADCASTSDZ256rm, 1 , 64 , rebuildSplatCst},
407427 {X86::VBROADCASTF32X4Z256rm, 1 , 128 , rebuildSplatCst}},
408428 1 );
429+ case X86::VMOVAPDZ256rmk:
430+ case X86::VMOVUPDZ256rmk:
431+ return FixupConstant ({{X86::VBROADCASTSDZ256rmk, 1 , 64 , rebuildSplatCst}},
432+ 3 );
433+ case X86::VMOVAPDZ256rmkz:
434+ case X86::VMOVUPDZ256rmkz:
435+ return FixupConstant ({{X86::VBROADCASTSDZ256rmkz, 1 , 64 , rebuildSplatCst}},
436+ 2 );
437+ case X86::VMOVAPSZ256rmk:
438+ case X86::VMOVUPSZ256rmk:
439+ return FixupConstant (
440+ {{X86::VBROADCASTSSZ256rmk, 1 , 32 , rebuildSplatCst},
441+ {X86::VBROADCASTF32X4Z256rmk, 1 , 128 , rebuildSplatCst}},
442+ 3 );
443+ case X86::VMOVAPSZ256rmkz:
444+ case X86::VMOVUPSZ256rmkz:
445+ return FixupConstant (
446+ {{X86::VBROADCASTSSZ256rmkz, 1 , 32 , rebuildSplatCst},
447+ {X86::VBROADCASTF32X4Z256rmkz, 1 , 128 , rebuildSplatCst}},
448+ 2 );
409449 case X86::VMOVAPDZrm:
410450 case X86::VMOVAPSZrm:
411451 case X86::VMOVUPDZrm:
@@ -415,6 +455,26 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
415455 {X86::VBROADCASTF32X4rm, 1 , 128 , rebuildSplatCst},
416456 {X86::VBROADCASTF64X4rm, 1 , 256 , rebuildSplatCst}},
417457 1 );
458+ case X86::VMOVAPDZrmk:
459+ case X86::VMOVUPDZrmk:
460+ return FixupConstant ({{X86::VBROADCASTSDZrmk, 1 , 64 , rebuildSplatCst},
461+ {X86::VBROADCASTF64X4rmk, 1 , 256 , rebuildSplatCst}},
462+ 3 );
463+ case X86::VMOVAPDZrmkz:
464+ case X86::VMOVUPDZrmkz:
465+ return FixupConstant ({{X86::VBROADCASTSDZrmkz, 1 , 64 , rebuildSplatCst},
466+ {X86::VBROADCASTF64X4rmkz, 1 , 256 , rebuildSplatCst}},
467+ 2 );
468+ case X86::VMOVAPSZrmk:
469+ case X86::VMOVUPSZrmk:
470+ return FixupConstant ({{X86::VBROADCASTSSZrmk, 1 , 32 , rebuildSplatCst},
471+ {X86::VBROADCASTF32X4rmk, 1 , 128 , rebuildSplatCst}},
472+ 3 );
473+ case X86::VMOVAPSZrmkz:
474+ case X86::VMOVUPSZrmkz:
475+ return FixupConstant ({{X86::VBROADCASTSSZrmkz, 1 , 32 , rebuildSplatCst},
476+ {X86::VBROADCASTF32X4rmkz, 1 , 128 , rebuildSplatCst}},
477+ 2 );
418478 /* Integer Loads */
419479 case X86::MOVDQArm:
420480 case X86::MOVDQUrm: {
@@ -510,6 +570,42 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
510570 {X86::VPMOVZXDQZ128rm, 2 , 32 , rebuildZExtCst}};
511571 return FixupConstant (Fixups, 1 );
512572 }
573+ case X86::VMOVDQA32Z128rmk:
574+ case X86::VMOVDQU32Z128rmk:
575+ return FixupConstant ({{X86::VPBROADCASTDZ128rmk, 1 , 32 , rebuildSplatCst},
576+ {X86::VPMOVSXBDZ128rmk, 4 , 8 , rebuildSExtCst},
577+ {X86::VPMOVZXBDZ128rmk, 4 , 8 , rebuildZExtCst},
578+ {X86::VPMOVSXWDZ128rmk, 4 , 16 , rebuildSExtCst},
579+ {X86::VPMOVZXWDZ128rmk, 4 , 16 , rebuildZExtCst}},
580+ 3 );
581+ case X86::VMOVDQA32Z128rmkz:
582+ case X86::VMOVDQU32Z128rmkz:
583+ return FixupConstant ({{X86::VPBROADCASTDZ128rmkz, 1 , 32 , rebuildSplatCst},
584+ {X86::VPMOVSXBDZ128rmkz, 4 , 8 , rebuildSExtCst},
585+ {X86::VPMOVZXBDZ128rmkz, 4 , 8 , rebuildZExtCst},
586+ {X86::VPMOVSXWDZ128rmkz, 4 , 16 , rebuildSExtCst},
587+ {X86::VPMOVZXWDZ128rmkz, 4 , 16 , rebuildZExtCst}},
588+ 2 );
589+ case X86::VMOVDQA64Z128rmk:
590+ case X86::VMOVDQU64Z128rmk:
591+ return FixupConstant ({{X86::VPMOVSXBQZ128rmk, 2 , 8 , rebuildSExtCst},
592+ {X86::VPMOVZXBQZ128rmk, 2 , 8 , rebuildZExtCst},
593+ {X86::VPMOVSXWQZ128rmk, 2 , 16 , rebuildSExtCst},
594+ {X86::VPMOVZXWQZ128rmk, 2 , 16 , rebuildZExtCst},
595+ {X86::VPBROADCASTQZ128rmk, 1 , 64 , rebuildSplatCst},
596+ {X86::VPMOVSXDQZ128rmk, 2 , 32 , rebuildSExtCst},
597+ {X86::VPMOVZXDQZ128rmk, 2 , 32 , rebuildZExtCst}},
598+ 3 );
599+ case X86::VMOVDQA64Z128rmkz:
600+ case X86::VMOVDQU64Z128rmkz:
601+ return FixupConstant ({{X86::VPMOVSXBQZ128rmkz, 2 , 8 , rebuildSExtCst},
602+ {X86::VPMOVZXBQZ128rmkz, 2 , 8 , rebuildZExtCst},
603+ {X86::VPMOVSXWQZ128rmkz, 2 , 16 , rebuildSExtCst},
604+ {X86::VPMOVZXWQZ128rmkz, 2 , 16 , rebuildZExtCst},
605+ {X86::VPBROADCASTQZ128rmkz, 1 , 64 , rebuildSplatCst},
606+ {X86::VPMOVSXDQZ128rmkz, 2 , 32 , rebuildSExtCst},
607+ {X86::VPMOVZXDQZ128rmkz, 2 , 32 , rebuildZExtCst}},
608+ 2 );
513609 case X86::VMOVDQA32Z256rm:
514610 case X86::VMOVDQA64Z256rm:
515611 case X86::VMOVDQU32Z256rm:
@@ -534,6 +630,46 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
534630 {X86::VPMOVZXDQZ256rm, 4 , 32 , rebuildZExtCst}};
535631 return FixupConstant (Fixups, 1 );
536632 }
633+ case X86::VMOVDQA32Z256rmk:
634+ case X86::VMOVDQU32Z256rmk:
635+ return FixupConstant (
636+ {{X86::VPBROADCASTDZ256rmk, 1 , 32 , rebuildSplatCst},
637+ {X86::VPMOVSXBDZ256rmk, 8 , 8 , rebuildSExtCst},
638+ {X86::VPMOVZXBDZ256rmk, 8 , 8 , rebuildZExtCst},
639+ {X86::VBROADCASTI32X4Z256rmk, 1 , 128 , rebuildSplatCst},
640+ {X86::VPMOVSXWDZ256rmk, 8 , 16 , rebuildSExtCst},
641+ {X86::VPMOVZXWDZ256rmk, 8 , 16 , rebuildZExtCst}},
642+ 3 );
643+ case X86::VMOVDQA32Z256rmkz:
644+ case X86::VMOVDQU32Z256rmkz:
645+ return FixupConstant (
646+ {{X86::VPBROADCASTDZ256rmkz, 1 , 32 , rebuildSplatCst},
647+ {X86::VPMOVSXBDZ256rmkz, 8 , 8 , rebuildSExtCst},
648+ {X86::VPMOVZXBDZ256rmkz, 8 , 8 , rebuildZExtCst},
649+ {X86::VBROADCASTI32X4Z256rmkz, 1 , 128 , rebuildSplatCst},
650+ {X86::VPMOVSXWDZ256rmkz, 8 , 16 , rebuildSExtCst},
651+ {X86::VPMOVZXWDZ256rmkz, 8 , 16 , rebuildZExtCst}},
652+ 2 );
653+ case X86::VMOVDQA64Z256rmk:
654+ case X86::VMOVDQU64Z256rmk:
655+ return FixupConstant ({{X86::VPMOVSXBQZ256rmk, 4 , 8 , rebuildSExtCst},
656+ {X86::VPMOVZXBQZ256rmk, 4 , 8 , rebuildZExtCst},
657+ {X86::VPBROADCASTQZ256rmk, 1 , 64 , rebuildSplatCst},
658+ {X86::VPMOVSXWQZ256rmk, 4 , 16 , rebuildSExtCst},
659+ {X86::VPMOVZXWQZ256rmk, 4 , 16 , rebuildZExtCst},
660+ {X86::VPMOVSXDQZ256rmk, 4 , 32 , rebuildSExtCst},
661+ {X86::VPMOVZXDQZ256rmk, 4 , 32 , rebuildZExtCst}},
662+ 3 );
663+ case X86::VMOVDQA64Z256rmkz:
664+ case X86::VMOVDQU64Z256rmkz:
665+ return FixupConstant ({{X86::VPMOVSXBQZ256rmkz, 4 , 8 , rebuildSExtCst},
666+ {X86::VPMOVZXBQZ256rmkz, 4 , 8 , rebuildZExtCst},
667+ {X86::VPBROADCASTQZ256rmkz, 1 , 64 , rebuildSplatCst},
668+ {X86::VPMOVSXWQZ256rmkz, 4 , 16 , rebuildSExtCst},
669+ {X86::VPMOVZXWQZ256rmkz, 4 , 16 , rebuildZExtCst},
670+ {X86::VPMOVSXDQZ256rmkz, 4 , 32 , rebuildSExtCst},
671+ {X86::VPMOVZXDQZ256rmkz, 4 , 32 , rebuildZExtCst}},
672+ 2 );
537673 case X86::VMOVDQA32Zrm:
538674 case X86::VMOVDQA64Zrm:
539675 case X86::VMOVDQU32Zrm:
@@ -559,41 +695,87 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
559695 {X86::VPMOVZXDQZrm, 8 , 32 , rebuildZExtCst}};
560696 return FixupConstant (Fixups, 1 );
561697 }
698+ case X86::VMOVDQA32Zrmk:
699+ case X86::VMOVDQU32Zrmk:
700+ return FixupConstant ({{X86::VPBROADCASTDZrmk, 1 , 32 , rebuildSplatCst},
701+ {X86::VBROADCASTI32X4rmk, 1 , 128 , rebuildSplatCst},
702+ {X86::VPMOVSXBDZrmk, 16 , 8 , rebuildSExtCst},
703+ {X86::VPMOVZXBDZrmk, 16 , 8 , rebuildZExtCst},
704+ {X86::VPMOVSXWDZrmk, 16 , 16 , rebuildSExtCst},
705+ {X86::VPMOVZXWDZrmk, 16 , 16 , rebuildZExtCst}},
706+ 3 );
707+ case X86::VMOVDQA32Zrmkz:
708+ case X86::VMOVDQU32Zrmkz:
709+ return FixupConstant ({{X86::VPBROADCASTDZrmkz, 1 , 32 , rebuildSplatCst},
710+ {X86::VBROADCASTI32X4rmkz, 1 , 128 , rebuildSplatCst},
711+ {X86::VPMOVSXBDZrmkz, 16 , 8 , rebuildSExtCst},
712+ {X86::VPMOVZXBDZrmkz, 16 , 8 , rebuildZExtCst},
713+ {X86::VPMOVSXWDZrmkz, 16 , 16 , rebuildSExtCst},
714+ {X86::VPMOVZXWDZrmkz, 16 , 16 , rebuildZExtCst}},
715+ 2 );
716+ case X86::VMOVDQA64Zrmk:
717+ case X86::VMOVDQU64Zrmk:
718+ return FixupConstant ({{X86::VPBROADCASTQZrmk, 1 , 64 , rebuildSplatCst},
719+ {X86::VPMOVSXBQZrmk, 8 , 8 , rebuildSExtCst},
720+ {X86::VPMOVZXBQZrmk, 8 , 8 , rebuildZExtCst},
721+ {X86::VPMOVSXWQZrmk, 8 , 16 , rebuildSExtCst},
722+ {X86::VPMOVZXWQZrmk, 8 , 16 , rebuildZExtCst},
723+ {X86::VBROADCASTI64X4rmk, 1 , 256 , rebuildSplatCst},
724+ {X86::VPMOVSXDQZrmk, 8 , 32 , rebuildSExtCst},
725+ {X86::VPMOVZXDQZrmk, 8 , 32 , rebuildZExtCst}},
726+ 3 );
727+ case X86::VMOVDQA64Zrmkz:
728+ case X86::VMOVDQU64Zrmkz:
729+ return FixupConstant ({{X86::VPBROADCASTQZrmkz, 1 , 64 , rebuildSplatCst},
730+ {X86::VPMOVSXBQZrmkz, 8 , 8 , rebuildSExtCst},
731+ {X86::VPMOVZXBQZrmkz, 8 , 8 , rebuildZExtCst},
732+ {X86::VPMOVSXWQZrmkz, 8 , 16 , rebuildSExtCst},
733+ {X86::VPMOVZXWQZrmkz, 8 , 16 , rebuildZExtCst},
734+ {X86::VBROADCASTI64X4rmkz, 1 , 256 , rebuildSplatCst},
735+ {X86::VPMOVSXDQZrmkz, 8 , 32 , rebuildSExtCst},
736+ {X86::VPMOVZXDQZrmkz, 8 , 32 , rebuildZExtCst}},
737+ 2 );
562738 }
563739
564- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
565- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
566- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
740+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
741+ unsigned OpSrc64) {
742+ if (OpSrc16) {
743+ if (const X86FoldTableEntry *Mem2Bcst =
744+ llvm::lookupBroadcastFoldTableBySize (OpSrc16, 16 )) {
745+ unsigned OpBcst16 = Mem2Bcst->DstOp ;
746+ unsigned OpNoBcst16 = Mem2Bcst->Flags & TB_INDEX_MASK;
747+ FixupEntry Fixups[] = {{(int )OpBcst16, 1 , 16 , rebuildSplatCst}};
748+ if (FixupConstant (Fixups, OpNoBcst16))
749+ return true ;
750+ }
751+ }
567752 if (OpSrc32) {
568753 if (const X86FoldTableEntry *Mem2Bcst =
569754 llvm::lookupBroadcastFoldTableBySize (OpSrc32, 32 )) {
570- OpBcst32 = Mem2Bcst->DstOp ;
571- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
755+ unsigned OpBcst32 = Mem2Bcst->DstOp ;
756+ unsigned OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
757+ FixupEntry Fixups[] = {{(int )OpBcst32, 1 , 32 , rebuildSplatCst}};
758+ if (FixupConstant (Fixups, OpNoBcst32))
759+ return true ;
572760 }
573761 }
574762 if (OpSrc64) {
575763 if (const X86FoldTableEntry *Mem2Bcst =
576764 llvm::lookupBroadcastFoldTableBySize (OpSrc64, 64 )) {
577- OpBcst64 = Mem2Bcst->DstOp ;
578- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
765+ unsigned OpBcst64 = Mem2Bcst->DstOp ;
766+ unsigned OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
767+ FixupEntry Fixups[] = {{(int )OpBcst64, 1 , 64 , rebuildSplatCst}};
768+ if (FixupConstant (Fixups, OpNoBcst64))
769+ return true ;
579770 }
580771 }
581- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
582- " OperandNo mismatch" );
583-
584- if (OpBcst32 || OpBcst64) {
585- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
586- FixupEntry Fixups[] = {{(int )OpBcst32, 32 , 32 , rebuildSplatCst},
587- {(int )OpBcst64, 64 , 64 , rebuildSplatCst}};
588- return FixupConstant (Fixups, OpNo);
589- }
590772 return false ;
591773 };
592774
593775 // Attempt to find an AVX512 mapping from a full width memory-fold instruction
594776 // to a broadcast-fold instruction variant.
595777 if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
596- return ConvertToBroadcastAVX512 (Opc, Opc);
778+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
597779
598780 // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
599781 // conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -650,7 +832,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
650832 break ;
651833 }
652834 if (OpSrc32 || OpSrc64)
653- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
835+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
654836 }
655837
656838 return false ;
0 commit comments