@@ -294,20 +294,56 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
294294 case X86::VMOVUPSZ128rm:
295295 return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rm,
296296 X86::VBROADCASTSSZ128rm, 0 , 0 , 1 );
297+ case X86::VMOVAPDZ128rmk:
298+ case X86::VMOVUPDZ128rmk:
299+ return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rmk, 0 , 0 , 0 , 3 );
300+ case X86::VMOVAPSZ128rmk:
301+ case X86::VMOVUPSZ128rmk:
302+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ128rmk, 0 , 0 , 3 );
303+ case X86::VMOVAPDZ128rmkz:
304+ case X86::VMOVUPDZ128rmkz:
305+ return ConvertToBroadcast (0 , 0 , X86::VMOVDDUPZ128rmkz, 0 , 0 , 0 , 2 );
306+ case X86::VMOVAPSZ128rmkz:
307+ case X86::VMOVUPSZ128rmkz:
308+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ128rmkz, 0 , 0 , 2 );
297309 case X86::VMOVAPDZ256rm:
298310 case X86::VMOVAPSZ256rm:
299311 case X86::VMOVUPDZ256rm:
300312 case X86::VMOVUPSZ256rm:
301313 return ConvertToBroadcast (0 , X86::VBROADCASTF32X4Z256rm,
302314 X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm,
303315 0 , 0 , 1 );
316+ case X86::VMOVAPDZ256rmk:
317+ case X86::VMOVUPDZ256rmk:
318+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZ256rmk, 0 , 0 , 0 , 3 );
319+ case X86::VMOVAPSZ256rmk:
320+ case X86::VMOVUPSZ256rmk:
321+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ256rmk, 0 , 0 , 3 );
322+ case X86::VMOVAPDZ256rmkz:
323+ case X86::VMOVUPDZ256rmkz:
324+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZ256rmkz, 0 , 0 , 0 , 2 );
325+ case X86::VMOVAPSZ256rmkz:
326+ case X86::VMOVUPSZ256rmkz:
327+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZ256rmkz, 0 , 0 , 2 );
304328 case X86::VMOVAPDZrm:
305329 case X86::VMOVAPSZrm:
306330 case X86::VMOVUPDZrm:
307331 case X86::VMOVUPSZrm:
308332 return ConvertToBroadcast (X86::VBROADCASTF64X4rm, X86::VBROADCASTF32X4rm,
309333 X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0 , 0 ,
310334 1 );
335+ case X86::VMOVAPDZrmk:
336+ case X86::VMOVUPDZrmk:
337+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZrmk, 0 , 0 , 0 , 3 );
338+ case X86::VMOVAPSZrmk:
339+ case X86::VMOVUPSZrmk:
340+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZrmk, 0 , 0 , 3 );
341+ case X86::VMOVAPDZrmkz:
342+ case X86::VMOVUPDZrmkz:
343+ return ConvertToBroadcast (0 , 0 , X86::VBROADCASTSDZrmkz, 0 , 0 , 0 , 2 );
344+ case X86::VMOVAPSZrmkz:
345+ case X86::VMOVUPSZrmkz:
346+ return ConvertToBroadcast (0 , 0 , 0 , X86::VBROADCASTSSZrmkz, 0 , 0 , 2 );
311347 /* Integer Loads */
312348 case X86::VMOVDQArm:
313349 case X86::VMOVDQUrm:
@@ -332,6 +368,18 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
332368 X86::VPBROADCASTDZ128rm,
333369 HasBWI ? X86::VPBROADCASTWZ128rm : 0 ,
334370 HasBWI ? X86::VPBROADCASTBZ128rm : 0 , 1 );
371+ case X86::VMOVDQA32Z128rmk:
372+ case X86::VMOVDQU32Z128rmk:
373+ return ConvertToBroadcast (0 , 0 , 0 , X86::VPBROADCASTDZ128rmk, 0 , 0 , 3 );
374+ case X86::VMOVDQA32Z128rmkz:
375+ case X86::VMOVDQU32Z128rmkz:
376+ return ConvertToBroadcast (0 , 0 , 0 , X86::VPBROADCASTDZ128rmkz, 0 , 0 , 2 );
377+ case X86::VMOVDQA64Z128rmk:
378+ case X86::VMOVDQU64Z128rmk:
379+ return ConvertToBroadcast (0 , 0 , X86::VPBROADCASTQZ128rmk, 0 , 0 , 0 , 3 );
380+ case X86::VMOVDQA64Z128rmkz:
381+ case X86::VMOVDQU64Z128rmkz:
382+ return ConvertToBroadcast (0 , 0 , X86::VPBROADCASTQZ128rmkz, 0 , 0 , 0 , 2 );
335383 case X86::VMOVDQA32Z256rm:
336384 case X86::VMOVDQA64Z256rm:
337385 case X86::VMOVDQU32Z256rm:
@@ -340,6 +388,24 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
340388 X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
341389 HasBWI ? X86::VPBROADCASTWZ256rm : 0 ,
342390 HasBWI ? X86::VPBROADCASTBZ256rm : 0 , 1 );
391+ case X86::VMOVDQA32Z256rmk:
392+ case X86::VMOVDQU32Z256rmk:
393+ return ConvertToBroadcast (0 , X86::VBROADCASTI32X4Z256rmk,
394+ HasDQI ? X86::VBROADCASTI32X2Z256rmk : 0 ,
395+ X86::VPBROADCASTDZ256rmk, 0 , 0 , 3 );
396+ case X86::VMOVDQA32Z256rmkz:
397+ case X86::VMOVDQU32Z256rmkz:
398+ return ConvertToBroadcast (0 , X86::VBROADCASTI32X4Z256rmkz,
399+ HasDQI ? X86::VBROADCASTI32X2Z256rmkz : 0 ,
400+ X86::VPBROADCASTDZ256rmkz, 0 , 0 , 2 );
401+ case X86::VMOVDQA64Z256rmk:
402+ case X86::VMOVDQU64Z256rmk:
403+ return ConvertToBroadcast (0 , HasDQI ? X86::VBROADCASTI64X2Z128rmk : 0 ,
404+ X86::VPBROADCASTQZ256rmk, 0 , 0 , 0 , 3 );
405+ case X86::VMOVDQA64Z256rmkz:
406+ case X86::VMOVDQU64Z256rmkz:
407+ return ConvertToBroadcast (0 , HasDQI ? X86::VBROADCASTI64X2Z128rmkz : 0 ,
408+ X86::VPBROADCASTQZ256rmkz, 0 , 0 , 0 , 2 );
343409 case X86::VMOVDQA32Zrm:
344410 case X86::VMOVDQA64Zrm:
345411 case X86::VMOVDQU32Zrm:
@@ -348,39 +414,62 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
348414 X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
349415 HasBWI ? X86::VPBROADCASTWZrm : 0 ,
350416 HasBWI ? X86::VPBROADCASTBZrm : 0 , 1 );
417+ case X86::VMOVDQA32Zrmk:
418+ case X86::VMOVDQU32Zrmk:
419+ return ConvertToBroadcast (
420+ HasDQI ? X86::VBROADCASTI32X8rmk : 0 , X86::VBROADCASTI32X4rmk,
421+ HasDQI ? X86::VBROADCASTI32X2Zrmk : 0 , X86::VPBROADCASTDZrmk, 0 , 0 , 3 );
422+ case X86::VMOVDQA32Zrmkz:
423+ case X86::VMOVDQU32Zrmkz:
424+ return ConvertToBroadcast (HasDQI ? X86::VBROADCASTI32X8rmkz : 0 ,
425+ X86::VBROADCASTI32X4rmkz,
426+ HasDQI ? X86::VBROADCASTI32X2Zrmkz : 0 ,
427+ X86::VPBROADCASTDZrmkz, 0 , 0 , 2 );
428+ case X86::VMOVDQA64Zrmk:
429+ case X86::VMOVDQU64Zrmk:
430+ return ConvertToBroadcast (X86::VBROADCASTI64X4rmk,
431+ HasDQI ? X86::VBROADCASTI64X2rmk : 0 ,
432+ X86::VPBROADCASTQZrmk, 0 , 0 , 0 , 3 );
433+ case X86::VMOVDQA64Zrmkz:
434+ case X86::VMOVDQU64Zrmkz:
435+ return ConvertToBroadcast (X86::VBROADCASTI64X4rmkz,
436+ HasDQI ? X86::VBROADCASTI64X2rmkz : 0 ,
437+ X86::VPBROADCASTQZrmkz, 0 , 0 , 0 , 2 );
351438 }
352439
353- auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc32, unsigned OpSrc64) {
354- unsigned OpBcst32 = 0 , OpBcst64 = 0 ;
355- unsigned OpNoBcst32 = 0 , OpNoBcst64 = 0 ;
440+ auto ConvertToBroadcastAVX512 = [&](unsigned OpSrc16, unsigned OpSrc32,
441+ unsigned OpSrc64) {
442+ if (OpSrc16) {
443+ if (const X86FoldTableEntry *Mem2Bcst =
444+ llvm::lookupBroadcastFoldTable (OpSrc16, 16 )) {
445+ if (ConvertToBroadcast (0 , 0 , 0 , 0 , Mem2Bcst->DstOp , 0 ,
446+ Mem2Bcst->Flags & TB_INDEX_MASK))
447+ return true ;
448+ }
449+ }
356450 if (OpSrc32) {
357451 if (const X86FoldTableEntry *Mem2Bcst =
358452 llvm::lookupBroadcastFoldTable (OpSrc32, 32 )) {
359- OpBcst32 = Mem2Bcst->DstOp ;
360- OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
453+ if (ConvertToBroadcast (0 , 0 , 0 , Mem2Bcst->DstOp , 0 , 0 ,
454+ Mem2Bcst->Flags & TB_INDEX_MASK))
455+ return true ;
361456 }
362457 }
363458 if (OpSrc64) {
364459 if (const X86FoldTableEntry *Mem2Bcst =
365460 llvm::lookupBroadcastFoldTable (OpSrc64, 64 )) {
366- OpBcst64 = Mem2Bcst->DstOp ;
367- OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
461+ if (ConvertToBroadcast (0 , 0 , Mem2Bcst->DstOp , 0 , 0 , 0 ,
462+ Mem2Bcst->Flags & TB_INDEX_MASK))
463+ return true ;
368464 }
369465 }
370- assert (((OpBcst32 == 0 ) || (OpBcst64 == 0 ) || (OpNoBcst32 == OpNoBcst64)) &&
371- " OperandNo mismatch" );
372-
373- if (OpBcst32 || OpBcst64) {
374- unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
375- return ConvertToBroadcast (0 , 0 , OpBcst64, OpBcst32, 0 , 0 , OpNo);
376- }
377466 return false ;
378467 };
379468
380469 // Attempt to find a AVX512 mapping from a full width memory-fold instruction
381470 // to a broadcast-fold instruction variant.
382471 if ((MI.getDesc ().TSFlags & X86II::EncodingMask) == X86II::EVEX)
383- return ConvertToBroadcastAVX512 (Opc, Opc);
472+ return ConvertToBroadcastAVX512 (Opc, Opc, Opc );
384473
385474 // Reverse the X86InstrInfo::setExecutionDomainCustom EVEX->VEX logic
386475 // conversion to see if we can convert to a broadcasted (integer) logic op.
@@ -437,7 +526,7 @@ bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
437526 break ;
438527 }
439528 if (OpSrc32 || OpSrc64)
440- return ConvertToBroadcastAVX512 (OpSrc32, OpSrc64);
529+ return ConvertToBroadcastAVX512 (0 , OpSrc32, OpSrc64);
441530 }
442531
443532 return false ;
0 commit comments