@@ -318,25 +318,25 @@ bool GCNPreRAOptimizationsImpl::createListOfPackedInstr(
318318 int NumInst = 0 ;
319319
320320 auto E = BB->end ();
321- auto schedModel = TII->getSchedModel ();
322- const MCSchedClassDesc *schedClassDesc = schedModel .resolveSchedClass (&BeginMI);
323- const int NumMFMACycles = schedModel .getWriteProcResBegin (schedClassDesc )->ReleaseAtCycle ;
324- int totalCyclesBetweenCandidates = 0 ;
321+ auto SchedModel = TII->getSchedModel ();
322+ const MCSchedClassDesc *SchedClassDesc = SchedModel .resolveSchedClass (&BeginMI);
323+ const int NumMFMACycles = SchedModel .getWriteProcResBegin (SchedClassDesc )->ReleaseAtCycle ;
324+ int TotalCyclesBetweenCandidates = 0 ;
325325 for (auto I = std::next (BeginMI.getIterator ()); I != E; ++I) {
326326 MachineInstr &Instr = *I;
327- const MCSchedClassDesc *instrSchedClassDesc = schedModel .resolveSchedClass (&Instr);
328- totalCyclesBetweenCandidates += schedModel .getWriteProcResBegin (instrSchedClassDesc)->ReleaseAtCycle ;
327+ const MCSchedClassDesc *instrSchedClassDesc = SchedModel .resolveSchedClass (&Instr);
328+ TotalCyclesBetweenCandidates += SchedModel .getWriteProcResBegin (instrSchedClassDesc)->ReleaseAtCycle ;
329329 if (Instr.isMetaInstruction ())
330330 continue ;
331331
332332 if (Instr.isTerminator ())
333333 return false ;
334334
335- if (totalCyclesBetweenCandidates > NumMFMACycles)
335+ if (TotalCyclesBetweenCandidates > NumMFMACycles)
336336 return false ;
337337
338338 if ((isUnpackingSupportedInstr (Instr)) && TII->isNeverCoissue (Instr)) {
339- totalCyclesBetweenCandidates += 1 ;
339+ TotalCyclesBetweenCandidates += 1 ;
340340 instrsToUnpack.insert (&Instr);
341341 }
342342 }
@@ -411,10 +411,8 @@ SmallVector<MachineInstr *, 2> GCNPreRAOptimizationsImpl::insertUnpackedMI(
411411 if (isVreg_64) {
412412 Op0L_Op1L->getOperand (0 ).setIsUndef ();
413413 }
414- else {
415- if (I.getOperand (0 ).isUndef ()) {
416- Op0L_Op1L->getOperand (0 ).setIsUndef ();
417- }
414+ else if (I.getOperand (0 ).isUndef ()){
415+ Op0L_Op1L->getOperand (0 ).setIsUndef ();
418416 }
419417
420418 LIS->InsertMachineInstrInMaps (*Op0L_Op1L);
@@ -499,58 +497,37 @@ void GCNPreRAOptimizationsImpl::insertMI(MachineInstr &I) {
499497 TRI->getSubRegisterClass (Src0RC, AMDGPU::sub0);
500498 const TargetRegisterClass *SrcRC = TRI->getSubClassWithSubReg (Src0RC, 1 );
501499
502- if ((Src1RC->getID () == AMDGPU::SGPR_64RegClassID) ||
503- (Src0RC->getID () == AMDGPU::SGPR_64RegClassID)) {
504- if (Src1RC->getID () == AMDGPU::SGPR_64RegClassID) {
505- // try with sgpr32
506- SmallVector<MachineInstr *, 2 > copyInstrs = copyToVregAndInsertMI (I, 4 );
507- MachineInstr *CopySGPR1 = copyInstrs[0 ];
508- MachineInstr *CopySGPR2 = copyInstrs[1 ];
509-
510- if (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID) {
511- SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
512- I, DstMO, SrcMO1, CopySGPR1->getOperand (0 ), SrcMO1,
513- CopySGPR2->getOperand (0 ), true );
514- unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (2 ).getReg (), TRI);
515- unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (2 ).getReg (), TRI);
516- } else {
517- SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
518- I, DstMO, SrcMO1, CopySGPR1->getOperand (0 ), SrcMO1,
519- CopySGPR2->getOperand (0 ), false );
520- unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (2 ).getReg (), TRI);
521- unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (2 ).getReg (), TRI);
522- }
523- }
524- else {
525- SmallVector<MachineInstr *, 2 > copyInstrs = copyToVregAndInsertMI (I, 2 );
526- MachineInstr *CopySGPR1 = copyInstrs[0 ];
527- MachineInstr *CopySGPR2 = copyInstrs[1 ];
528-
529- if (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID) {
530- SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
531- I, DstMO, CopySGPR1->getOperand (0 ), SrcMO2, CopySGPR2->getOperand (0 ), SrcMO2, true );
532- unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (1 ).getReg (), TRI);
533- unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (1 ).getReg (), TRI);
534- } else {
535- SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
536- I, DstMO, CopySGPR1->getOperand (0 ), SrcMO2, CopySGPR2->getOperand (0 ), SrcMO2, false );
537- unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (1 ).getReg (), TRI);
538- unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (1 ).getReg (), TRI);
539- }
540- }
541- return ;
542- }
500+ if (Src1RC->getID () == AMDGPU::SGPR_64RegClassID) {
501+ // try with sgpr32
502+ SmallVector<MachineInstr *, 2 > copyInstrs = copyToVregAndInsertMI (I, 4 );
503+ MachineInstr *CopySGPR1 = copyInstrs[0 ];
504+ MachineInstr *CopySGPR2 = copyInstrs[1 ];
543505
544- if (DstRC->getID () == AMDGPU::VReg_512_Align2RegClassID) {
506+ bool isVReg64 = (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID);
545507 SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
546- I, DstMO, SrcMO1, SrcMO2, SrcMO1,
547- SrcMO2, false );
508+ I, DstMO, SrcMO1, CopySGPR1->getOperand (0 ), SrcMO1,
509+ CopySGPR2->getOperand (0 ), isVReg64);
510+ unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (2 ).getReg (), TRI);
511+ unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (2 ).getReg (), TRI);
512+ return ;
548513 }
549- else if (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID) {
514+ else if (Src0RC->getID () == AMDGPU::SGPR_64RegClassID) {
515+ SmallVector<MachineInstr *, 2 > copyInstrs = copyToVregAndInsertMI (I, 2 );
516+ MachineInstr *CopySGPR1 = copyInstrs[0 ];
517+ MachineInstr *CopySGPR2 = copyInstrs[1 ];
518+
519+ bool isVReg64 = (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID);
550520 SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
551- I, DstMO, SrcMO1, SrcMO2, SrcMO1,
552- SrcMO2, true );
521+ I, DstMO, CopySGPR1->getOperand (0 ), SrcMO2, CopySGPR2->getOperand (0 ), SrcMO2, isVReg64);
522+ unpackedInstrs[0 ]->addRegisterKilled (unpackedInstrs[0 ]->getOperand (1 ).getReg (), TRI);
523+ unpackedInstrs[1 ]->addRegisterKilled (unpackedInstrs[1 ]->getOperand (1 ).getReg (), TRI);
524+ return ;
553525 }
526+
527+ bool isVReg64 = (DstRC->getID () == AMDGPU::VReg_64_Align2RegClassID);
528+ SmallVector<MachineInstr *, 2 > unpackedInstrs = insertUnpackedMI (
529+ I, DstMO, SrcMO1, SrcMO2, SrcMO1,
530+ SrcMO2, isVReg64);
554531 return ;
555532}
556533
0 commit comments