@@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
   // expanded instructions for each pseudo is correct in the Size field of the
   // tablegen definition for the pseudo.
   switch (MBBI->getOpcode()) {
+  case RISCV::PseudoAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicSwap64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAdd32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAdd64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadSub32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadSub64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAnd32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAnd64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadOr32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
+  case RISCV::PseudoAtomicLoadOr64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
+  case RISCV::PseudoAtomicLoadXor32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadXor64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
+                             NextMBBI);
   case RISCV::PseudoAtomicLoadNand32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                              NextMBBI);
   case RISCV::PseudoAtomicLoadNand64:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                              NextMBBI);
+  case RISCV::PseudoAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMin64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMax64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMin64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMax64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
+                                NextMBBI);
   case RISCV::PseudoMaskedAtomicSwap32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                              NextMBBI);
@@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
   switch (BinOp) {
   default:
     llvm_unreachable("Unexpected AtomicRMW BinOp");
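+  // Descriptive note (not in the original commit): for Xchg the value to
+  // store is simply the operand, materialized with addi scratch, incr, 0; the
+  // remaining ops combine the loaded value in dest with incr to form the
+  // value that the store-conditional in the loop tail will attempt to write.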
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+        .addReg(IncrReg)
+        .addImm(0);
+    break;
+  case AtomicRMWInst::Add:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Sub:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::And:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Or:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Xor:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
   case AtomicRMWInst::Nand:
     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
         .addReg(DestReg)
@@ -433,38 +521,98 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
       .addReg(ShamtReg);
 }
 
-bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
-    MachineBasicBlock::iterator &NextMBBI) {
-  assert(IsMasked == true &&
-         "Should only need to expand masked atomic max/min");
-  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
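+// Descriptive note (not in the original commit): insertZext zero-extends the
+// low bits of SrcReg into ValReg by shifting left and then logically right by
+// Shamt; it is used below so that unsigned comparisons of a 32-bit value on
+// RV64 are not thrown off by the sign extension performed by lr.w.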
+static void insertZext(const RISCVInstrInfo *TII, DebugLoc DL,
+                       MachineBasicBlock *MBB, Register ValReg,
+                       Register SrcReg, int64_t Shamt) {
+  BuildMI(MBB, DL, TII->get(RISCV::SLLI), ValReg).addReg(SrcReg).addImm(Shamt);
+  BuildMI(MBB, DL, TII->get(RISCV::SRLI), ValReg).addReg(ValReg).addImm(Shamt);
+}
 
-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
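+// Descriptive note (not in the original commit): this helper emits the LR/SC
+// retry loop for an unmasked (word or doubleword) atomic min/max. .loophead
+// loads the current value and branches to .looptail when no update is needed,
+// .loopifbody selects the incoming value, and .looptail performs the
+// store-conditional and loops back on failure.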
+static void doAtomicMinMaxOpExpansion(
+    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+    const RISCVSubtarget *STI) {
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register IncrReg = MI.getOperand(3).getReg();
+  bool IsUnsigned =
+      BinOp == AtomicRMWInst::UMin || BinOp == AtomicRMWInst::UMax;
+  bool Zext = IsUnsigned && STI->is64Bit() && Width == 32;
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
 
-  // Insert new MBBs.
-  MF->insert(++MBB.getIterator(), LoopHeadMBB);
-  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
-  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+  // .loophead:
+  //   lr.[w|d] dest, (addr)
+  //   mv scratch, dest
+  //   ifnochangeneeded scratch, incr, .looptail
+  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
+      .addReg(AddrReg);
+  if (Zext)
+    insertZext(TII, DL, LoopHeadMBB, ScratchReg, DestReg, 32);
+  else
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+        .addReg(DestReg)
+        .addImm(0);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Max: {
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(ScratchReg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::Min: {
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(IncrReg)
+        .addReg(ScratchReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::UMax:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(ScratchReg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  case AtomicRMWInst::UMin:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(IncrReg)
+        .addReg(ScratchReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
 
-  // Set up successors and transfer remaining instructions to DoneMBB.
-  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
-  LoopHeadMBB->addSuccessor(LoopTailMBB);
-  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
-  LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(DoneMBB);
-  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
-  DoneMBB->transferSuccessors(&MBB);
-  MBB.addSuccessor(LoopHeadMBB);
+  // .loopifbody:
+  //   mv scratch, incr
+  BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+      .addReg(IncrReg)
+      .addImm(0);
 
+  // .looptail:
+  //   sc.[w|d] scratch, scratch, (addr)
+  //   bnez scratch, loop
+  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+          ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(AddrReg);
+  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+      .addReg(ScratchReg)
+      .addReg(RISCV::X0)
+      .addMBB(LoopHeadMBB);
+}
+
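+// Descriptive note (not in the original commit): the masked variant below
+// handles the case where the operation acts on a sub-word field kept inside an
+// aligned 32-bit word (hence the Width == 32 assert), which is why it needs
+// the extra scratch and mask operands of the masked pseudo.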
+static void doMaskedAtomicMinMaxOpExpansion(
+    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+    const RISCVSubtarget *STI) {
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
   Register DestReg = MI.getOperand(0).getReg();
   Register Scratch1Reg = MI.getOperand(1).getReg();
   Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +689,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
       .addReg(Scratch1Reg)
       .addReg(RISCV::X0)
       .addMBB(LoopHeadMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
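+  // Descriptive note (not in the original commit): fill the blocks created
+  // above with either the plain word/doubleword expansion or the masked
+  // sub-word expansion.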
+  if (!IsMasked)
+    doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
+                              LoopTailMBB, DoneMBB, BinOp, Width, STI);
+  else
+    doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
+                                    LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
+                                    Width, STI);
 
   NextMBBI = MBB.end();
   MI.eraseFromParent();