Skip to content

Commit b6180fd

Browse files
committed
[RISCV][LLVM] Enable atomics for 'Zalrsc'
The 'A' atomics extension is composed of two subextensions: 'Zaamo', which provides the atomic memory operation instructions, and 'Zalrsc', which provides the load-reserved/store-conditional instructions. For machines where 'Zalrsc' is present but 'Zaamo' is not, implement and enable atomic memory operations through pseudo expansion. This updates the predication and lowering control to be more precise about which of the 'Zaamo'/'Zalrsc' features is truly requisite. There is no functional change for subtargets supporting 'A', while 'Zalrsc'-only subtargets can now utilize atomics at an increased code footprint.
1 parent 333c758 commit b6180fd

File tree

9 files changed

+12456
-40
lines changed

9 files changed

+12456
-40
lines changed

llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp

Lines changed: 223 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,72 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
109109
// expanded instructions for each pseudo is correct in the Size field of the
110110
// tablegen definition for the pseudo.
111111
switch (MBBI->getOpcode()) {
112+
case RISCV::PseudoAtomicSwap32:
113+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
114+
NextMBBI);
115+
case RISCV::PseudoAtomicSwap64:
116+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
117+
NextMBBI);
118+
case RISCV::PseudoAtomicLoadAdd32:
119+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
120+
NextMBBI);
121+
case RISCV::PseudoAtomicLoadAdd64:
122+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
123+
NextMBBI);
124+
case RISCV::PseudoAtomicLoadSub32:
125+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
126+
NextMBBI);
127+
case RISCV::PseudoAtomicLoadSub64:
128+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
129+
NextMBBI);
130+
case RISCV::PseudoAtomicLoadAnd32:
131+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
132+
NextMBBI);
133+
case RISCV::PseudoAtomicLoadAnd64:
134+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
135+
NextMBBI);
136+
case RISCV::PseudoAtomicLoadOr32:
137+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32,
138+
NextMBBI);
139+
case RISCV::PseudoAtomicLoadOr64:
140+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64,
141+
NextMBBI);
142+
case RISCV::PseudoAtomicLoadXor32:
143+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
144+
NextMBBI);
145+
case RISCV::PseudoAtomicLoadXor64:
146+
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
147+
NextMBBI);
112148
case RISCV::PseudoAtomicLoadNand32:
113149
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
114150
NextMBBI);
115151
case RISCV::PseudoAtomicLoadNand64:
116152
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
117153
NextMBBI);
154+
case RISCV::PseudoAtomicLoadMin32:
155+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
156+
NextMBBI);
157+
case RISCV::PseudoAtomicLoadMin64:
158+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
159+
NextMBBI);
160+
case RISCV::PseudoAtomicLoadMax32:
161+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
162+
NextMBBI);
163+
case RISCV::PseudoAtomicLoadMax64:
164+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
165+
NextMBBI);
166+
case RISCV::PseudoAtomicLoadUMin32:
167+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
168+
NextMBBI);
169+
case RISCV::PseudoAtomicLoadUMin64:
170+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
171+
NextMBBI);
172+
case RISCV::PseudoAtomicLoadUMax32:
173+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
174+
NextMBBI);
175+
case RISCV::PseudoAtomicLoadUMax64:
176+
return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
177+
NextMBBI);
118178
case RISCV::PseudoMaskedAtomicSwap32:
119179
return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
120180
NextMBBI);
@@ -277,6 +337,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
277337
switch (BinOp) {
278338
default:
279339
llvm_unreachable("Unexpected AtomicRMW BinOp");
340+
case AtomicRMWInst::Xchg:
341+
BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
342+
.addReg(IncrReg)
343+
.addImm(0);
344+
break;
345+
case AtomicRMWInst::Add:
346+
BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
347+
.addReg(DestReg)
348+
.addReg(IncrReg);
349+
break;
350+
case AtomicRMWInst::Sub:
351+
BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
352+
.addReg(DestReg)
353+
.addReg(IncrReg);
354+
break;
355+
case AtomicRMWInst::And:
356+
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
357+
.addReg(DestReg)
358+
.addReg(IncrReg);
359+
break;
360+
case AtomicRMWInst::Or:
361+
BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
362+
.addReg(DestReg)
363+
.addReg(IncrReg);
364+
break;
365+
case AtomicRMWInst::Xor:
366+
BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
367+
.addReg(DestReg)
368+
.addReg(IncrReg);
369+
break;
280370
case AtomicRMWInst::Nand:
281371
BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
282372
.addReg(DestReg)
@@ -433,38 +523,105 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
433523
.addReg(ShamtReg);
434524
}
435525

436-
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
437-
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
438-
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
439-
MachineBasicBlock::iterator &NextMBBI) {
440-
assert(IsMasked == true &&
441-
"Should only need to expand masked atomic max/min");
442-
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
526+
static void insertZext(const RISCVInstrInfo *TII, DebugLoc DL,
527+
MachineBasicBlock *MBB, Register ValReg,
528+
Register SrcReg, int64_t Shamt) {
529+
BuildMI(MBB, DL, TII->get(RISCV::SLLI), ValReg)
530+
.addReg(SrcReg)
531+
.addImm(Shamt);
532+
BuildMI(MBB, DL, TII->get(RISCV::SRLI), ValReg)
533+
.addReg(ValReg)
534+
.addImm(Shamt);
535+
}
443536

444-
MachineInstr &MI = *MBBI;
445-
DebugLoc DL = MI.getDebugLoc();
446-
MachineFunction *MF = MBB.getParent();
447-
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
448-
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
449-
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
450-
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
537+
static void doAtomicMinMaxOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
538+
DebugLoc DL, MachineBasicBlock *ThisMBB,
539+
MachineBasicBlock *LoopHeadMBB,
540+
MachineBasicBlock *LoopIfBodyMBB,
541+
MachineBasicBlock *LoopTailMBB,
542+
MachineBasicBlock *DoneMBB,
543+
AtomicRMWInst::BinOp BinOp, int Width,
544+
const RISCVSubtarget *STI) {
545+
Register DestReg = MI.getOperand(0).getReg();
546+
Register ScratchReg = MI.getOperand(1).getReg();
547+
Register AddrReg = MI.getOperand(2).getReg();
548+
Register IncrReg = MI.getOperand(3).getReg();
549+
bool IsUnsigned = BinOp == AtomicRMWInst::UMin ||
550+
BinOp == AtomicRMWInst::UMax;
551+
bool Zext = IsUnsigned && STI->is64Bit() && Width == 32;
552+
AtomicOrdering Ordering =
553+
static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
451554

452-
// Insert new MBBs.
453-
MF->insert(++MBB.getIterator(), LoopHeadMBB);
454-
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
455-
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
456-
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
555+
// .loophead:
556+
// lr.[w|d] dest, (addr)
557+
// mv scratch, dest
558+
// ifnochangeneeded scratch, incr, .looptail
559+
BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
560+
.addReg(AddrReg);
561+
if (Zext)
562+
insertZext(TII, DL, LoopHeadMBB, ScratchReg, DestReg, 32);
563+
else
564+
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
565+
.addReg(DestReg)
566+
.addImm(0);
567+
switch (BinOp) {
568+
default:
569+
llvm_unreachable("Unexpected AtomicRMW BinOp");
570+
case AtomicRMWInst::Max: {
571+
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
572+
.addReg(ScratchReg)
573+
.addReg(IncrReg)
574+
.addMBB(LoopTailMBB);
575+
break;
576+
}
577+
case AtomicRMWInst::Min: {
578+
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
579+
.addReg(IncrReg)
580+
.addReg(ScratchReg)
581+
.addMBB(LoopTailMBB);
582+
break;
583+
}
584+
case AtomicRMWInst::UMax:
585+
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
586+
.addReg(ScratchReg)
587+
.addReg(IncrReg)
588+
.addMBB(LoopTailMBB);
589+
break;
590+
case AtomicRMWInst::UMin:
591+
BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
592+
.addReg(IncrReg)
593+
.addReg(ScratchReg)
594+
.addMBB(LoopTailMBB);
595+
break;
596+
}
457597

458-
// Set up successors and transfer remaining instructions to DoneMBB.
459-
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
460-
LoopHeadMBB->addSuccessor(LoopTailMBB);
461-
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
462-
LoopTailMBB->addSuccessor(LoopHeadMBB);
463-
LoopTailMBB->addSuccessor(DoneMBB);
464-
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
465-
DoneMBB->transferSuccessors(&MBB);
466-
MBB.addSuccessor(LoopHeadMBB);
598+
// .loopifbody:
599+
// mv scratch, incr
600+
BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
601+
.addReg(IncrReg)
602+
.addImm(0);
467603

604+
// .looptail:
605+
// sc.[w|d] scratch, scratch, (addr)
606+
// bnez scratch, loop
607+
BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
608+
.addReg(ScratchReg)
609+
.addReg(AddrReg);
610+
BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
611+
.addReg(ScratchReg)
612+
.addReg(RISCV::X0)
613+
.addMBB(LoopHeadMBB);
614+
}
615+
616+
static void doMaskedAtomicMinMaxOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
617+
DebugLoc DL, MachineBasicBlock *ThisMBB,
618+
MachineBasicBlock *LoopHeadMBB,
619+
MachineBasicBlock *LoopIfBodyMBB,
620+
MachineBasicBlock *LoopTailMBB,
621+
MachineBasicBlock *DoneMBB,
622+
AtomicRMWInst::BinOp BinOp, int Width,
623+
const RISCVSubtarget *STI) {
624+
assert(Width == 32 && "Should never need to expand masked 64-bit operations");
468625
Register DestReg = MI.getOperand(0).getReg();
469626
Register Scratch1Reg = MI.getOperand(1).getReg();
470627
Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +698,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
541698
.addReg(Scratch1Reg)
542699
.addReg(RISCV::X0)
543700
.addMBB(LoopHeadMBB);
701+
}
702+
703+
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
704+
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
705+
AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
706+
MachineBasicBlock::iterator &NextMBBI) {
707+
708+
MachineInstr &MI = *MBBI;
709+
DebugLoc DL = MI.getDebugLoc();
710+
MachineFunction *MF = MBB.getParent();
711+
auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
712+
auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
713+
auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
714+
auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
715+
716+
// Insert new MBBs.
717+
MF->insert(++MBB.getIterator(), LoopHeadMBB);
718+
MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
719+
MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
720+
MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
721+
722+
// Set up successors and transfer remaining instructions to DoneMBB.
723+
LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
724+
LoopHeadMBB->addSuccessor(LoopTailMBB);
725+
LoopIfBodyMBB->addSuccessor(LoopTailMBB);
726+
LoopTailMBB->addSuccessor(LoopHeadMBB);
727+
LoopTailMBB->addSuccessor(DoneMBB);
728+
DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
729+
DoneMBB->transferSuccessors(&MBB);
730+
MBB.addSuccessor(LoopHeadMBB);
731+
732+
if (!IsMasked)
733+
doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
734+
LoopTailMBB, DoneMBB, BinOp, Width, STI);
735+
else
736+
doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
737+
LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
738+
Width, STI);
544739

545740
NextMBBI = MBB.end();
546741
MI.eraseFromParent();

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ def HasStdExtZaamo
218218
: Predicate<"Subtarget->hasStdExtZaamo()">,
219219
AssemblerPredicate<(any_of FeatureStdExtZaamo),
220220
"'Zaamo' (Atomic Memory Operations)">;
221+
def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">;
221222

222223
def FeatureStdExtZalrsc
223224
: RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">;
@@ -1861,7 +1862,7 @@ def FeatureForcedAtomics : SubtargetFeature<
18611862
"forced-atomics", "HasForcedAtomics", "true",
18621863
"Assume that lock-free native-width atomics are available">;
18631864
def HasAtomicLdSt
1864-
: Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
1865+
: Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;
18651866

18661867
def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
18671868
"AllowTaggedGlobals",

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
688688
else if (Subtarget.hasStdExtZicbop())
689689
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
690690

691-
if (Subtarget.hasStdExtA()) {
691+
if (Subtarget.hasStdExtZalrsc()) {
692692
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
693693
if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
694694
setMinCmpXchgSizeInBits(8);
@@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
15581558
}
15591559
}
15601560

1561-
if (Subtarget.hasStdExtA())
1561+
if (Subtarget.hasStdExtZaamo())
15621562
setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
15631563

15641564
if (Subtarget.hasForcedAtomics()) {
@@ -21876,7 +21876,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2187621876
// result is then sign extended to XLEN. With +A, the minimum width is
2187721877
// 32 for both 64 and 32.
2187821878
assert(getMinCmpXchgSizeInBits() == 32);
21879-
assert(Subtarget.hasStdExtA());
21879+
assert(Subtarget.hasStdExtZalrsc());
2188021880
return Op.getValueSizeInBits() - 31;
2188121881
}
2188221882
break;

0 commit comments

Comments
 (0)