
Commit b2436be

[RISCV][LLVM] Enable atomics for 'Zalrsc'
The 'A' atomics extension is composed of two subextensions: 'Zaamo', which provides the atomic memory operation (AMO) instructions, and 'Zalrsc', which provides the load-reserved/store-conditional instructions. For machines where 'Zalrsc' is present but 'Zaamo' is not, implement and enable atomic memory operations through pseudo-instruction expansion. The predicates and lowering controls are also updated to state precisely which of 'Zaamo'/'Zalrsc' each operation truly requires. There is no functional change for subtargets supporting 'A', while 'Zalrsc'-only subtargets can now use atomics at the cost of a larger code footprint.
1 parent ca4df68 commit b2436be
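
For illustration, a minimal sketch (not from the patch itself; registers are illustrative and the .aq/.rl suffixes chosen per the atomic ordering are omitted) of how a single AMO maps onto the LR/SC retry loop the pseudo expansion emits, for a word-sized atomic add:

    # With Zaamo, one instruction suffices:
    #   amoadd.w a0, a1, (a2)       # a0 = old value; memory at (a2) += a1
    # Zalrsc-only expansion:
    .loop:
        lr.w   a0, (a2)             # load-reserved: a0 = old value
        add    a3, a0, a1           # scratch = old value + increment
        sc.w   a3, a3, (a2)         # store-conditional: a3 = 0 on success
        bnez   a3, .loop            # reservation lost: retry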

File tree

9 files changed: +12450 −43 lines changed

llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp

Lines changed: 214 additions & 28 deletions
@@ -109,12 +109,70 @@ bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
   // expanded instructions for each pseudo is correct in the Size field of the
   // tablegen definition for the pseudo.
   switch (MBBI->getOpcode()) {
+  case RISCV::PseudoAtomicSwap32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicSwap64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAdd32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAdd64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadSub32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadSub64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAnd32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadAnd64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadOr32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
+  case RISCV::PseudoAtomicLoadOr64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
+  case RISCV::PseudoAtomicLoadXor32:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
+                             NextMBBI);
+  case RISCV::PseudoAtomicLoadXor64:
+    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
+                             NextMBBI);
   case RISCV::PseudoAtomicLoadNand32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                              NextMBBI);
   case RISCV::PseudoAtomicLoadNand64:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                              NextMBBI);
+  case RISCV::PseudoAtomicLoadMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMin64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadMax64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMin32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMin64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMax32:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
+                                NextMBBI);
+  case RISCV::PseudoAtomicLoadUMax64:
+    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
+                                NextMBBI);
   case RISCV::PseudoMaskedAtomicSwap32:
     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                              NextMBBI);
@@ -277,6 +335,36 @@ static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
   switch (BinOp) {
   default:
     llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Xchg:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+        .addReg(IncrReg)
+        .addImm(0);
+    break;
+  case AtomicRMWInst::Add:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Sub:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::And:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Or:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
+  case AtomicRMWInst::Xor:
+    BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
+        .addReg(DestReg)
+        .addReg(IncrReg);
+    break;
   case AtomicRMWInst::Nand:
     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
         .addReg(DestReg)
@@ -433,38 +521,98 @@ static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
       .addReg(ShamtReg);
 }

-bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
-    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
-    MachineBasicBlock::iterator &NextMBBI) {
-  assert(IsMasked == true &&
-         "Should only need to expand masked atomic max/min");
-  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
+static void insertZext(const RISCVInstrInfo *TII, DebugLoc DL,
+                       MachineBasicBlock *MBB, Register ValReg, Register SrcReg,
+                       int64_t Shamt) {
+  BuildMI(MBB, DL, TII->get(RISCV::SLLI), ValReg).addReg(SrcReg).addImm(Shamt);
+  BuildMI(MBB, DL, TII->get(RISCV::SRLI), ValReg).addReg(ValReg).addImm(Shamt);
+}

-  MachineInstr &MI = *MBBI;
-  DebugLoc DL = MI.getDebugLoc();
-  MachineFunction *MF = MBB.getParent();
-  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
-  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+static void doAtomicMinMaxOpExpansion(
+    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+    const RISCVSubtarget *STI) {
+  Register DestReg = MI.getOperand(0).getReg();
+  Register ScratchReg = MI.getOperand(1).getReg();
+  Register AddrReg = MI.getOperand(2).getReg();
+  Register IncrReg = MI.getOperand(3).getReg();
+  bool IsUnsigned =
+      BinOp == AtomicRMWInst::UMin || BinOp == AtomicRMWInst::UMax;
+  bool Zext = IsUnsigned && STI->is64Bit() && Width == 32;
+  AtomicOrdering Ordering =
+      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

-  // Insert new MBBs.
-  MF->insert(++MBB.getIterator(), LoopHeadMBB);
-  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
-  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
-  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+  // .loophead:
+  //   lr.[w|d] dest, (addr)
+  //   mv scratch, dest
+  //   ifnochangeneeded scratch, incr, .looptail
+  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
+      .addReg(AddrReg);
+  if (Zext)
+    insertZext(TII, DL, LoopHeadMBB, ScratchReg, DestReg, 32);
+  else
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+        .addReg(DestReg)
+        .addImm(0);
+  switch (BinOp) {
+  default:
+    llvm_unreachable("Unexpected AtomicRMW BinOp");
+  case AtomicRMWInst::Max: {
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(ScratchReg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::Min: {
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
+        .addReg(IncrReg)
+        .addReg(ScratchReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }
+  case AtomicRMWInst::UMax:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(ScratchReg)
+        .addReg(IncrReg)
+        .addMBB(LoopTailMBB);
+    break;
+  case AtomicRMWInst::UMin:
+    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
+        .addReg(IncrReg)
+        .addReg(ScratchReg)
+        .addMBB(LoopTailMBB);
+    break;
+  }

-  // Set up successors and transfer remaining instructions to DoneMBB.
-  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
-  LoopHeadMBB->addSuccessor(LoopTailMBB);
-  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
-  LoopTailMBB->addSuccessor(LoopHeadMBB);
-  LoopTailMBB->addSuccessor(DoneMBB);
-  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
-  DoneMBB->transferSuccessors(&MBB);
-  MBB.addSuccessor(LoopHeadMBB);
+  // .loopifbody:
+  //   mv scratch, incr
+  BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
+      .addReg(IncrReg)
+      .addImm(0);

+  // .looptail:
+  //   sc.[w|d] scratch, scratch, (addr)
+  //   bnez scratch, loop
+  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
+          ScratchReg)
+      .addReg(ScratchReg)
+      .addReg(AddrReg);
+  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
+      .addReg(ScratchReg)
+      .addReg(RISCV::X0)
+      .addMBB(LoopHeadMBB);
+}
+
+static void doMaskedAtomicMinMaxOpExpansion(
+    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
+    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
+    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
+    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
+    const RISCVSubtarget *STI) {
+  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
   Register DestReg = MI.getOperand(0).getReg();
   Register Scratch1Reg = MI.getOperand(1).getReg();
   Register Scratch2Reg = MI.getOperand(2).getReg();
@@ -541,6 +689,44 @@ bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
       .addReg(Scratch1Reg)
       .addReg(RISCV::X0)
       .addMBB(LoopHeadMBB);
+}
+
+bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
+    MachineBasicBlock::iterator &NextMBBI) {
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction *MF = MBB.getParent();
+  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  // Insert new MBBs.
+  MF->insert(++MBB.getIterator(), LoopHeadMBB);
+  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
+  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
+  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
+
+  // Set up successors and transfer remaining instructions to DoneMBB.
+  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
+  LoopHeadMBB->addSuccessor(LoopTailMBB);
+  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
+  LoopTailMBB->addSuccessor(LoopHeadMBB);
+  LoopTailMBB->addSuccessor(DoneMBB);
+  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
+  DoneMBB->transferSuccessors(&MBB);
+  MBB.addSuccessor(LoopHeadMBB);
+
+  if (!IsMasked)
+    doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
+                              LoopTailMBB, DoneMBB, BinOp, Width, STI);
+  else
+    doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
+                                    LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
+                                    Width, STI);

   NextMBBI = MBB.end();
   MI.eraseFromParent();
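
The restructured min/max path selects the new value with a branch, since Zalrsc-only targets have no amomax/amomin. A sketch of the loop this emits for a signed word-sized max (illustrative registers, ordering suffixes omitted), mirroring the .loophead/.loopifbody/.looptail comments above:

    .loophead:
        lr.w   a0, (a2)             # dest = old value
        mv     a3, a0               # scratch = old value
        bge    a3, a1, .looptail    # old >= incr: no change needed
    .loopifbody:
        mv     a3, a1               # scratch = incr, the new maximum
    .looptail:
        sc.w   a3, a3, (a2)         # attempt to store the selected value
        bnez   a3, .loophead        # reservation lost: retry

For unsigned 32-bit operations on RV64, the new insertZext helper zero-extends the loaded value (SLLI then SRLI by 32) into the scratch register before the BGEU compare, in place of the plain mv.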

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 2 additions & 1 deletion
@@ -218,6 +218,7 @@ def HasStdExtZaamo
     : Predicate<"Subtarget->hasStdExtZaamo()">,
       AssemblerPredicate<(any_of FeatureStdExtZaamo),
                          "'Zaamo' (Atomic Memory Operations)">;
+def NoStdExtZaamo : Predicate<"!Subtarget->hasStdExtZaamo()">;

 def FeatureStdExtZalrsc
     : RISCVExtension<1, 0, "Load-Reserved/Store-Conditional">;
@@ -1864,7 +1865,7 @@ def FeatureForcedAtomics : SubtargetFeature<
     "forced-atomics", "HasForcedAtomics", "true",
     "Assume that lock-free native-width atomics are available">;
 def HasAtomicLdSt
-    : Predicate<"Subtarget->hasStdExtA() || Subtarget->hasForcedAtomics()">;
+    : Predicate<"Subtarget->hasStdExtZalrsc() || Subtarget->hasForcedAtomics()">;

 def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
                                             "AllowTaggedGlobals",

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 3 deletions
@@ -688,7 +688,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   else if (Subtarget.hasStdExtZicbop())
     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

-  if (Subtarget.hasStdExtA()) {
+  if (Subtarget.hasStdExtZalrsc()) {
     setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
     if (Subtarget.hasStdExtZabha() && Subtarget.hasStdExtZacas())
       setMinCmpXchgSizeInBits(8);
@@ -1558,7 +1558,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     }
   }

-  if (Subtarget.hasStdExtA())
+  if (Subtarget.hasStdExtZaamo())
     setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);

   if (Subtarget.hasForcedAtomics()) {
@@ -21878,7 +21878,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
     // result is then sign extended to XLEN. With +A, the minimum width is
     // 32 for both 64 and 32.
     assert(getMinCmpXchgSizeInBits() == 32);
-    assert(Subtarget.hasStdExtA());
+    assert(Subtarget.hasStdExtZalrsc());
     return Op.getValueSizeInBits() - 31;
   }
   break;
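
The ATOMIC_LOAD_SUB change illustrates the predicate split: 'Zaamo' has no amosub, so on AMO-capable targets the operation stays Expand and is lowered as a negate feeding amoadd, while a Zalrsc-only target subtracts directly inside its LR/SC loop via the new AtomicRMWInst::Sub case. A sketch under those assumptions (illustrative registers, ordering suffixes omitted):

    # Zaamo path: negate, then reuse amoadd
    neg       a1, a1                # a1 = -increment
    amoadd.w  a0, a1, (a2)          # a0 = old value; memory -= increment
    # Zalrsc-only path: subtract in the retry loop
    .loop:
        lr.w   a0, (a2)             # a0 = old value
        sub    a3, a0, a1           # scratch = old - increment
        sc.w   a3, a3, (a2)         # a3 = 0 on success
        bnez   a3, .loop            # reservation lost: retry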
