Skip to content

Commit 40611f9

Browse files
arsenm authored and huangkaiyao committed
AArch64: Use consistent atomicrmw expansion for FP operations (#103702)
Use LLSC or cmpxchg in the same cases as for the unsupported integer operations. This required some fixups to the LLSC implementation to deal with the fp128 case. The comment about floating-point exceptions was wrong, because floating-point exceptions are not really exceptions at all.
1 parent 76aea28 commit 40611f9

File tree

5 files changed

+376
-1055
lines changed

5 files changed

+376
-1055
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26565,21 +26565,37 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
2656526565
: AtomicExpansionKind::LLSC;
2656626566
}
2656726567

26568+
// Return true if the atomic operation expansion will lower to use a library
26569+
// call, and is thus ineligible to use an LLSC expansion.
26570+
static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget,
26571+
const AtomicRMWInst *RMW) {
26572+
if (!RMW->isFloatingPointOperation())
26573+
return false;
26574+
switch (RMW->getType()->getScalarType()->getTypeID()) {
26575+
case Type::FloatTyID:
26576+
case Type::DoubleTyID:
26577+
case Type::HalfTyID:
26578+
case Type::BFloatTyID:
26579+
// Will use soft float
26580+
return !Subtarget.hasFPARMv8();
26581+
default:
26582+
// fp128 will emit library calls.
26583+
return true;
26584+
}
26585+
26586+
llvm_unreachable("covered type switch");
26587+
}
26588+
2656826589
// The "default" for integer RMW operations is to expand to an LL/SC loop.
2656926590
// However, with the LSE instructions (or outline-atomics mode, which provides
2657026591
// library routines in place of the LSE-instructions), we can directly emit many
2657126592
// operations instead.
26572-
//
26573-
// Floating-point operations are always emitted to a cmpxchg loop, because they
26574-
// may trigger a trap which aborts an LLSC sequence.
2657526593
TargetLowering::AtomicExpansionKind
2657626594
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
26577-
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
26595+
Type *Ty = AI->getType();
26596+
unsigned Size = Ty->getPrimitiveSizeInBits();
2657826597
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
2657926598

26580-
if (AI->isFloatingPointOperation())
26581-
return AtomicExpansionKind::CmpXChg;
26582-
2658326599
bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
2658426600
(AI->getOperation() == AtomicRMWInst::Xchg ||
2658526601
AI->getOperation() == AtomicRMWInst::Or ||
@@ -26589,7 +26605,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2658926605

2659026606
// Nand is not supported in LSE.
2659126607
// Leave 128 bits to LLSC or CmpXChg.
26592-
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
26608+
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
26609+
!AI->isFloatingPointOperation()) {
2659326610
if (Subtarget->hasLSE())
2659426611
return AtomicExpansionKind::None;
2659526612
if (Subtarget->outlineAtomics()) {
@@ -26615,7 +26632,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
2661526632
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
2661626633
// we have a single CAS instruction that can replace the loop.
2661726634
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
26618-
Subtarget->hasLSE())
26635+
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
2661926636
return AtomicExpansionKind::CmpXChg;
2662026637

2662126638
return AtomicExpansionKind::LLSC;
@@ -26662,10 +26679,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
2666226679

2666326680
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
2666426681
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
26665-
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
26666-
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
26667-
return Builder.CreateOr(
26668-
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
26682+
26683+
auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
26684+
Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
26685+
Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
26686+
26687+
Value *Or = Builder.CreateOr(
26688+
Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
26689+
return Builder.CreateBitCast(Or, ValueTy);
2666926690
}
2667026691

2667126692
Type *Tys[] = { Addr->getType() };
@@ -26676,8 +26697,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
2667626697
const DataLayout &DL = M->getDataLayout();
2667726698
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
2667826699
CallInst *CI = Builder.CreateCall(Ldxr, Addr);
26679-
CI->addParamAttr(
26680-
0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
26700+
CI->addParamAttr(0, Attribute::get(Builder.getContext(),
26701+
Attribute::ElementType, IntEltTy));
2668126702
Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
2668226703

2668326704
return Builder.CreateBitCast(Trunc, ValueTy);
@@ -26703,9 +26724,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
2670326724
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
2670426725
Function *Stxr = Intrinsic::getDeclaration(M, Int);
2670526726
Type *Int64Ty = Type::getInt64Ty(M->getContext());
26727+
Type *Int128Ty = Type::getInt128Ty(M->getContext());
26728+
26729+
Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
2670626730

26707-
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
26708-
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
26731+
Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
26732+
Value *Hi =
26733+
Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
2670926734
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
2671026735
}
2671126736

0 commit comments

Comments
 (0)