@@ -1033,6 +1033,12 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
10331033 break ; // No other 'amdgcn.atomic.*'
10341034 }
10351035
1036+ if (Name.starts_with (" ds.fadd" )) {
1037+ // Replaced with atomicrmw fadd, so there's no new declaration.
1038+ NewFn = nullptr ;
1039+ return true ;
1040+ }
1041+
10361042 if (Name.starts_with (" ldexp." )) {
10371043 // Target specific intrinsic became redundant
10381044 NewFn = Intrinsic::getDeclaration (
@@ -2331,40 +2337,74 @@ static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
23312337 llvm_unreachable (" Unknown function for ARM CallBase upgrade." );
23322338}
23332339
2340+ // These are expected to have the arguments:
2341+ // atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
2342+ //
2343+ // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
2344+ //
23342345static Value *upgradeAMDGCNIntrinsicCall (StringRef Name, CallBase *CI,
23352346 Function *F, IRBuilder<> &Builder) {
2336- const bool IsInc = Name.starts_with (" atomic.inc." );
2337- if (IsInc || Name.starts_with (" atomic.dec." )) {
2338- if (CI->getNumOperands () != 6 ) // Malformed bitcode.
2339- return nullptr ;
2347+ AtomicRMWInst::BinOp RMWOp =
2348+ StringSwitch<AtomicRMWInst::BinOp>(Name)
2349+ .StartsWith (" ds.fadd" , AtomicRMWInst::FAdd)
2350+ .StartsWith (" atomic.inc." , AtomicRMWInst::UIncWrap)
2351+ .StartsWith (" atomic.dec." , AtomicRMWInst::UDecWrap);
2352+
2353+ unsigned NumOperands = CI->getNumOperands ();
2354+ if (NumOperands < 3 ) // Malformed bitcode.
2355+ return nullptr ;
23402356
2341- AtomicRMWInst::BinOp RMWOp =
2342- IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
2357+ Value *Ptr = CI->getArgOperand (0 );
2358+ if (!isa<PointerType>(Ptr->getType ())) // Malformed.
2359+ return nullptr ;
23432360
2344- Value *Ptr = CI->getArgOperand (0 );
2345- Value *Val = CI->getArgOperand (1 );
2346- ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand (2 ));
2361+ Value *Val = CI->getArgOperand (1 );
2362+ if (Val->getType () != CI->getType ()) // Malformed.
2363+ return nullptr ;
2364+
2365+ ConstantInt *OrderArg = nullptr ;
2366+ bool IsVolatile = false ;
2367+
2368+ // These should have 5 arguments (plus the callee). A separate version of the
2369+ // ds_fadd intrinsic was defined for bf16 which was missing arguments.
2370+ if (NumOperands > 3 )
2371+ OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand (2 ));
2372+
2373+ // Ignore scope argument at 3
2374+
2375+ if (NumOperands > 5 ) {
23472376 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand (4 ));
2377+ IsVolatile = !VolatileArg || !VolatileArg->isZero ();
2378+ }
23482379
2349- AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2350- if (OrderArg && isValidAtomicOrdering (OrderArg->getZExtValue ()))
2351- Order = static_cast <AtomicOrdering>(OrderArg->getZExtValue ());
2352- if (Order == AtomicOrdering::NotAtomic ||
2353- Order == AtomicOrdering::Unordered)
2354- Order = AtomicOrdering::SequentiallyConsistent;
2355-
2356- // The scope argument never really worked correctly. Use agent as the most
2357- // conservative option which should still always produce the instruction.
2358- SyncScope::ID SSID = F-> getContext (). getOrInsertSyncScopeID ( " agent " );
2359- AtomicRMWInst *RMW =
2360- Builder. CreateAtomicRMW (RMWOp, Ptr, Val, std:: nullopt , Order, SSID);
2361-
2362- if (!VolatileArg || !VolatileArg-> isZero ())
2363- RMW-> setVolatile ( true );
2364- return RMW;
2380+ AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
2381+ if (OrderArg && isValidAtomicOrdering (OrderArg->getZExtValue ()))
2382+ Order = static_cast <AtomicOrdering>(OrderArg->getZExtValue ());
2383+ if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
2384+ Order = AtomicOrdering::SequentiallyConsistent;
2385+
2386+ LLVMContext &Ctx = F-> getContext ();
2387+
2388+ // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
2389+ Type *RetTy = CI-> getType ( );
2390+ if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
2391+ if (VT-> getElementType ()-> isIntegerTy ( 16 )) {
2392+ VectorType *AsBF16 =
2393+ VectorType::get ( Type::getBFloatTy (Ctx), VT-> getElementCount ());
2394+ Val = Builder. CreateBitCast (Val, AsBF16 );
2395+ }
23652396 }
23662397
2367- llvm_unreachable (" Unknown function for AMDGPU intrinsic upgrade." );
2398+ // The scope argument never really worked correctly. Use agent as the most
2399+ // conservative option which should still always produce the instruction.
2400+ SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID (" agent" );
2401+ AtomicRMWInst *RMW =
2402+ Builder.CreateAtomicRMW (RMWOp, Ptr, Val, std::nullopt , Order, SSID);
2403+
2404+ if (IsVolatile)
2405+ RMW->setVolatile (true );
2406+
2407+ return Builder.CreateBitCast (RMW, RetTy);
23682408}
23692409
23702410// / Helper to unwrap intrinsic call MetadataAsValue operands.
0 commit comments