@@ -18140,9 +18140,35 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope,
     break;
   }

+  // Some of the atomic builtins take the scope as a string name.
   StringRef scp;
-  llvm::getConstantStringInfo(Scope, scp);
-  SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+  if (llvm::getConstantStringInfo(Scope, scp)) {
+    SSID = getLLVMContext().getOrInsertSyncScopeID(scp);
+    return;
+  }
+
+  // Older builtins had an enum argument for the memory scope.
+  int scope = cast<llvm::ConstantInt>(Scope)->getZExtValue();
+  switch (scope) {
+  case 0: // __MEMORY_SCOPE_SYSTEM
+    SSID = llvm::SyncScope::System;
+    break;
+  case 1: // __MEMORY_SCOPE_DEVICE
+    SSID = getLLVMContext().getOrInsertSyncScopeID("agent");
+    break;
+  case 2: // __MEMORY_SCOPE_WRKGRP
+    SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup");
+    break;
+  case 3: // __MEMORY_SCOPE_WVFRNT
+    SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront");
+    break;
+  case 4: // __MEMORY_SCOPE_SINGLE
+    SSID = llvm::SyncScope::SingleThread;
+    break;
+  default:
+    SSID = llvm::SyncScope::System;
+    break;
+  }
 }

 llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments,
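Note: a hedged usage sketch (not part of the patch) of the two scope forms this function now accepts when compiling for amdgcn. The string form resolves through getOrInsertSyncScopeID; the integer form uses the clang-provided __MEMORY_SCOPE_* values matched by the switch above. Function names here are illustrative only.

    // String-scoped builtin: "workgroup" becomes an IR syncscope directly.
    void fence_wg() {
      __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
    }

    // Enum-scoped builtin: __MEMORY_SCOPE_DEVICE (1) maps to the "agent"
    // sync scope via the switch added above.
    int add_device_scope(int *p) {
      return __scoped_atomic_fetch_add(p, 1, __ATOMIC_RELAXED,
                                       __MEMORY_SCOPE_DEVICE);
    }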
@@ -18558,14 +18584,10 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
     return Builder.CreateCall(F, { Src0, Builder.getFalse() });
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
   case AMDGPU::BI__builtin_amdgcn_ds_fminf:
   case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: {
     Intrinsic::ID Intrin;
     switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
-      Intrin = Intrinsic::amdgcn_ds_fadd;
-      break;
     case AMDGPU::BI__builtin_amdgcn_ds_fminf:
       Intrin = Intrinsic::amdgcn_ds_fmin;
       break;
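With __builtin_amdgcn_ds_faddf removed from this intrinsic-selection path, only the ds_fminf and ds_fmaxf builtins still lower to target intrinsics here; the fadd variant is rerouted to the generic atomicrmw path added further down. A hedged usage sketch (the typedef name is illustrative):

    // __builtin_amdgcn_ds_faddf takes an LDS pointer, the addend, and
    // constant ordering/scope/volatile operands; after this patch it emits
    // a plain `atomicrmw fadd` rather than a call to @llvm.amdgcn.ds.fadd.
    typedef __attribute__((address_space(3))) float lds_float;
    float add_to_lds(lds_float *p, float v) {
      return __builtin_amdgcn_ds_faddf(p, v, __ATOMIC_SEQ_CST,
                                       0 /* system scope */, false);
    }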
@@ -18656,35 +18678,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
     return Builder.CreateCall(F, {Addr, Val});
   }
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: {
-    Intrinsic::ID IID;
-    llvm::Type *ArgTy;
-    switch (BuiltinID) {
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
-      ArgTy = llvm::Type::getFloatTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
-      ArgTy = llvm::Type::getDoubleTy(getLLVMContext());
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
-      ArgTy = llvm::FixedVectorType::get(
-          llvm::Type::getHalfTy(getLLVMContext()), 2);
-      IID = Intrinsic::amdgcn_ds_fadd;
-      break;
-    }
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Val = EmitScalarExpr(E->getArg(1));
-    llvm::Constant *ZeroI32 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt32Ty(getLLVMContext()), APInt(32, 0, true));
-    llvm::Constant *ZeroI1 = llvm::ConstantInt::getIntegerValue(
-        llvm::Type::getInt1Ty(getLLVMContext()), APInt(1, 0));
-    llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy});
-    return Builder.CreateCall(F, {Addr, Val, ZeroI32, ZeroI32, ZeroI1});
-  }
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
   case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
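The deleted block called @llvm.amdgcn.ds.fadd with hard-coded zero ordering/scope operands and a false volatile flag; the equivalent now comes from the shared atomicrmw path in the hunks below. A minimal sketch, with assumed names, of building that replacement directly with a raw IRBuilder:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Emit the `atomicrmw fadd` these builtins now lower to, using the
    // defaults the patch applies when a builtin carries no explicit
    // ordering/scope operands.
    AtomicRMWInst *emitLDSFAdd(IRBuilder<> &B, Value *Ptr, Value *Val) {
      return B.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                               AtomicOrdering::SequentiallyConsistent,
                               SyncScope::System);
    }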
@@ -19044,7 +19037,12 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
   case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
   case AMDGPU::BI__builtin_amdgcn_atomic_inc64:
   case AMDGPU::BI__builtin_amdgcn_atomic_dec32:
-  case AMDGPU::BI__builtin_amdgcn_atomic_dec64: {
+  case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
+  case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+  case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: {
     llvm::AtomicRMWInst::BinOp BinOp;
     switch (BuiltinID) {
     case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19055,23 +19053,54 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
     case AMDGPU::BI__builtin_amdgcn_atomic_dec64:
       BinOp = llvm::AtomicRMWInst::UDecWrap;
       break;
+    case AMDGPU::BI__builtin_amdgcn_ds_faddf:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16:
+    case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16:
+      BinOp = llvm::AtomicRMWInst::FAdd;
+      break;
     }

     Address Ptr = CheckAtomicAlignment(*this, E);
     Value *Val = EmitScalarExpr(E->getArg(1));
+    llvm::Type *OrigTy = Val->getType();
+    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();

-    ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
-                            EmitScalarExpr(E->getArg(3)), AO, SSID);
+    bool Volatile;

-    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
-    bool Volatile =
-        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+    if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf) {
+      // __builtin_amdgcn_ds_faddf has an explicit volatile argument.
+      Volatile =
+          cast<ConstantInt>(EmitScalarExpr(E->getArg(4)))->getZExtValue();
+    } else {
+      // Infer volatile from the passed type.
+      Volatile =
+          PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
+    }
+
+    if (E->getNumArgs() >= 4) {
+      // Some of the builtins have explicit ordering and scope arguments.
+      ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
+                              EmitScalarExpr(E->getArg(3)), AO, SSID);
+    } else {
+      // The ds_fadd_* builtins do not have syncscope/order arguments.
+      SSID = llvm::SyncScope::System;
+      AO = AtomicOrdering::SequentiallyConsistent;
+
+      // The v2bf16 builtin uses i16 instead of a natural bfloat type.
+      if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
+        llvm::Type *V2BF16Ty = FixedVectorType::get(
+            llvm::Type::getBFloatTy(Builder.getContext()), 2);
+        Val = Builder.CreateBitCast(Val, V2BF16Ty);
+      }
+    }

     llvm::AtomicRMWInst *RMW =
         Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID);
     if (Volatile)
       RMW->setVolatile(true);
-    return RMW;
+    return Builder.CreateBitCast(RMW, OrigTy);
   }
   case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn:
   case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: {
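Note on the v2bf16 case: the builtin's source-level operand and result type is a vector of two 16-bit integers, so the patch bitcasts the operand to <2 x bfloat> before the atomicrmw and bitcasts the result back through OrigTy on return (for all other cases that final CreateBitCast folds to a no-op). A hedged usage sketch, with illustrative typedef names:

    // The operand/result type is effectively short2; the bfloat conversion
    // happens only at the IR level inside the bitcasts shown above.
    typedef short short2 __attribute__((ext_vector_type(2)));
    typedef __attribute__((address_space(3))) short2 lds_short2;

    short2 add_bf16_pair(lds_short2 *p, short2 v) {
      return __builtin_amdgcn_ds_atomic_fadd_v2bf16(p, v);
    }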