@@ -18955,22 +18955,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1895518955 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
1895618956 return Builder.CreateCall(F, {Addr, Val});
1895718957 }
18958- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18959- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18960- Intrinsic::ID IID;
18961- switch (BuiltinID) {
18962- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18963- IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18964- break;
18965- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18966- IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18967- break;
18968- }
18969- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18970- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18971- llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18972- return Builder.CreateCall(F, {Addr, Val});
18973- }
1897418958 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
1897518959 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
1897618960 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19352,7 +19336,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1935219336 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1935319337 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1935419338 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19355- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
19339+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19340+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19341+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
1935619342 llvm::AtomicRMWInst::BinOp BinOp;
1935719343 switch (BuiltinID) {
1935819344 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19374,6 +19360,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1937419360 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1937519361 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1937619362 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19363+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19364+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1937719365 BinOp = llvm::AtomicRMWInst::FAdd;
1937819366 break;
1937919367 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19418,7 +19406,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1941819406 AO = AtomicOrdering::Monotonic;
1941919407
1942019408 // The v2bf16 builtin uses i16 instead of a natural bfloat type.
19421- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19409+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19410+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19411+ BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1942219412 llvm::Type *V2BF16Ty = FixedVectorType::get(
1942319413 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1942419414 Val = Builder.CreateBitCast(Val, V2BF16Ty);
0 commit comments