@@ -18946,22 +18946,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1894618946 CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()});
1894718947 return Builder.CreateCall(F, {Addr, Val});
1894818948 }
18949- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18950- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
18951- Intrinsic::ID IID;
18952- switch (BuiltinID) {
18953- case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
18954- IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16;
18955- break;
18956- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
18957- IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16;
18958- break;
18959- }
18960- llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
18961- llvm::Value *Val = EmitScalarExpr(E->getArg(1));
18962- llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()});
18963- return Builder.CreateCall(F, {Addr, Val});
18964- }
1896518949 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32:
1896618950 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32:
1896718951 case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16:
@@ -19343,7 +19327,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1934319327 case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16:
1934419328 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1934519329 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
19346- case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: {
19330+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19331+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19332+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: {
1934719333 llvm::AtomicRMWInst::BinOp BinOp;
1934819334 switch (BuiltinID) {
1934919335 case AMDGPU::BI__builtin_amdgcn_atomic_inc32:
@@ -19365,6 +19351,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1936519351 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16:
1936619352 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32:
1936719353 case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64:
19354+ case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16:
19355+ case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16:
1936819356 BinOp = llvm::AtomicRMWInst::FAdd;
1936919357 break;
1937019358 case AMDGPU::BI__builtin_amdgcn_ds_fminf:
@@ -19409,7 +19397,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1940919397 AO = AtomicOrdering::Monotonic;
1941019398
1941119399 // The v2bf16 builtins use i16 instead of a natural bfloat type.
19412- if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) {
19400+ if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16 ||
19401+ BuiltinID == AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16 ||
19402+ BuiltinID == AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16) {
1941319403 llvm::Type *V2BF16Ty = FixedVectorType::get(
1941419404 llvm::Type::getBFloatTy(Builder.getContext()), 2);
1941519405 Val = Builder.CreateBitCast(Val, V2BF16Ty);
0 commit comments