@@ -6306,7 +6306,7 @@ static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6306
6306
6307
6307
static bool canUseAMDGPUFastFPAtomics (CodeGenFunction &CGF, LValue X,
6308
6308
RValue Update, BinaryOperatorKind BO,
6309
- const Expr *Hint) {
6309
+ const Expr *Hint, SourceLocation Loc ) {
6310
6310
6311
6311
if (!Update.isScalar ())
6312
6312
return false ;
@@ -6345,10 +6345,11 @@ static bool canUseAMDGPUFastFPAtomics(CodeGenFunction &CGF, LValue X,
6345
6345
}
6346
6346
}
6347
6347
6348
- // Fast FP atomics only work when the Update type is the same as the target X.
6349
- // If not, rever to atomicxchg and warn the user.
6350
- bool hasXandUpdateSameType =
6351
- (Update.getScalarVal ()->getType () == X.getAddress (CGF).getElementType ());
6348
+ bool supportsFastFPAtomics =
6349
+ Context.getTargetInfo ().getTriple ().isAMDGCN () &&
6350
+ CGF.CGM .getOpenMPRuntime ().supportFastFPAtomics () &&
6351
+ CGF.CGM .getLangOpts ().OpenMPIsTargetDevice &&
6352
+ userRequestsAMDGPUFastFPAtomics;
6352
6353
6353
6354
bool addOpHasAMDGPUFastVersion =
6354
6355
BO == BO_Add && (Update.getScalarVal ()->getType ()->isDoubleTy () ||
@@ -6358,18 +6359,42 @@ static bool canUseAMDGPUFastFPAtomics(CodeGenFunction &CGF, LValue X,
6358
6359
(BO == BO_LT || BO == BO_GT) &&
6359
6360
Update.getScalarVal ()->getType ()->isDoubleTy ();
6360
6361
6361
- return Context.getTargetInfo ().getTriple ().isAMDGCN () &&
6362
- CGF.CGM .getOpenMPRuntime ().supportFastFPAtomics () &&
6363
- CGF.CGM .getLangOpts ().OpenMPIsTargetDevice &&
6364
- userRequestsAMDGPUFastFPAtomics &&
6365
- (addOpHasAMDGPUFastVersion || minMaxOpHasAMDGPUFastVersion) &&
6366
- hasXandUpdateSameType && X.isSimple ();
6362
+ if (!supportsFastFPAtomics ||
6363
+ (!addOpHasAMDGPUFastVersion && !minMaxOpHasAMDGPUFastVersion))
6364
+ return false ;
6365
+
6366
+ llvm::Type *UpdateType = Update.getScalarVal ()->getType ();
6367
+ llvm::Type *XType = X.getAddress (CGF).getElementType ();
6368
+
6369
+ bool isUpdateLosslesslyCastableToX =
6370
+ UpdateType->canLosslesslyBitCastTo (XType);
6371
+
6372
+ if (!isUpdateLosslesslyCastableToX) {
6373
+
6374
+ auto getTypeNameAsString = [](llvm::Type* T) -> std::string {
6375
+ std::string TypeNameStr;
6376
+ llvm::raw_string_ostream OutputStream (TypeNameStr);
6377
+ T->print (OutputStream);
6378
+ return TypeNameStr;
6379
+ };
6380
+
6381
+ unsigned DiagID = CGF.CGM .getDiags ().getCustomDiagID (
6382
+ DiagnosticsEngine::Warning,
6383
+ " Can't emit fast FP atomic call due to type mismatch. The operation "
6384
+ " tries to assign %0 to %1. A fallback atomic operation is "
6385
+ " emitted which ignores the type conflict. Result may be incorrect!" );
6386
+ clang::DiagnosticBuilder DB = CGF.CGM .getDiags ().Report (Loc, DiagID);
6387
+ DB.AddString (getTypeNameAsString (UpdateType));
6388
+ DB.AddString (getTypeNameAsString (XType));
6389
+ }
6390
+
6391
+ return isUpdateLosslesslyCastableToX;
6367
6392
}
6368
6393
6369
6394
static std::pair<bool , RValue>
6370
6395
emitOMPAtomicRMW (CodeGenFunction &CGF, LValue X, RValue Update,
6371
6396
BinaryOperatorKind BO, llvm::AtomicOrdering AO,
6372
- bool IsXLHSInRHSPart, const Expr *Hint) {
6397
+ bool IsXLHSInRHSPart, const Expr *Hint, SourceLocation Loc ) {
6373
6398
ASTContext &Context = CGF.getContext ();
6374
6399
6375
6400
if (CGF.CGM .getOpenMPRuntime ().mustEmitSafeAtomic (CGF, X, Update, BO)) {
@@ -6378,7 +6403,7 @@ emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
6378
6403
return std::make_pair (false , RValue::get (nullptr ));
6379
6404
}
6380
6405
6381
- bool useFPAtomics = canUseAMDGPUFastFPAtomics (CGF, X, Update, BO, Hint);
6406
+ bool useFPAtomics = canUseAMDGPUFastFPAtomics (CGF, X, Update, BO, Hint, Loc );
6382
6407
if (useFPAtomics) {
6383
6408
auto Ret = CGF.CGM .getOpenMPRuntime ().emitFastFPAtomicCall (
6384
6409
CGF, X, Update, BO, IsXLHSInRHSPart);
@@ -6509,7 +6534,7 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6509
6534
// x--, --x -> xrval - 1;
6510
6535
// x = x binop expr; -> xrval binop expr
6511
6536
// x = expr Op x; -> expr binop xrval;
6512
- auto Res = emitOMPAtomicRMW (*this , X, E, BO, AO, IsXLHSInRHSPart, Hint);
6537
+ auto Res = emitOMPAtomicRMW (*this , X, E, BO, AO, IsXLHSInRHSPart, Hint, Loc );
6513
6538
if (!Res.first ) {
6514
6539
if (X.isGlobalReg ()) {
6515
6540
// Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
@@ -6745,9 +6770,9 @@ static void emitOMPAtomicCompareExpr(
6745
6770
}
6746
6771
6747
6772
// Check if fast AMDGPU FP atomics can be used for the current operation:
6748
- bool canUseFastAtomics =
6749
- canUseAMDGPUFastFPAtomics ( CGF, XLVal, RValue::get (EVal),
6750
- cast<BinaryOperator>(CE)-> getOpcode (), Hint );
6773
+ bool canUseFastAtomics = canUseAMDGPUFastFPAtomics (
6774
+ CGF, XLVal, RValue::get (EVal), cast<BinaryOperator>(CE)-> getOpcode ( ),
6775
+ Hint, Loc );
6751
6776
if (canUseFastAtomics) {
6752
6777
CGF.CGM .getOpenMPRuntime ().emitFastFPAtomicCall (
6753
6778
CGF, XLVal, RValue::get (EVal), cast<BinaryOperator>(CE)->getOpcode (),
0 commit comments