Skip to content

Commit b483a7a

Browse files
ThorBl authored and carlobertolli committed
[MI300] Improved fix for SWDEV-443900
Restructured the body of canUseAMDGPUFastFPAtomics so that the type check is performed only when a fast FP atomic operation could be emitted at all. The patch also fixes the failures of some lit tests, none of which meet the conditions for emitting a fast FP atomic operation. Change-Id: I35275de8639173b84f6e6defa4ad34b484181cf5
1 parent dad5ee7 commit b483a7a

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 42 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -6306,7 +6306,7 @@ static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
63066306

63076307
static bool canUseAMDGPUFastFPAtomics(CodeGenFunction &CGF, LValue X,
63086308
RValue Update, BinaryOperatorKind BO,
6309-
const Expr *Hint) {
6309+
const Expr *Hint, SourceLocation Loc) {
63106310

63116311
if (!Update.isScalar())
63126312
return false;
@@ -6345,10 +6345,11 @@ static bool canUseAMDGPUFastFPAtomics(CodeGenFunction &CGF, LValue X,
63456345
}
63466346
}
63476347

6348-
// Fast FP atomics only work when the Update type is the same as the target X.
6349-
// If not, revert to atomicxchg and warn the user.
6350-
bool hasXandUpdateSameType =
6351-
(Update.getScalarVal()->getType() == X.getAddress(CGF).getElementType());
6348+
bool supportsFastFPAtomics =
6349+
Context.getTargetInfo().getTriple().isAMDGCN() &&
6350+
CGF.CGM.getOpenMPRuntime().supportFastFPAtomics() &&
6351+
CGF.CGM.getLangOpts().OpenMPIsTargetDevice &&
6352+
userRequestsAMDGPUFastFPAtomics;
63526353

63536354
bool addOpHasAMDGPUFastVersion =
63546355
BO == BO_Add && (Update.getScalarVal()->getType()->isDoubleTy() ||
@@ -6358,18 +6359,42 @@ static bool canUseAMDGPUFastFPAtomics(CodeGenFunction &CGF, LValue X,
63586359
(BO == BO_LT || BO == BO_GT) &&
63596360
Update.getScalarVal()->getType()->isDoubleTy();
63606361

6361-
return Context.getTargetInfo().getTriple().isAMDGCN() &&
6362-
CGF.CGM.getOpenMPRuntime().supportFastFPAtomics() &&
6363-
CGF.CGM.getLangOpts().OpenMPIsTargetDevice &&
6364-
userRequestsAMDGPUFastFPAtomics &&
6365-
(addOpHasAMDGPUFastVersion || minMaxOpHasAMDGPUFastVersion) &&
6366-
hasXandUpdateSameType && X.isSimple();
6362+
if (!supportsFastFPAtomics ||
6363+
(!addOpHasAMDGPUFastVersion && !minMaxOpHasAMDGPUFastVersion))
6364+
return false;
6365+
6366+
llvm::Type *UpdateType = Update.getScalarVal()->getType();
6367+
llvm::Type *XType = X.getAddress(CGF).getElementType();
6368+
6369+
bool isUpdateLosslesslyCastableToX =
6370+
UpdateType->canLosslesslyBitCastTo(XType);
6371+
6372+
if (!isUpdateLosslesslyCastableToX) {
6373+
6374+
auto getTypeNameAsString = [](llvm::Type* T) -> std::string {
6375+
std::string TypeNameStr;
6376+
llvm::raw_string_ostream OutputStream(TypeNameStr);
6377+
T->print(OutputStream);
6378+
return TypeNameStr;
6379+
};
6380+
6381+
unsigned DiagID = CGF.CGM.getDiags().getCustomDiagID(
6382+
DiagnosticsEngine::Warning,
6383+
"Can't emit fast FP atomic call due to type mismatch. The operation "
6384+
"tries to assign %0 to %1. A fallback atomic operation is "
6385+
"emitted which ignores the type conflict. Result may be incorrect!");
6386+
clang::DiagnosticBuilder DB = CGF.CGM.getDiags().Report(Loc, DiagID);
6387+
DB.AddString(getTypeNameAsString(UpdateType));
6388+
DB.AddString(getTypeNameAsString(XType));
6389+
}
6390+
6391+
return isUpdateLosslesslyCastableToX;
63676392
}
63686393

63696394
static std::pair<bool, RValue>
63706395
emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
63716396
BinaryOperatorKind BO, llvm::AtomicOrdering AO,
6372-
bool IsXLHSInRHSPart, const Expr *Hint) {
6397+
bool IsXLHSInRHSPart, const Expr *Hint, SourceLocation Loc) {
63736398
ASTContext &Context = CGF.getContext();
63746399

63756400
if (CGF.CGM.getOpenMPRuntime().mustEmitSafeAtomic(CGF, X, Update, BO)) {
@@ -6378,7 +6403,7 @@ emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
63786403
return std::make_pair(false, RValue::get(nullptr));
63796404
}
63806405

6381-
bool useFPAtomics = canUseAMDGPUFastFPAtomics(CGF, X, Update, BO, Hint);
6406+
bool useFPAtomics = canUseAMDGPUFastFPAtomics(CGF, X, Update, BO, Hint, Loc);
63826407
if (useFPAtomics) {
63836408
auto Ret = CGF.CGM.getOpenMPRuntime().emitFastFPAtomicCall(
63846409
CGF, X, Update, BO, IsXLHSInRHSPart);
@@ -6509,7 +6534,7 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
65096534
// x--, --x -> xrval - 1;
65106535
// x = x binop expr; -> xrval binop expr
65116536
// x = expr Op x; - > expr binop xrval;
6512-
auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart, Hint);
6537+
auto Res = emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart, Hint, Loc);
65136538
if (!Res.first) {
65146539
if (X.isGlobalReg()) {
65156540
// Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
@@ -6745,9 +6770,9 @@ static void emitOMPAtomicCompareExpr(
67456770
}
67466771

67476772
// Check if fast AMDGPU FP atomics can be used for the current operation:
6748-
bool canUseFastAtomics =
6749-
canUseAMDGPUFastFPAtomics(CGF, XLVal, RValue::get(EVal),
6750-
cast<BinaryOperator>(CE)->getOpcode(), Hint);
6773+
bool canUseFastAtomics = canUseAMDGPUFastFPAtomics(
6774+
CGF, XLVal, RValue::get(EVal), cast<BinaryOperator>(CE)->getOpcode(),
6775+
Hint, Loc);
67516776
if (canUseFastAtomics) {
67526777
CGF.CGM.getOpenMPRuntime().emitFastFPAtomicCall(
67536778
CGF, XLVal, RValue::get(EVal), cast<BinaryOperator>(CE)->getOpcode(),

0 commit comments

Comments
 (0)