Skip to content

Commit e912e7c

Browse files
committed
[flang][OpenMP] Add implicit casts for omp.atomic.capture
1 parent c617466 commit e912e7c

File tree

4 files changed

+240
-124
lines changed

4 files changed

+240
-124
lines changed

flang/docs/OpenMPSupport.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,4 @@ Note : No distinction is made between the support in Parser/Semantics, MLIR, Low
6464
| target teams distribute parallel loop simd construct | P | device, reduction, dist_schedule and linear clauses are not supported |
6565

6666
## OpenMP 3.1, OpenMP 2.5, OpenMP 1.1
67-
All features except a few corner cases in atomic (complex type, different but compatible types in lhs and rhs), threadprivate (character type) constructs/clauses are supported.
67+
All features except a few corner cases in threadprivate (character type) constructs/clauses are supported.

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 161 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2865,6 +2865,85 @@ static void genAtomicWrite(lower::AbstractConverter &converter,
28652865
rightHandClauseList, loc);
28662866
}
28672867

2868+
/*
2869+
Emit an implicit cast. Different yet compatible types on
2870+
omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
2871+
emit atomic instructions (on primitive types) and `__atomic_load`
2872+
libcall (on complex type) without explicitly converting
2873+
between such compatible types. The OMPIRBuilder relies on the
2874+
frontend to resolve such inconsistencies between `omp.atomic.read`
2875+
operand types. Similar inconsistencies between operand types in
2876+
`omp.atomic.write` are resolved through implicit casting by use of typed
2877+
assignment (i.e. `evaluate::Assignment`). However, use of typed
2878+
assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
2879+
non-atomic load of `x` into a temporary `alloca`, followed by an atomic
2880+
read of form `v = alloca`. Hence, it is necessary to perform a custom
2881+
implicit cast.
2882+
2883+
An atomic read of form `v = x` would (without implicit casting)
2884+
lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
2885+
type2`. This implicit casting will rather generate the following FIR:
2886+
2887+
%alloca = fir.alloca type2
2888+
omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
2889+
%load = fir.load %alloca : !fir.ref<type2>
2890+
%cvt = fir.convert %load : (type2) -> type1
2891+
fir.store %cvt to %v : !fir.ref<type1>
2892+
2893+
This sequence of operations is thread-safe since each thread allocates
2894+
the `alloca` in its stack, and performs `%alloca = %x` atomically. Once
2895+
safely read, each thread performs the implicit cast on the local
2896+
`alloca`, and writes the final result to `%v`.
2897+
2898+
/// \param builder : FirOpBuilder
2899+
/// \param loc : Location for FIR generation
2900+
/// \param toAddress : Address of %v
2901+
/// \param toType : Type of %v
2902+
/// \param fromType : Type of %x
2903+
/// \param alloca : Thread scoped `alloca`
2904+
// It is the responsibility of the caller
2905+
// to position the `alloca` at `AllocaIP`
2906+
// through `builder.getAllocaBlock()`
2907+
*/
2908+
2909+
// Loads the value held in `alloca`, converts it from `fromType` to `toType`,
// and stores the converted value to `toAddress`. Three cases are handled:
// complex -> non-complex, non-complex -> complex, and everything else.
// This function only emits the load/convert/store; it does not create
// `alloca` and does not emit the atomic read that fills it.
static void emitAtomicReadImplicitCast(fir::FirOpBuilder &builder,
2910+
mlir::Location loc,
2911+
mlir::Value toAddress, mlir::Type toType,
2912+
mlir::Type fromType,
2913+
mlir::Value alloca) {
2914+
// Plain load of the temporary; every branch below converts this value.
auto load = builder.create<fir::LoadOp>(loc, alloca);
2915+
if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
2916+
// `fromType` is complex but `toType` is not: extract the component at
2917+
// index 0 (presumably the real part - confirm), then convert and store it.
2918+
auto extract = builder.create<fir::ExtractValueOp>(
2919+
loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
2920+
builder.getArrayAttr(
2921+
builder.getIntegerAttr(builder.getIndexType(), 0)));
2922+
auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
2923+
builder.create<fir::StoreOp>(loc, cvt, toAddress);
2924+
} else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
2925+
// `toType` is complex but `fromType` is not: build the complex value with
2926+
// `InsertValueOp`, starting from an undefined value of `toType`.
2927+
mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
2928+
mlir::Type complexEleTy =
2929+
mlir::cast<mlir::ComplexType>(toType).getElementType();
2930+
// The loaded scalar is converted to the complex element type first.
mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
2931+
mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
2932+
// Index 0 receives the converted scalar.
mlir::Value idx0 = builder.create<fir::InsertValueOp>(
2933+
loc, toType, undef, cvt,
2934+
builder.getArrayAttr(
2935+
builder.getIntegerAttr(builder.getIndexType(), 0)));
2936+
// Index 1 receives the zero constant, so no undefined component remains.
mlir::Value idx1 = builder.create<fir::InsertValueOp>(
2937+
loc, toType, idx0, zero,
2938+
builder.getArrayAttr(
2939+
builder.getIntegerAttr(builder.getIndexType(), 1)));
2940+
builder.create<fir::StoreOp>(loc, idx1, toAddress);
2941+
} else {
2942+
// Both types are complex, or neither is: one `fir.convert` is emitted.
auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
2943+
builder.create<fir::StoreOp>(loc, cvt, toAddress);
2944+
}
2945+
}
2946+
28682947
/// Processes an atomic construct with read clause.
28692948
static void genAtomicRead(lower::AbstractConverter &converter,
28702949
const parser::OmpAtomicRead &atomicRead,
@@ -2891,34 +2970,7 @@ static void genAtomicRead(lower::AbstractConverter &converter,
28912970
*semantics::GetExpr(assignmentStmtVariable), stmtCtx));
28922971

28932972
if (fromAddress.getType() != toAddress.getType()) {
2894-
// Emit an implicit cast. Different yet compatible types on
2895-
// omp.atomic.read constitute valid Fortran. The OMPIRBuilder will
2896-
// emit atomic instructions (on primitive types) and `__atomic_load`
2897-
// libcall (on complex type) without explicitly converting
2898-
// between such compatible types. The OMPIRBuilder relies on the
2899-
// frontend to resolve such inconsistencies between `omp.atomic.read `
2900-
// operand types. Similar inconsistencies between operand types in
2901-
// `omp.atomic.write` are resolved through implicit casting by use of typed
2902-
// assignment (i.e. `evaluate::Assignment`). However, use of typed
2903-
// assignment in `omp.atomic.read` (of form `v = x`) leads to an unsafe,
2904-
// non-atomic load of `x` into a temporary `alloca`, followed by an atomic
2905-
// read of form `v = alloca`. Hence, it is needed to perform a custom
2906-
// implicit cast.
2907-
2908-
// An atomic read of form `v = x` would (without implicit casting)
2909-
// lower to `omp.atomic.read %v = %x : !fir.ref<type1>, !fir.ref<type2>,
2910-
// type2`. This implicit casting will rather generate the following FIR:
2911-
//
2912-
// %alloca = fir.alloca type2
2913-
// omp.atomic.read %alloca = %x : !fir.ref<type2>, !fir.ref<type2>, type2
2914-
// %load = fir.load %alloca : !fir.ref<type2>
2915-
// %cvt = fir.convert %load : (type2) -> type1
2916-
// fir.store %cvt to %v : !fir.ref<type1>
2917-
2918-
// These sequence of operations is thread-safe since each thread allocates
2919-
// the `alloca` in its stack, and performs `%alloca = %x` atomically. Once
2920-
// safely read, each thread performs the implicit cast on the local
2921-
// `alloca`, and writes the final result to `%v`.
2973+
29222974
mlir::Type toType = fir::unwrapRefType(toAddress.getType());
29232975
mlir::Type fromType = fir::unwrapRefType(fromAddress.getType());
29242976
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
@@ -2930,37 +2982,8 @@ static void genAtomicRead(lower::AbstractConverter &converter,
29302982
genAtomicCaptureStatement(converter, fromAddress, alloca,
29312983
leftHandClauseList, rightHandClauseList,
29322984
elementType, loc);
2933-
auto load = builder.create<fir::LoadOp>(loc, alloca);
2934-
if (fir::isa_complex(fromType) && !fir::isa_complex(toType)) {
2935-
// Emit an additional `ExtractValueOp` if `fromAddress` is of complex
2936-
// type, but `toAddress` is not.
2937-
auto extract = builder.create<fir::ExtractValueOp>(
2938-
loc, mlir::cast<mlir::ComplexType>(fromType).getElementType(), load,
2939-
builder.getArrayAttr(
2940-
builder.getIntegerAttr(builder.getIndexType(), 0)));
2941-
auto cvt = builder.create<fir::ConvertOp>(loc, toType, extract);
2942-
builder.create<fir::StoreOp>(loc, cvt, toAddress);
2943-
} else if (!fir::isa_complex(fromType) && fir::isa_complex(toType)) {
2944-
// Emit an additional `InsertValueOp` if `toAddress` is of complex
2945-
// type, but `fromAddress` is not.
2946-
mlir::Value undef = builder.create<fir::UndefOp>(loc, toType);
2947-
mlir::Type complexEleTy =
2948-
mlir::cast<mlir::ComplexType>(toType).getElementType();
2949-
mlir::Value cvt = builder.create<fir::ConvertOp>(loc, complexEleTy, load);
2950-
mlir::Value zero = builder.createRealZeroConstant(loc, complexEleTy);
2951-
mlir::Value idx0 = builder.create<fir::InsertValueOp>(
2952-
loc, toType, undef, cvt,
2953-
builder.getArrayAttr(
2954-
builder.getIntegerAttr(builder.getIndexType(), 0)));
2955-
mlir::Value idx1 = builder.create<fir::InsertValueOp>(
2956-
loc, toType, idx0, zero,
2957-
builder.getArrayAttr(
2958-
builder.getIntegerAttr(builder.getIndexType(), 1)));
2959-
builder.create<fir::StoreOp>(loc, idx1, toAddress);
2960-
} else {
2961-
auto cvt = builder.create<fir::ConvertOp>(loc, toType, load);
2962-
builder.create<fir::StoreOp>(loc, cvt, toAddress);
2963-
}
2985+
emitAtomicReadImplicitCast(builder, loc, toAddress, toType, fromType,
2986+
alloca);
29642987
} else
29652988
genAtomicCaptureStatement(converter, fromAddress, toAddress,
29662989
leftHandClauseList, rightHandClauseList,
@@ -3049,10 +3072,6 @@ static void genAtomicCapture(lower::AbstractConverter &converter,
30493072
mlir::Type stmt2VarType =
30503073
fir::getBase(converter.genExprValue(assign2.lhs, stmtCtx)).getType();
30513074

3052-
// Check if implicit type is needed
3053-
if (stmt1VarType != stmt2VarType)
3054-
TODO(loc, "atomic capture requiring implicit type casts");
3055-
30563075
mlir::Operation *atomicCaptureOp = nullptr;
30573076
mlir::IntegerAttr hint = nullptr;
30583077
mlir::omp::ClauseMemoryOrderKindAttr memoryOrder = nullptr;
@@ -3075,10 +3094,31 @@ static void genAtomicCapture(lower::AbstractConverter &converter,
30753094
// Atomic capture construct is of the form [capture-stmt, update-stmt]
30763095
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
30773096
mlir::Type elementType = converter.genType(fromExpr);
3078-
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
3079-
/*leftHandClauseList=*/nullptr,
3080-
/*rightHandClauseList=*/nullptr, elementType,
3081-
loc);
3097+
if (stmt1VarType != stmt2VarType) {
3098+
mlir::Value alloca;
3099+
mlir::Type toType = fir::unwrapRefType(stmt1LHSArg.getType());
3100+
mlir::Type fromType = fir::unwrapRefType(stmt2LHSArg.getType());
3101+
{
3102+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3103+
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
3104+
alloca = firOpBuilder.create<fir::AllocaOp>(loc, fromType);
3105+
}
3106+
genAtomicCaptureStatement(converter, stmt2LHSArg, alloca,
3107+
/*leftHandClauseList=*/nullptr,
3108+
/*rightHandClauseList=*/nullptr, elementType,
3109+
loc);
3110+
{
3111+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3112+
firOpBuilder.setInsertionPointAfter(atomicCaptureOp);
3113+
emitAtomicReadImplicitCast(firOpBuilder, loc, stmt1LHSArg, toType,
3114+
fromType, alloca);
3115+
}
3116+
} else {
3117+
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
3118+
/*leftHandClauseList=*/nullptr,
3119+
/*rightHandClauseList=*/nullptr, elementType,
3120+
loc);
3121+
}
30823122
genAtomicUpdateStatement(
30833123
converter, stmt2LHSArg, stmt2VarType, stmt2Var, stmt2Expr,
30843124
/*leftHandClauseList=*/nullptr,
@@ -3091,10 +3131,32 @@ static void genAtomicCapture(lower::AbstractConverter &converter,
30913131
firOpBuilder.setInsertionPointToStart(&block);
30923132
const semantics::SomeExpr &fromExpr = *semantics::GetExpr(stmt1Expr);
30933133
mlir::Type elementType = converter.genType(fromExpr);
3094-
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
3095-
/*leftHandClauseList=*/nullptr,
3096-
/*rightHandClauseList=*/nullptr, elementType,
3097-
loc);
3134+
3135+
if (stmt1VarType != stmt2VarType) {
3136+
mlir::Value alloca;
3137+
mlir::Type toType = fir::unwrapRefType(stmt1LHSArg.getType());
3138+
mlir::Type fromType = fir::unwrapRefType(stmt2LHSArg.getType());
3139+
{
3140+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3141+
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
3142+
alloca = firOpBuilder.create<fir::AllocaOp>(loc, fromType);
3143+
}
3144+
genAtomicCaptureStatement(converter, stmt2LHSArg, alloca,
3145+
/*leftHandClauseList=*/nullptr,
3146+
/*rightHandClauseList=*/nullptr, elementType,
3147+
loc);
3148+
{
3149+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3150+
firOpBuilder.setInsertionPointAfter(atomicCaptureOp);
3151+
emitAtomicReadImplicitCast(firOpBuilder, loc, stmt1LHSArg, toType,
3152+
fromType, alloca);
3153+
}
3154+
} else {
3155+
genAtomicCaptureStatement(converter, stmt2LHSArg, stmt1LHSArg,
3156+
/*leftHandClauseList=*/nullptr,
3157+
/*rightHandClauseList=*/nullptr, elementType,
3158+
loc);
3159+
}
30983160
genAtomicWriteStatement(converter, stmt2LHSArg, stmt2RHSArg,
30993161
/*leftHandClauseList=*/nullptr,
31003162
/*rightHandClauseList=*/nullptr, loc);
@@ -3107,10 +3169,34 @@ static void genAtomicCapture(lower::AbstractConverter &converter,
31073169
converter, stmt1LHSArg, stmt1VarType, stmt1Var, stmt1Expr,
31083170
/*leftHandClauseList=*/nullptr,
31093171
/*rightHandClauseList=*/nullptr, loc, atomicCaptureOp);
3110-
genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg,
3111-
/*leftHandClauseList=*/nullptr,
3112-
/*rightHandClauseList=*/nullptr, elementType,
3113-
loc);
3172+
3173+
if (stmt1VarType != stmt2VarType) {
3174+
mlir::Value alloca;
3175+
mlir::Type toType = fir::unwrapRefType(stmt2LHSArg.getType());
3176+
mlir::Type fromType = fir::unwrapRefType(stmt1LHSArg.getType());
3177+
3178+
{
3179+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3180+
firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock());
3181+
alloca = firOpBuilder.create<fir::AllocaOp>(loc, fromType);
3182+
}
3183+
3184+
genAtomicCaptureStatement(converter, stmt1LHSArg, alloca,
3185+
/*leftHandClauseList=*/nullptr,
3186+
/*rightHandClauseList=*/nullptr, elementType,
3187+
loc);
3188+
{
3189+
mlir::OpBuilder::InsertionGuard guard(firOpBuilder);
3190+
firOpBuilder.setInsertionPointAfter(atomicCaptureOp);
3191+
emitAtomicReadImplicitCast(firOpBuilder, loc, stmt2LHSArg, toType,
3192+
fromType, alloca);
3193+
}
3194+
} else {
3195+
genAtomicCaptureStatement(converter, stmt1LHSArg, stmt2LHSArg,
3196+
/*leftHandClauseList=*/nullptr,
3197+
/*rightHandClauseList=*/nullptr, elementType,
3198+
loc);
3199+
}
31143200
}
31153201
firOpBuilder.setInsertionPointToEnd(&block);
31163202
firOpBuilder.create<mlir::omp::TerminatorOp>(loc);

flang/test/Lower/OpenMP/Todo/atomic-capture-implicit-cast.f90

Lines changed: 0 additions & 48 deletions
This file was deleted.

0 commit comments

Comments
 (0)