Skip to content

Commit ff7b608

Browse files
committed
[flang][cuda] Add c_devloc as intrinsic and inline it during lowering
1 parent 99c2e3b commit ff7b608

File tree

8 files changed

+139
-5
lines changed

8 files changed

+139
-5
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -744,6 +744,11 @@ mlir::Value genMaxWithZero(fir::FirOpBuilder &builder, mlir::Location loc,
744744
mlir::Value genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
745745
mlir::Value cPtr, mlir::Type ty);
746746

747+
/// The type(C_DEVPTR) is defined as the derived type with only one
748+
/// component of C_PTR type. Get the C address from the C_PTR component.
749+
mlir::Value genCDevPtrAddr(fir::FirOpBuilder &builder, mlir::Location loc,
750+
mlir::Value cDevPtr, mlir::Type ty);
751+
747752
/// Get the C address value.
748753
mlir::Value genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
749754
mlir::Location loc, mlir::Value cPtr);

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ struct IntrinsicLibrary {
214214
llvm::ArrayRef<fir::ExtendedValue>);
215215
fir::ExtendedValue genCAssociatedCPtr(mlir::Type,
216216
llvm::ArrayRef<fir::ExtendedValue>);
217+
fir::ExtendedValue genCDevLoc(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
217218
mlir::Value genErfcScaled(mlir::Type resultType,
218219
llvm::ArrayRef<mlir::Value> args);
219220
void genCFPointer(llvm::ArrayRef<fir::ExtendedValue>);

flang/lib/Evaluate/intrinsics.cpp

Lines changed: 73 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2595,6 +2595,8 @@ class IntrinsicProcTable::Implementation {
25952595
ActualArguments &, FoldingContext &) const;
25962596
std::optional<SpecificCall> HandleC_Loc(
25972597
ActualArguments &, FoldingContext &) const;
2598+
std::optional<SpecificCall> HandleC_Devloc(
2599+
ActualArguments &, FoldingContext &) const;
25982600
const std::string &ResolveAlias(const std::string &name) const {
25992601
auto iter{aliases_.find(name)};
26002602
return iter == aliases_.end() ? name : iter->second;
@@ -2622,7 +2624,8 @@ bool IntrinsicProcTable::Implementation::IsIntrinsicFunction(
26222624
return true;
26232625
}
26242626
// special cases
2625-
return name == "__builtin_c_loc" || name == "null";
2627+
return name == "__builtin_c_loc" || name == "__builtin_c_devloc" ||
2628+
name == "null";
26262629
}
26272630
bool IntrinsicProcTable::Implementation::IsIntrinsicSubroutine(
26282631
const std::string &name0) const {
@@ -3012,6 +3015,73 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Loc(
30123015
return std::nullopt;
30133016
}
30143017

3018+
// CUDA Fortran C_DEVLOC(x)
3019+
std::optional<SpecificCall> IntrinsicProcTable::Implementation::HandleC_Devloc(
3020+
ActualArguments &arguments, FoldingContext &context) const {
3021+
static const char *const keywords[]{"cptr", nullptr};
3022+
3023+
if (CheckAndRearrangeArguments(arguments, context.messages(), keywords)) {
3024+
CHECK(arguments.size() == 1);
3025+
const auto *expr{arguments[0].value().UnwrapExpr()};
3026+
if (auto typeAndShape{characteristics::TypeAndShape::Characterize(
3027+
arguments[0], context)}) {
3028+
if (expr && !IsContiguous(*expr, context).value_or(true)) {
3029+
context.messages().Say(arguments[0]->sourceLocation(),
3030+
"C_DEVLOC() argument must be contiguous"_err_en_US);
3031+
}
3032+
if (auto constExtents{AsConstantExtents(context, typeAndShape->shape())};
3033+
constExtents && GetSize(*constExtents) == 0) {
3034+
context.messages().Say(arguments[0]->sourceLocation(),
3035+
"C_DEVLOC() argument may not be a zero-sized array"_err_en_US);
3036+
}
3037+
if (!(typeAndShape->type().category() != TypeCategory::Derived ||
3038+
typeAndShape->type().IsAssumedType() ||
3039+
(!typeAndShape->type().IsPolymorphic() &&
3040+
CountNonConstantLenParameters(
3041+
typeAndShape->type().GetDerivedTypeSpec()) == 0))) {
3042+
context.messages().Say(arguments[0]->sourceLocation(),
3043+
"C_DEVLOC() argument must have an intrinsic type, assumed type, or non-polymorphic derived type with no non-constant length parameter"_err_en_US);
3044+
} else if (typeAndShape->type().knownLength().value_or(1) == 0) {
3045+
context.messages().Say(arguments[0]->sourceLocation(),
3046+
"C_DEVLOC() argument may not be zero-length character"_err_en_US);
3047+
} else if (typeAndShape->type().category() != TypeCategory::Derived &&
3048+
!IsInteroperableIntrinsicType(typeAndShape->type()).value_or(true)) {
3049+
if (typeAndShape->type().category() == TypeCategory::Character &&
3050+
typeAndShape->type().kind() == 1) {
3051+
// Default character kind, but length is not known to be 1
3052+
if (context.languageFeatures().ShouldWarn(
3053+
common::UsageWarning::CharacterInteroperability)) {
3054+
context.messages().Say(
3055+
common::UsageWarning::CharacterInteroperability,
3056+
arguments[0]->sourceLocation(),
3057+
"C_DEVLOC() argument has non-interoperable character length"_warn_en_US);
3058+
}
3059+
} else if (context.languageFeatures().ShouldWarn(
3060+
common::UsageWarning::Interoperability)) {
3061+
context.messages().Say(common::UsageWarning::Interoperability,
3062+
arguments[0]->sourceLocation(),
3063+
"C_DEVLOC() argument has non-interoperable intrinsic type or kind"_warn_en_US);
3064+
}
3065+
}
3066+
3067+
characteristics::DummyDataObject ddo{std::move(*typeAndShape)};
3068+
ddo.intent = common::Intent::In;
3069+
return SpecificCall{
3070+
SpecificIntrinsic{"__builtin_c_devloc"s,
3071+
characteristics::Procedure{
3072+
characteristics::FunctionResult{
3073+
DynamicType{GetBuiltinDerivedType(
3074+
builtinsScope_, "__builtin_c_devptr")}},
3075+
characteristics::DummyArguments{
3076+
characteristics::DummyArgument{"cptr"s, std::move(ddo)}},
3077+
characteristics::Procedure::Attrs{
3078+
characteristics::Procedure::Attr::Pure}}},
3079+
std::move(arguments)};
3080+
}
3081+
}
3082+
return std::nullopt;
3083+
}
3084+
30153085
static bool CheckForNonPositiveValues(FoldingContext &context,
30163086
const ActualArgument &arg, const std::string &procName,
30173087
const std::string &argName) {
@@ -3202,6 +3272,8 @@ std::optional<SpecificCall> IntrinsicProcTable::Implementation::Probe(
32023272
} else { // function
32033273
if (call.name == "__builtin_c_loc") {
32043274
return HandleC_Loc(arguments, context);
3275+
} else if (call.name == "__builtin_c_devloc") {
3276+
return HandleC_Devloc(arguments, context);
32053277
} else if (call.name == "null") {
32063278
return HandleNull(arguments, context);
32073279
}

flang/lib/Optimizer/Builder/FIRBuilder.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,25 @@ mlir::Value fir::factory::genCPtrOrCFunptrAddr(fir::FirOpBuilder &builder,
16261626
cPtr, addrFieldIndex);
16271627
}
16281628

1629+
mlir::Value fir::factory::genCDevPtrAddr(fir::FirOpBuilder &builder,
1630+
mlir::Location loc,
1631+
mlir::Value cDevPtr, mlir::Type ty) {
1632+
auto recTy = mlir::cast<fir::RecordType>(ty);
1633+
assert(recTy.getTypeList().size() == 1);
1634+
auto cptrFieldName = recTy.getTypeList()[0].first;
1635+
mlir::Type cptrFieldTy = recTy.getTypeList()[0].second;
1636+
auto fieldIndexType = fir::FieldType::get(ty.getContext());
1637+
mlir::Value cptrFieldIndex = builder.create<fir::FieldIndexOp>(
1638+
loc, fieldIndexType, cptrFieldName, recTy,
1639+
/*typeParams=*/mlir::ValueRange{});
1640+
auto cptrCoord = builder.create<fir::CoordinateOp>(
1641+
loc, builder.getRefType(cptrFieldTy), cDevPtr, cptrFieldIndex);
1642+
auto [addrFieldIndex, addrFieldTy] =
1643+
genCPtrOrCFunptrFieldIndex(builder, loc, cptrFieldTy);
1644+
return builder.create<fir::CoordinateOp>(loc, builder.getRefType(addrFieldTy),
1645+
cptrCoord, addrFieldIndex);
1646+
}
1647+
16291648
mlir::Value fir::factory::genCPtrOrCFunptrValue(fir::FirOpBuilder &builder,
16301649
mlir::Location loc,
16311650
mlir::Value cPtr) {

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ static constexpr IntrinsicHandler handlers[]{
167167
&I::genCAssociatedCPtr,
168168
{{{"c_ptr_1", asAddr}, {"c_ptr_2", asAddr, handleDynamicOptional}}},
169169
/*isElemental=*/false},
170+
{"c_devloc", &I::genCDevLoc, {{{"x", asBox}}}, /*isElemental=*/false},
170171
{"c_f_pointer",
171172
&I::genCFPointer,
172173
{{{"cptr", asValue},
@@ -2828,11 +2829,14 @@ static mlir::Value getAddrFromBox(fir::FirOpBuilder &builder,
28282829
static fir::ExtendedValue
28292830
genCLocOrCFunLoc(fir::FirOpBuilder &builder, mlir::Location loc,
28302831
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args,
2831-
bool isFunc = false) {
2832+
bool isFunc = false, bool isDevLoc = false) {
28322833
assert(args.size() == 1);
28332834
mlir::Value res = builder.create<fir::AllocaOp>(loc, resultType);
2834-
mlir::Value resAddr =
2835-
fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
2835+
mlir::Value resAddr;
2836+
if (isDevLoc)
2837+
resAddr = fir::factory::genCDevPtrAddr(builder, loc, res, resultType);
2838+
else
2839+
resAddr = fir::factory::genCPtrOrCFunptrAddr(builder, loc, res, resultType);
28362840
assert(fir::isa_box_type(fir::getBase(args[0]).getType()) &&
28372841
"argument must have been lowered to box type");
28382842
mlir::Value argAddr = getAddrFromBox(builder, loc, args[0], isFunc);
@@ -2889,6 +2893,14 @@ IntrinsicLibrary::genCAssociatedCPtr(mlir::Type resultType,
28892893
return genCAssociated(builder, loc, resultType, args);
28902894
}
28912895

2896+
// C_DEVLOC
2897+
fir::ExtendedValue
2898+
IntrinsicLibrary::genCDevLoc(mlir::Type resultType,
2899+
llvm::ArrayRef<fir::ExtendedValue> args) {
2900+
return genCLocOrCFunLoc(builder, loc, resultType, args, /*isFunc=*/false,
2901+
/*isDevLoc=*/true);
2902+
}
2903+
28922904
// C_F_POINTER
28932905
void IntrinsicLibrary::genCFPointer(llvm::ArrayRef<fir::ExtendedValue> args) {
28942906
assert(args.size() == 3);

flang/module/__fortran_builtins.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@
2222
intrinsic :: __builtin_c_loc
2323
public :: __builtin_c_loc
2424

25+
intrinsic :: __builtin_c_devloc
26+
public :: __builtin_c_devloc
27+
2528
intrinsic :: __builtin_c_f_pointer
2629
public :: __builtin_c_f_pointer
2730

@@ -144,6 +147,7 @@
144147

145148
type :: __force_derived_type_instantiations
146149
type(__builtin_c_ptr) :: c_ptr
150+
type(__builtin_c_devptr) :: c_devptr
147151
type(__builtin_c_funptr) :: c_funptr
148152
type(__builtin_event_type) :: event_type
149153
type(__builtin_lock_type) :: lock_type

flang/module/__fortran_type_info.f90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
module __fortran_type_info
1515

1616
use, intrinsic :: __fortran_builtins, &
17-
only: __builtin_c_ptr, __builtin_c_funptr
17+
only: __builtin_c_ptr, __builtin_c_devptr, __builtin_c_funptr
1818
implicit none
1919

2020
! Set PRIVATE by default to explicitly only export what is meant
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
2+
3+
attributes(global) subroutine testcdevloc(a)
4+
use __fortran_builtins, only: c_devloc => __builtin_c_devloc
5+
integer, device :: a(10)
6+
print*, c_devloc(a(1))
7+
end
8+
9+
! CHECK-LABEL: func.func @_QPtestcdevloc(
10+
! CHECK-SAME: %[[A_ARG:.*]]: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
11+
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ARG]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtestcdevlocEa"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
12+
! CHECK: %[[A1:.*]] = hlfir.designate %[[A]]#0 (%c1{{.*}}) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
13+
! CHECK: %[[BOX:.*]] = fir.embox %[[A1]] : (!fir.ref<i32>) -> !fir.box<i32>
14+
! CHECK: %[[CDEVPTR:.*]] = fir.alloca !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
15+
! CHECK: %[[FIELD_CPTR:.*]] = fir.field_index cptr, !fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>
16+
! CHECK: %[[COORD_CPTR:.*]] = fir.coordinate_of %[[CDEVPTR]], %[[FIELD_CPTR]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_devptr{cptr:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>
17+
! CHECK: %[[FIELD_ADDRESS:.*]] = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
18+
! CHECK: %[[COORD_ADDRESS:.*]] = fir.coordinate_of %[[COORD_CPTR]], %[[FIELD_ADDRESS]] : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>>, !fir.field) -> !fir.ref<i64>
19+
! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX]] : (!fir.box<i32>) -> !fir.ref<i32>
20+
! CHECK: %[[ADDRESS_A1:.*]] = fir.convert %[[BOX_ADDR]] : (!fir.ref<i32>) -> i64
21+
! CHECK: fir.store %[[ADDRESS_A1]] to %[[COORD_ADDRESS]] : !fir.ref<i64>

0 commit comments

Comments
 (0)