Skip to content

Commit 7ba7101

Browse files
authored
[flang][cuda] Add pointer attribute to allocate/deallocate ops (#170937)
Similar to the double descriptor information added in #170901, the pointer information needs to be carried on the op itself until the op can be converted. Detecting it from the defining fir.declare at conversion time can fail if the op is converted late.
1 parent 27bf5fd commit 7ba7101

File tree

5 files changed

+19
-17
lines changed

5 files changed

+19
-17
lines changed

flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments,
101101
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$pinned,
102102
Arg<Optional<AnyRefOrBoxType>, "", [MemRead]>:$source,
103103
cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
104-
UnitAttr:$hasDoubleDescriptor);
104+
UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
105105

106106
let results = (outs AnyIntegerType:$stat);
107107

@@ -129,7 +129,7 @@ def cuf_DeallocateOp : cuf_Op<"deallocate",
129129
let arguments = (ins Arg<fir_ReferenceType, "", [MemRead, MemWrite]>:$box,
130130
Arg<Optional<AnyRefOrBoxType>, "", [MemWrite]>:$errmsg,
131131
cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat,
132-
UnitAttr:$hasDoubleDescriptor);
132+
UnitAttr:$hasDoubleDescriptor, UnitAttr:$pointer);
133133

134134
let results = (outs AnyIntegerType:$stat);
135135

flang/lib/Lower/Allocatable.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,8 @@ class AllocateStmtHelper {
803803
builder, loc, retTy, box.getAddr(), errmsg, stream, pinned,
804804
source, cudaAttr,
805805
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
806-
doubleDescriptors ? builder.getUnitAttr() : nullptr)
806+
doubleDescriptors ? builder.getUnitAttr() : nullptr,
807+
box.isPointer() ? builder.getUnitAttr() : nullptr)
807808
.getResult();
808809
}
809810

@@ -873,7 +874,8 @@ static mlir::Value genCudaDeallocate(fir::FirOpBuilder &builder,
873874
return cuf::DeallocateOp::create(
874875
builder, loc, retTy, box.getAddr(), errmsg, cudaAttr,
875876
errorManager.hasStatSpec() ? builder.getUnitAttr() : nullptr,
876-
doubleDescriptors ? builder.getUnitAttr() : nullptr)
877+
doubleDescriptors ? builder.getUnitAttr() : nullptr,
878+
box.isPointer() ? builder.getUnitAttr() : nullptr)
877879
.getResult();
878880
}
879881

flang/lib/Optimizer/Transforms/CUDA/CUFAllocationConversion.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -322,15 +322,7 @@ struct CUFAllocateOpConversion
322322
fir::FirOpBuilder builder(rewriter, mod);
323323
mlir::Location loc = op.getLoc();
324324

325-
bool isPointer = false;
326-
327-
if (auto declareOp =
328-
mlir::dyn_cast_or_null<fir::DeclareOp>(op.getBox().getDefiningOp()))
329-
if (declareOp.getFortranAttrs() &&
330-
bitEnumContainsAny(*declareOp.getFortranAttrs(),
331-
fir::FortranVariableFlagsEnum::pointer))
332-
isPointer = true;
333-
325+
bool isPointer = op.getPointer();
334326
if (op.getHasDoubleDescriptor()) {
335327
// Allocation for module variable are done with custom runtime entry point
336328
// so the descriptors can be synchronized.

flang/test/Fir/CUDA/cuda-allocate.fir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ func.func @_QQallocate_stream() {
183183
func.func @_QPp_alloc() {
184184
%0 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>> {bindc_name = "complex_array", data_attr = #cuf.cuda<device>, uniq_name = "_QFp_allocEcomplex_array"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
185185
%4 = fir.declare %0 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFp_allocEcomplex_array"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>>
186-
%9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>} -> i32
186+
%9 = cuf.allocate %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xcomplex<f32>>>>> {data_attr = #cuf.cuda<device>, pointer} -> i32
187187
return
188188
}
189189

@@ -201,7 +201,7 @@ func.func @_QPpointer_source() {
201201
%5 = cuf.alloc !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a_d", data_attr = #cuf.cuda<device>, uniq_name = "_QFpointer_sourceEa_d"} -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
202202
%7 = fir.declare %5 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFpointer_sourceEa_d"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
203203
%8 = fir.load %4 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
204-
%22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>} -> i32
204+
%22 = cuf.allocate %7 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%8 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, pointer} -> i32
205205
return
206206
}
207207

@@ -226,7 +226,7 @@ func.func @_QQpointer_sync() attributes {fir.bindc_name = "test"} {
226226
%3 = fir.convert %c1 : (index) -> i64
227227
%4 = fir.convert %c10_i32 : (i32) -> i64
228228
fir.call @_FortranAAllocatableSetBounds(%2, %c0_i32, %3, %4) fastmath<contract> : (!fir.ref<!fir.box<none>>, i32, i64, i64) -> ()
229-
%6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
229+
%6 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
230230
return
231231
}
232232

@@ -246,7 +246,7 @@ func.func @_QMmod1Ppointer_source_global() {
246246
%2 = fir.alloca !fir.box<!fir.ptr<!fir.array<?x?xf32>>> {bindc_name = "a", uniq_name = "_QMmod1Fallocate_source_globalEa"}
247247
%6 = fir.declare %2 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QMmod1Fallocate_source_globalEa"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
248248
%7 = fir.load %6 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
249-
%21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor} -> i32
249+
%21 = cuf.allocate %1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> source(%7 : !fir.box<!fir.ptr<!fir.array<?x?xf32>>>) {data_attr = #cuf.cuda<device>, hasDoubleDescriptor, pointer} -> i32
250250
return
251251
}
252252

flang/test/Lower/CUDA/cuda-allocatable.cuf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,14 @@ end
227227
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
228228
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
229229

230+
subroutine devicepointer()
231+
integer, device, pointer :: i(:)
232+
allocate(i(10))
233+
end
234+
235+
! CHECK-LABEL: func.func @_QPdevicepointer()
236+
! CHECK: cuf.allocate{{.*}}pointer
237+
230238
subroutine cuda_component()
231239
use globals
232240
type(t1), pointer, dimension(:) :: d

0 commit comments

Comments
 (0)