Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
204 changes: 140 additions & 64 deletions flang/lib/Lower/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,17 +244,29 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortranDesc << accFirDescriptorPostfix.str();

// Updating descriptor must occur before the mapping of the data so that
// attached data pointer is not overwritten.
mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, descTy,
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
// For descriptor, preserve old behavior when unwrapping FIR box: update.
if (unwrapFirBox) {
mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, descTy,
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands,
operandSegments);
} else {
// New behavior: start a structured region with declare_enter.
EntryOp descEntryOp = createDataEntryOp<EntryOp>(
builder, loc, registerFuncOp.getArgument(0), asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true, clause, descTy,
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareEnterOp::create(
builder, loc,
mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
mlir::ValueRange(descEntryOp.getAccVar()));
}

if (unwrapFirBox) {
mlir::Value desc =
Expand Down Expand Up @@ -299,30 +311,58 @@ static void createDeclareDeallocFuncWithArg(
}

llvm::SmallVector<mlir::Value> bounds;
mlir::acc::GetDevicePtrOp entryOp =
createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/false, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
mlir::ValueRange(entryOp.getAccVar()));

if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getVar(), entryOp.getVarType(), entryOp.getBounds(),
entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
else
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getBounds(), entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
if (unwrapFirBox) {
// Unwrap: delete device payload using getdeviceptr + declare_exit + ExitOp
mlir::acc::GetDevicePtrOp entryOp =
createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/false, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
mlir::ValueRange(entryOp.getAccVar()));

if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getVar(), entryOp.getVarType(),
entryOp.getBounds(), entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
else
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getBounds(), entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
} else {
mlir::acc::GetDevicePtrOp entryOp =
createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/false, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
mlir::ValueRange(entryOp.getAccVar()));

if constexpr (std::is_same_v<ExitOp, mlir::acc::CopyoutOp> ||
std::is_same_v<ExitOp, mlir::acc::UpdateHostOp>)
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getVar(), entryOp.getVarType(),
entryOp.getBounds(), entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
else
ExitOp::create(builder, entryOp.getLoc(), entryOp.getAccVar(),
entryOp.getBounds(), entryOp.getAsyncOperands(),
entryOp.getAsyncOperandsDeviceTypeAttr(),
entryOp.getAsyncOnlyAttr(), entryOp.getDataClause(),
/*structured=*/false, /*implicit=*/false,
builder.getStringAttr(*entryOp.getName()));
}

// Generate the post dealloc function.
modBuilder.setInsertionPointAfter(preDeallocOp);
Expand All @@ -338,15 +378,28 @@ static void createDeclareDeallocFuncWithArg(
asFortran << accFirDescriptorPostfix.str();
}

mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
if (unwrapFirBox) {
// Old behavior: update descriptor after deallocation.
mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands,
operandSegments);
} else {
// New behavior: end structured region with declare_exit.
mlir::acc::GetDevicePtrOp postEntryOp =
createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, var, asFortran, bounds,
/*structured=*/false, /*implicit=*/true, clause, var.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
mlir::ValueRange(postEntryOp.getAccVar()));
}
modBuilder.setInsertionPointAfter(postDeallocOp);
builder.restoreInsertionPoint(crtInsPt);
}
Expand Down Expand Up @@ -3989,17 +4042,28 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder,
asFortranDesc << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;

// Updating descriptor must occur before the mapping of the data so that
// attached data pointer is not overwritten.
mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, addrOp, asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
// For unwrapFirBox=false this remains declare_enter; for unwrapFirBox=true,
// the descriptor post-alloc remains update behavior.
if (unwrapFirBox) {
mlir::acc::UpdateDeviceOp updDesc =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, addrOp, asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> seg{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> ops{updDesc.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, ops, seg);
} else {
EntryOp descEntryOp = createDataEntryOp<EntryOp>(
builder, loc, addrOp, asFortranDesc, bounds,
/*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareEnterOp::create(
builder, loc,
mlir::acc::DeclareTokenType::get(descEntryOp.getContext()),
mlir::ValueRange(descEntryOp.getAccVar()));
}

if (unwrapFirBox) {
auto loadOp = fir::LoadOp::create(builder, loc, addrOp.getResult());
Expand Down Expand Up @@ -4092,15 +4156,27 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder,
if (unwrapFirBox)
asFortran << accFirDescriptorPostfix.str();
llvm::SmallVector<mlir::Value> bounds;
mlir::acc::UpdateDeviceOp updateDeviceOp =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, addrOp, asFortran, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> operandSegments{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> operands{updateDeviceOp.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, operands, operandSegments);
if (unwrapFirBox) {
// Unwrap mode: update the descriptor after deallocation (no declare_exit).
mlir::acc::UpdateDeviceOp updDesc =
createDataEntryOp<mlir::acc::UpdateDeviceOp>(
builder, loc, addrOp, asFortran, bounds,
/*structured=*/false, /*implicit=*/true,
mlir::acc::DataClause::acc_update_device, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
llvm::SmallVector<int32_t> seg{0, 0, 0, 1};
llvm::SmallVector<mlir::Value> ops{updDesc.getResult()};
createSimpleOp<mlir::acc::UpdateOp>(builder, loc, ops, seg);
} else {
// Default: end the structured declare region using declare_exit.
mlir::acc::GetDevicePtrOp descEntryOp =
createDataEntryOp<mlir::acc::GetDevicePtrOp>(
builder, loc, addrOp, asFortran, bounds,
/*structured=*/false, /*implicit=*/true, clause, addrOp.getType(),
/*async=*/{}, /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
mlir::acc::DeclareExitOp::create(builder, loc, mlir::Value{},
mlir::ValueRange(descEntryOp.getAccVar()));
}
modBuilder.setInsertionPointAfter(postDeallocOp);
}

Expand Down
16 changes: 8 additions & 8 deletions flang/test/Lower/OpenACC/acc-declare.f90
Original file line number Diff line number Diff line change
Expand Up @@ -250,15 +250,15 @@ subroutine acc_declare_allocate()

! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we update the name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@razvanlupusoru could you please weigh in here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes please :) It's OK to avoid "update_desc" in the name in both old behavior and new behavior.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated!

! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
Comment on lines +253 to +254
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the device box is only created post-allocation, what happens if there is a descriptor query request on the device before allocation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will get the empty descriptor with no data attached to it. This is because in the constructor we also call acc.declare_enter, which ensures that the descriptor will be present at device attach time. We handle acc.declare_enter in these two scenarios differently downstream.

! CHECK: return
! CHECK: }

! CHECK-LABEL: func.func private @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_dealloc(
! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "a", structured = false}
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARG0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "a", structured = false}
! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }

Expand Down Expand Up @@ -330,15 +330,15 @@ module acc_declare_allocatable_test

! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_alloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[CREATE_DESC:.*]] = acc.create varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
! CHECK: acc.declare_enter dataOperands(%[[CREATE_DESC]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }

! CHECK-LABEL: func.func private @_QMacc_declare_allocatable_testEdata1_acc_declare_update_desc_post_dealloc() {
! CHECK: %[[GLOBAL_ADDR:.*]] = fir.address_of(@_QMacc_declare_allocatable_testEdata1) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: %[[UPDATE:.*]] = acc.update_device varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {implicit = true, name = "data1", structured = false}
! CHECK: acc.update dataOperands(%[[UPDATE]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[GLOBAL_ADDR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {dataClause = #acc<data_clause acc_create>, implicit = true, name = "data1", structured = false}
! CHECK: acc.declare_exit dataOperands(%[[DEVPTR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
! CHECK: return
! CHECK: }

Expand Down