-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[OpenMP][MLIR] Descriptor explicit member map lowering changes #111191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Created using spr 1.3.4
|
@llvm/pr-subscribers-mlir-openmp @llvm/pr-subscribers-flang-openmp Author: None (agozillon) ChangesThis is one of 3 PRs in a PR stack that aims to add support for explicit mapping of The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes, There are also additions of tests which should help to showcase the affect Patch is 30.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111191.diff 6 Files Affected:
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 66f63fc02fe2f3..60acf59a7d93e0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -879,7 +879,7 @@ def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> {
TypeAttr:$var_type,
Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
Variadic<OpenMP_PointerLikeType>:$members,
- OptionalAttr<AnyIntElementsAttr>:$members_index,
+ OptionalAttr<IndexListArrayAttr>:$members_index,
Variadic<OpenMP_MapBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
OptionalAttr<UI64Attr>:$map_type,
OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index d516c8d9e0be6c..f9024dd93ae144 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1392,16 +1392,15 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
}
static ParseResult parseMembersIndex(OpAsmParser &parser,
- DenseIntElementsAttr &membersIdx) {
- SmallVector<APInt> values;
+ ArrayAttr &membersIdx) {
+ SmallVector<Attribute> values, memberIdxs;
int64_t value;
- int64_t shape[2] = {0, 0};
- unsigned shapeTmp = 0;
+
auto parseIndices = [&]() -> ParseResult {
if (parser.parseInteger(value))
return failure();
- shapeTmp++;
- values.push_back(APInt(32, value));
+ values.push_back(IntegerAttr::get(parser.getBuilder().getIntegerType(64),
+ mlir::APInt(64, value)));
return success();
};
@@ -1415,51 +1414,32 @@ static ParseResult parseMembersIndex(OpAsmParser &parser,
if (failed(parser.parseRSquare()))
return failure();
- // Only set once, if any indices are not the same size
- // we error out in the next check as that's unsupported
- if (shape[1] == 0)
- shape[1] = shapeTmp;
-
- // Verify that the recently parsed list is equal to the
- // first one we parsed, they must be equal lengths to
- // keep the rectangular shape DenseIntElementsAttr
- // requires
- if (shapeTmp != shape[1])
- return failure();
-
- shapeTmp = 0;
- shape[0]++;
+ memberIdxs.push_back(ArrayAttr::get(parser.getContext(), values));
+ values.clear();
} while (succeeded(parser.parseOptionalComma()));
- if (!values.empty()) {
- ShapedType valueType =
- VectorType::get(shape, IntegerType::get(parser.getContext(), 32));
- membersIdx = DenseIntElementsAttr::get(valueType, values);
- }
+ if (!memberIdxs.empty())
+ membersIdx = ArrayAttr::get(parser.getContext(), memberIdxs);
return success();
}
static void printMembersIndex(OpAsmPrinter &p, MapInfoOp op,
- DenseIntElementsAttr membersIdx) {
- llvm::ArrayRef<int64_t> shape = membersIdx.getShapedType().getShape();
- assert(shape.size() <= 2);
-
+ ArrayAttr membersIdx) {
if (!membersIdx)
return;
- for (int i = 0; i < shape[0]; ++i) {
+ SmallVector<std::string> idxs;
+ for (auto [i, v] : llvm::enumerate(membersIdx)) {
+ auto memberIdx = mlir::cast<mlir::ArrayAttr>(v);
p << "[";
- int rowOffset = i * shape[1];
- for (int j = 0; j < shape[1]; ++j) {
- p << membersIdx.getValues<int32_t>()[rowOffset + j];
- if ((j + 1) < shape[1])
- p << ",";
- }
- p << "]";
-
- if ((i + 1) < shape[0])
+ for (auto v2 : memberIdx.getValue())
+ idxs.push_back(
+ std::to_string(mlir::cast<mlir::IntegerAttr>(v2).getInt()));
+ p << llvm::join(idxs, ",") << "]";
+ if ((i + 1) < membersIdx.getValue().size())
p << ", ";
+ idxs.clear();
}
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 19d80fbbd699b0..a8995ae9646d64 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2468,51 +2468,45 @@ static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
return std::distance(mapData.MapClause.begin(), res);
}
-static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
- bool first) {
- DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
-
+static mlir::omp::MapInfoOp
+getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
+ mlir::ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
// Only 1 member has been mapped, we can return it.
if (indexAttr.size() == 1)
- if (auto mapOp =
- dyn_cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp()))
+ if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+ mapInfo.getMembers()[0].getDefiningOp()))
return mapOp;
- llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
- llvm::SmallVector<size_t> indices(shape[0]);
+ llvm::SmallVector<size_t> indices(indexAttr.size());
std::iota(indices.begin(), indices.end(), 0);
llvm::sort(indices.begin(), indices.end(),
[&](const size_t a, const size_t b) {
- auto indexValues = indexAttr.getValues<int32_t>();
- for (int i = 0; i < shape[1]; ++i) {
- int aIndex = indexValues[a * shape[1] + i];
- int bIndex = indexValues[b * shape[1] + i];
+ auto memberIndicesA = mlir::cast<mlir::ArrayAttr>(indexAttr[a]);
+ auto memberIndicesB = mlir::cast<mlir::ArrayAttr>(indexAttr[b]);
+ for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
+ int64_t aIndex =
+ mlir::cast<mlir::IntegerAttr>(std::get<0>(it)).getInt();
+ int64_t bIndex =
+ mlir::cast<mlir::IntegerAttr>(std::get<1>(it)).getInt();
if (aIndex == bIndex)
continue;
- if (aIndex != -1 && bIndex == -1)
- return false;
-
- if (aIndex == -1 && bIndex != -1)
- return true;
-
- // A is earlier in the record type layout than B
if (aIndex < bIndex)
return first;
- if (bIndex < aIndex)
+ if (aIndex > bIndex)
return !first;
}
- // Iterated the entire list and couldn't make a decision, all
- // elements were likely the same. Return false, since the sort
- // comparator should return false for equal elements.
- return false;
+ // Iterated the up until the end of the smallest member and
+ // they were found to be equal up to that point, so select
+ // the member with the lowest index count, so the "parent"
+ return memberIndicesA.size() < memberIndicesB.size();
});
- return llvm::cast<omp::MapInfoOp>(
+ return llvm::cast<mlir::omp::MapInfoOp>(
mapInfo.getMembers()[indices.front()].getDefiningOp());
}
@@ -2663,6 +2657,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
+ // NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
+ // avoid offset or any manipulations interfering with the calculation.
lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
builder.getPtrTy());
int lastMemberIdx = getMapDataMemberIdx(
@@ -2680,17 +2676,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
/*isSigned=*/false);
combinedInfo.Sizes.push_back(size);
- // TODO: This will need to be expanded to include the whole host of logic for
- // the map flags that Clang currently supports (e.g. it should take the map
- // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
- // further case specific flag modifications). For the moment, it handles what
- // we support as expected.
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
-
llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
// This creates the initial MEMBER_OF mapping that consists of
// the parent/top level container (same as above effectively, except
@@ -2699,6 +2686,12 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// only relevant if the structure in its totality is being mapped,
// otherwise the above suffices.
if (!parentClause.getPartialMap()) {
+ // TODO: This will need to be expanded to include the whole host of logic
+ // for the map flags that Clang currently supports (e.g. it should do some
+ // further case specific flag modifications). For the moment, it handles
+ // what we support as expected.
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -2749,6 +2742,31 @@ static void processMapMembersWithParent(
assert(memberDataIdx >= 0 && "could not find mapped member of structure");
+ // If we're currently mapping a pointer to a block of data, we must
+ // initially map the pointer, and then attatch/bind the data with a
+ // subsequent map to the pointer. This segment of code generates the
+ // pointer mapping, which can in certain cases can be optimised
+ // out as Clang currently does in its lowering. However, for the moment
+ // we do not do so, in part as we currently have substantially less
+ // information on the data being mapped at this stage.
+ if (checkIfPointerMap(memberClause)) {
+ auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+ memberClause.getMapType().value());
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(
+ LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
+ combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
+ combinedInfo.Sizes.emplace_back(builder.getInt64(
+ moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
+ }
+
// Same MemberOfFlag to indicate its link with parent and other members
// of.
auto mapFlag =
@@ -2764,7 +2782,12 @@ static void processMapMembersWithParent(
mapData.DevicePointers[memberDataIdx]);
combinedInfo.Names.emplace_back(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+ if (checkIfPointerMap(memberClause))
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[memberDataIdx]);
+ else
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
new file mode 100644
index 00000000000000..8c2d13fcfd6385
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes, map types and base pointers and pointers
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when performing explicit member mapping of a record type
+// that includes fortran allocatables in various locations of the record types
+// hierarchy.
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
+ llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(4 : index) : i64
+ %1 = llvm.mlir.constant(1 : index) : i64
+ %2 = llvm.mlir.constant(2 : index) : i64
+ %3 = llvm.mlir.constant(0 : index) : i64
+ %4 = llvm.mlir.constant(6 : index) : i64
+ %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true}
+ %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+ %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""}
+ %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
+ %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
+ omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+
+// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
+
+// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i64 1
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 6fe77bd228ef2f..0db856b1b0fa42 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -1,9 +1,9 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// This test checks the offload sizes, map types and base pointers and pointers
-// provided to the OpenMP kernel argument structure are correct when lowering
-// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
-// alongside the omp.map.info, the test utilises mapping of array sections,
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
+// alongside the omp.map.info, the test utilises mapping of array sections,
// full arrays and individual allocated scalars.
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
@@ -59,9 +59,9 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
// CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offloa...
[truncated]
|
|
@llvm/pr-subscribers-mlir-llvm Author: None (agozillon) ChangesThis is one of 3 PRs in a PR stack that aims to add support for explicit mapping of The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes, There are also additions of tests which should help to showcase the affect Patch is 30.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111191.diff 6 Files Affected:
diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 66f63fc02fe2f3..60acf59a7d93e0 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -879,7 +879,7 @@ def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> {
TypeAttr:$var_type,
Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
Variadic<OpenMP_PointerLikeType>:$members,
- OptionalAttr<AnyIntElementsAttr>:$members_index,
+ OptionalAttr<IndexListArrayAttr>:$members_index,
Variadic<OpenMP_MapBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
OptionalAttr<UI64Attr>:$map_type,
OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index d516c8d9e0be6c..f9024dd93ae144 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1392,16 +1392,15 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
}
static ParseResult parseMembersIndex(OpAsmParser &parser,
- DenseIntElementsAttr &membersIdx) {
- SmallVector<APInt> values;
+ ArrayAttr &membersIdx) {
+ SmallVector<Attribute> values, memberIdxs;
int64_t value;
- int64_t shape[2] = {0, 0};
- unsigned shapeTmp = 0;
+
auto parseIndices = [&]() -> ParseResult {
if (parser.parseInteger(value))
return failure();
- shapeTmp++;
- values.push_back(APInt(32, value));
+ values.push_back(IntegerAttr::get(parser.getBuilder().getIntegerType(64),
+ mlir::APInt(64, value)));
return success();
};
@@ -1415,51 +1414,32 @@ static ParseResult parseMembersIndex(OpAsmParser &parser,
if (failed(parser.parseRSquare()))
return failure();
- // Only set once, if any indices are not the same size
- // we error out in the next check as that's unsupported
- if (shape[1] == 0)
- shape[1] = shapeTmp;
-
- // Verify that the recently parsed list is equal to the
- // first one we parsed, they must be equal lengths to
- // keep the rectangular shape DenseIntElementsAttr
- // requires
- if (shapeTmp != shape[1])
- return failure();
-
- shapeTmp = 0;
- shape[0]++;
+ memberIdxs.push_back(ArrayAttr::get(parser.getContext(), values));
+ values.clear();
} while (succeeded(parser.parseOptionalComma()));
- if (!values.empty()) {
- ShapedType valueType =
- VectorType::get(shape, IntegerType::get(parser.getContext(), 32));
- membersIdx = DenseIntElementsAttr::get(valueType, values);
- }
+ if (!memberIdxs.empty())
+ membersIdx = ArrayAttr::get(parser.getContext(), memberIdxs);
return success();
}
static void printMembersIndex(OpAsmPrinter &p, MapInfoOp op,
- DenseIntElementsAttr membersIdx) {
- llvm::ArrayRef<int64_t> shape = membersIdx.getShapedType().getShape();
- assert(shape.size() <= 2);
-
+ ArrayAttr membersIdx) {
if (!membersIdx)
return;
- for (int i = 0; i < shape[0]; ++i) {
+ SmallVector<std::string> idxs;
+ for (auto [i, v] : llvm::enumerate(membersIdx)) {
+ auto memberIdx = mlir::cast<mlir::ArrayAttr>(v);
p << "[";
- int rowOffset = i * shape[1];
- for (int j = 0; j < shape[1]; ++j) {
- p << membersIdx.getValues<int32_t>()[rowOffset + j];
- if ((j + 1) < shape[1])
- p << ",";
- }
- p << "]";
-
- if ((i + 1) < shape[0])
+ for (auto v2 : memberIdx.getValue())
+ idxs.push_back(
+ std::to_string(mlir::cast<mlir::IntegerAttr>(v2).getInt()));
+ p << llvm::join(idxs, ",") << "]";
+ if ((i + 1) < membersIdx.getValue().size())
p << ", ";
+ idxs.clear();
}
}
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 19d80fbbd699b0..a8995ae9646d64 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2468,51 +2468,45 @@ static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
return std::distance(mapData.MapClause.begin(), res);
}
-static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
- bool first) {
- DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
-
+static mlir::omp::MapInfoOp
+getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
+ mlir::ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
// Only 1 member has been mapped, we can return it.
if (indexAttr.size() == 1)
- if (auto mapOp =
- dyn_cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp()))
+ if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+ mapInfo.getMembers()[0].getDefiningOp()))
return mapOp;
- llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
- llvm::SmallVector<size_t> indices(shape[0]);
+ llvm::SmallVector<size_t> indices(indexAttr.size());
std::iota(indices.begin(), indices.end(), 0);
llvm::sort(indices.begin(), indices.end(),
[&](const size_t a, const size_t b) {
- auto indexValues = indexAttr.getValues<int32_t>();
- for (int i = 0; i < shape[1]; ++i) {
- int aIndex = indexValues[a * shape[1] + i];
- int bIndex = indexValues[b * shape[1] + i];
+ auto memberIndicesA = mlir::cast<mlir::ArrayAttr>(indexAttr[a]);
+ auto memberIndicesB = mlir::cast<mlir::ArrayAttr>(indexAttr[b]);
+ for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
+ int64_t aIndex =
+ mlir::cast<mlir::IntegerAttr>(std::get<0>(it)).getInt();
+ int64_t bIndex =
+ mlir::cast<mlir::IntegerAttr>(std::get<1>(it)).getInt();
if (aIndex == bIndex)
continue;
- if (aIndex != -1 && bIndex == -1)
- return false;
-
- if (aIndex == -1 && bIndex != -1)
- return true;
-
- // A is earlier in the record type layout than B
if (aIndex < bIndex)
return first;
- if (bIndex < aIndex)
+ if (aIndex > bIndex)
return !first;
}
- // Iterated the entire list and couldn't make a decision, all
- // elements were likely the same. Return false, since the sort
- // comparator should return false for equal elements.
- return false;
+ // Iterated the up until the end of the smallest member and
+ // they were found to be equal up to that point, so select
+ // the member with the lowest index count, so the "parent"
+ return memberIndicesA.size() < memberIndicesB.size();
});
- return llvm::cast<omp::MapInfoOp>(
+ return llvm::cast<mlir::omp::MapInfoOp>(
mapInfo.getMembers()[indices.front()].getDefiningOp());
}
@@ -2663,6 +2657,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
int firstMemberIdx = getMapDataMemberIdx(
mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
+ // NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
+ // avoid offset or any manipulations interfering with the calculation.
lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
builder.getPtrTy());
int lastMemberIdx = getMapDataMemberIdx(
@@ -2680,17 +2676,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
/*isSigned=*/false);
combinedInfo.Sizes.push_back(size);
- // TODO: This will need to be expanded to include the whole host of logic for
- // the map flags that Clang currently supports (e.g. it should take the map
- // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
- // further case specific flag modifications). For the moment, it handles what
- // we support as expected.
- llvm::omp::OpenMPOffloadMappingFlags mapFlag =
- llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
-
llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
- ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
// This creates the initial MEMBER_OF mapping that consists of
// the parent/top level container (same as above effectively, except
@@ -2699,6 +2686,12 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
// only relevant if the structure in its totality is being mapped,
// otherwise the above suffices.
if (!parentClause.getPartialMap()) {
+ // TODO: This will need to be expanded to include the whole host of logic
+ // for the map flags that Clang currently supports (e.g. it should do some
+ // further case specific flag modifications). For the moment, it handles
+ // what we support as expected.
+ llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
combinedInfo.Types.emplace_back(mapFlag);
combinedInfo.DevicePointers.emplace_back(
llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -2749,6 +2742,31 @@ static void processMapMembersWithParent(
assert(memberDataIdx >= 0 && "could not find mapped member of structure");
+ // If we're currently mapping a pointer to a block of data, we must
+ // initially map the pointer, and then attatch/bind the data with a
+ // subsequent map to the pointer. This segment of code generates the
+ // pointer mapping, which can in certain cases can be optimised
+ // out as Clang currently does in its lowering. However, for the moment
+ // we do not do so, in part as we currently have substantially less
+ // information on the data being mapped at this stage.
+ if (checkIfPointerMap(memberClause)) {
+ auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+ memberClause.getMapType().value());
+ mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+ mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+ ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+ combinedInfo.Types.emplace_back(mapFlag);
+ combinedInfo.DevicePointers.emplace_back(
+ llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+ combinedInfo.Names.emplace_back(
+ LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
+ combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
+ combinedInfo.Sizes.emplace_back(builder.getInt64(
+ moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
+ }
+
// Same MemberOfFlag to indicate its link with parent and other members
// of.
auto mapFlag =
@@ -2764,7 +2782,12 @@ static void processMapMembersWithParent(
mapData.DevicePointers[memberDataIdx]);
combinedInfo.Names.emplace_back(
LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
- combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+ if (checkIfPointerMap(memberClause))
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[memberDataIdx]);
+ else
+ combinedInfo.BasePointers.emplace_back(
+ mapData.BasePointers[mapDataIndex]);
combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
}
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
new file mode 100644
index 00000000000000..8c2d13fcfd6385
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks the offload sizes, map types and base pointers and pointers
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when performing explicit member mapping of a record type
+// that includes fortran allocatables in various locations of the record types
+// hierarchy.
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
+ llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) {
+ %0 = llvm.mlir.constant(4 : index) : i64
+ %1 = llvm.mlir.constant(1 : index) : i64
+ %2 = llvm.mlir.constant(2 : index) : i64
+ %3 = llvm.mlir.constant(0 : index) : i64
+ %4 = llvm.mlir.constant(6 : index) : i64
+ %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true}
+ %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+ %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+ %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+ %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""}
+ %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
+ %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
+ omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ omp.terminator
+ }
+ llvm.return
+ }
+}
+
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+
+// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
+
+// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i64 1
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 6fe77bd228ef2f..0db856b1b0fa42 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -1,9 +1,9 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
// This test checks the offload sizes, map types and base pointers and pointers
-// provided to the OpenMP kernel argument structure are correct when lowering
-// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
-// alongside the omp.map.info, the test utilises mapping of array sections,
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
+// alongside the omp.map.info, the test utilises mapping of array sections,
// full arrays and individual allocated scalars.
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
@@ -59,9 +59,9 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
// CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offloa...
[truncated]
|
|
This portion of the PR stack has remained largely unchanged, the only alteration is incorporating the final set of comments from the last iteration of the PR stack. |
TIFitis
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added couple of nit comments. Otherwise happy with the patch. Thank you for the amazing work :)
| if (indexAttr.size() == 1) | ||
| if (auto mapOp = | ||
| dyn_cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp())) | ||
| if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a scenario where the definingOp can be anything other than a MapInfoOp? If not, you can take get rid of the if statement and replace the dyn_cast with a cast.
| auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]); | ||
| int firstMemberIdx = getMapDataMemberIdx( | ||
| mapData, getFirstOrLastMappedMemberPtr(mapOp, true)); | ||
| // NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to determine this definitively in this patch itself?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortunately not in a way I can currently think of, but I've removed the comment for the time being, I imagine leaving it as pointer for now should be fine.
skatrak
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Andrew, this looks good. I have a couple small suggestions, but nothing major, so I'm approving it. Please address Akash's comments as well before merging.
| SmallVector<APInt> values; | ||
| ArrayAttr &membersIdx) { | ||
| SmallVector<Attribute> values, memberIdxs; | ||
| int64_t value; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Make value local to parseIndices.
| SmallVector<std::string> idxs; | ||
| for (auto [i, v] : llvm::enumerate(membersIdx)) { | ||
| auto memberIdx = mlir::cast<mlir::ArrayAttr>(v); | ||
| p << "["; | ||
| int rowOffset = i * shape[1]; | ||
| for (int j = 0; j < shape[1]; ++j) { | ||
| p << membersIdx.getValues<int32_t>()[rowOffset + j]; | ||
| if ((j + 1) < shape[1]) | ||
| p << ","; | ||
| } | ||
| p << "]"; | ||
|
|
||
| if ((i + 1) < shape[0]) | ||
| for (auto v2 : memberIdx.getValue()) | ||
| idxs.push_back( | ||
| std::to_string(mlir::cast<mlir::IntegerAttr>(v2).getInt())); | ||
| p << llvm::join(idxs, ",") << "]"; | ||
| if ((i + 1) < membersIdx.getValue().size()) | ||
| p << ", "; | ||
| idxs.clear(); | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can still be simplified a bit. Something like this:
llvm::interleaveComma(membersIdx, p, [&p](Attribute v) {
p << "[";
auto memberIdx = cast<ArrayAttr>(v);
llvm::interleaveComma(
memberIdx.getValue(), p,
[&p](Attribute v2) { p << cast<IntegerAttr>(v2).getInt(); });
p << "]";
});| bool first) { | ||
| DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr(); | ||
|
|
||
| static mlir::omp::MapInfoOp |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Here and elsewhere in lines added to this file, we don't need to specify the mlir:: namespace.
| static mlir::omp::MapInfoOp | |
| static omp::MapInfoOp |
| // If we're currently mapping a pointer to a block of data, we must | ||
| // initially map the pointer, and then attatch/bind the data with a | ||
| // subsequent map to the pointer. This segment of code generates the | ||
| // pointer mapping, which can in certain cases can be optimised |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // pointer mapping, which can in certain cases can be optimised | |
| // pointer mapping, which can in certain cases be optimised |
| if (checkIfPointerMap(memberClause)) | ||
| combinedInfo.BasePointers.emplace_back( | ||
| mapData.BasePointers[memberDataIdx]); | ||
| else | ||
| combinedInfo.BasePointers.emplace_back( | ||
| mapData.BasePointers[mapDataIndex]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Consider refactoring this a bit, since at the moment it's a bit difficult to tell at first glance what the difference is between the two branches. Feel free to ignore if you disagree.
| if (checkIfPointerMap(memberClause)) | |
| combinedInfo.BasePointers.emplace_back( | |
| mapData.BasePointers[memberDataIdx]); | |
| else | |
| combinedInfo.BasePointers.emplace_back( | |
| mapData.BasePointers[mapDataIndex]); | |
| uint64_t basePointerIndex = checkIfPointerMap(memberClause)? memberDataIdx : mapDataIndex; | |
| combinedInfo.BasePointers.emplace_back(mapData.BasePointers[basePointerIndex]); |
| // This test checks the offload sizes, map types and base pointers and pointers | ||
| // provided to the OpenMP kernel argument structure are correct when lowering | ||
| // to LLVM-IR from MLIR when performing explicit member mapping of a record type | ||
| // that includes fortran allocatables in various locations of the record types | ||
| // hierarchy. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Could this test be described in terms of omp, llvm or builtin dialect MLIR operations and types? This is a test of the OpenMP dialect, so it's odd to find references to Fortran allocatables when the important thing here is that we're translating omp.map.info operations taking arguments of some specific types or perhaps containing a var_ptr_ptr argument (not sure what exactly the new thing being tested by this is, in terms of MLIR input).
In general, it looks like names in the test could be simplified too, since we don't need them to look like the raw output of Flang lowering.
This is one of 3 PRs in a PR stack that aims to add support for explicit mapping of
allocatable members in derived types.
The primary changes in this PR are the OpenMPToLLVMIRTranslation.cpp changes,
which are small and seek to alter the current member mapping to add an
additional map insertion for pointers. Effectively, if the member is a pointer
(currently indicated by having a varPtrPtr field) we add an additional map for
the pointer and then alter the subsequent mapping of the member (the data)
to utilise the member rather than the parents base pointer. This appears to be
necessary in certain cases when mapping pointer data within record types to
avoid segfaulting on device (due to incorrect data mapping). In general this
record type mapping may be simplifiable in the future.
There are also additions of tests which should help to showcase the affect
of the changes above.