diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 66f63fc02fe2f..60acf59a7d93e 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -879,7 +879,7 @@ def MapInfoOp : OpenMP_Op<"map.info", [AttrSizedOperandSegments]> {
                        TypeAttr:$var_type,
                        Optional<OpenMP_PointerLikeType>:$var_ptr_ptr,
                        Variadic<OpenMP_PointerLikeType>:$members,
-                       OptionalAttr<AnyIntElementsAttr>:$members_index,
+                       OptionalAttr<IndexListArrayAttr>:$members_index,
                        Variadic<OpenMP_MapBoundsType>:$bounds, /* rank-0 to rank-{n-1} */
                        OptionalAttr<UI64Attr>:$map_type,
                        OptionalAttr<VariableCaptureKindAttr>:$map_capture_type,
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index d516c8d9e0be6..f9024dd93ae14 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1392,16 +1392,15 @@ static void printMapClause(OpAsmPrinter &p, Operation *op,
 }
 
 static ParseResult parseMembersIndex(OpAsmParser &parser,
-                                     DenseIntElementsAttr &membersIdx) {
-  SmallVector<APInt> values;
+                                     ArrayAttr &membersIdx) {
+  SmallVector<Attribute> values, memberIdxs;
   int64_t value;
-  int64_t shape[2] = {0, 0};
-  unsigned shapeTmp = 0;
+
   auto parseIndices = [&]() -> ParseResult {
     if (parser.parseInteger(value))
       return failure();
-    shapeTmp++;
-    values.push_back(APInt(32, value));
+    values.push_back(IntegerAttr::get(parser.getBuilder().getIntegerType(64),
+                                      mlir::APInt(64, value)));
     return success();
   };
 
@@ -1415,51 +1414,32 @@ static ParseResult parseMembersIndex(OpAsmParser &parser,
     if (failed(parser.parseRSquare()))
       return failure();
 
-    // Only set once, if any indices are not the same size
-    // we error out in the next check as that's unsupported
-    if (shape[1] == 0)
-      shape[1] = shapeTmp;
-
-    // Verify that the recently parsed list is equal to the
-    // first one we parsed, they must be equal lengths to
-    // keep the rectangular shape DenseIntElementsAttr
-    // requires
-    if (shapeTmp != shape[1])
-      return failure();
-
-    shapeTmp = 0;
-    shape[0]++;
+    memberIdxs.push_back(ArrayAttr::get(parser.getContext(), values));
+    values.clear();
   } while (succeeded(parser.parseOptionalComma()));
 
-  if (!values.empty()) {
-    ShapedType valueType =
-        VectorType::get(shape, IntegerType::get(parser.getContext(), 32));
-    membersIdx = DenseIntElementsAttr::get(valueType, values);
-  }
+  if (!memberIdxs.empty())
+    membersIdx = ArrayAttr::get(parser.getContext(), memberIdxs);
 
   return success();
 }
 
 static void printMembersIndex(OpAsmPrinter &p, MapInfoOp op,
-                              DenseIntElementsAttr membersIdx) {
-  llvm::ArrayRef<int64_t> shape = membersIdx.getShapedType().getShape();
-  assert(shape.size() <= 2);
-
+                              ArrayAttr membersIdx) {
   if (!membersIdx)
     return;
 
-  for (int i = 0; i < shape[0]; ++i) {
+  SmallVector<std::string> idxs;
+  for (auto [i, v] : llvm::enumerate(membersIdx)) {
+    auto memberIdx = mlir::cast<mlir::ArrayAttr>(v);
     p << "[";
-    int rowOffset = i * shape[1];
-    for (int j = 0; j < shape[1]; ++j) {
-      p << membersIdx.getValues<int32_t>()[rowOffset + j];
-      if ((j + 1) < shape[1])
-        p << ",";
-    }
-    p << "]";
-
-    if ((i + 1) < shape[0])
+    for (auto v2 : memberIdx.getValue())
+      idxs.push_back(
+          std::to_string(mlir::cast<mlir::IntegerAttr>(v2).getInt()));
+    p << llvm::join(idxs, ",") << "]";
+    if ((i + 1) < membersIdx.getValue().size())
       p << ", ";
+    idxs.clear();
   }
 }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 19d80fbbd699b..a8995ae9646d6 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -2468,51 +2468,45 @@ static int getMapDataMemberIdx(MapInfoData &mapData, omp::MapInfoOp memberOp) {
   return std::distance(mapData.MapClause.begin(), res);
 }
 
-static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
-                                                    bool first) {
-  DenseIntElementsAttr indexAttr = mapInfo.getMembersIndexAttr();
-
+static mlir::omp::MapInfoOp
+getFirstOrLastMappedMemberPtr(mlir::omp::MapInfoOp mapInfo, bool first) {
+  mlir::ArrayAttr indexAttr = mapInfo.getMembersIndexAttr();
   // Only 1 member has been mapped, we can return it.
   if (indexAttr.size() == 1)
-    if (auto mapOp =
-            dyn_cast<omp::MapInfoOp>(mapInfo.getMembers()[0].getDefiningOp()))
+    if (auto mapOp = mlir::dyn_cast<mlir::omp::MapInfoOp>(
+            mapInfo.getMembers()[0].getDefiningOp()))
       return mapOp;
 
-  llvm::ArrayRef<int64_t> shape = indexAttr.getShapedType().getShape();
-  llvm::SmallVector<size_t> indices(shape[0]);
+  llvm::SmallVector<size_t> indices(indexAttr.size());
   std::iota(indices.begin(), indices.end(), 0);
 
   llvm::sort(indices.begin(), indices.end(),
             [&](const size_t a, const size_t b) {
-              auto indexValues = indexAttr.getValues<int32_t>();
-              for (int i = 0; i < shape[1]; ++i) {
-                int aIndex = indexValues[a * shape[1] + i];
-                int bIndex = indexValues[b * shape[1] + i];
+              auto memberIndicesA = mlir::cast<mlir::ArrayAttr>(indexAttr[a]);
+              auto memberIndicesB = mlir::cast<mlir::ArrayAttr>(indexAttr[b]);
+              for (const auto it : llvm::zip(memberIndicesA, memberIndicesB)) {
+                int64_t aIndex =
+                    mlir::cast<mlir::IntegerAttr>(std::get<0>(it)).getInt();
+                int64_t bIndex =
+                    mlir::cast<mlir::IntegerAttr>(std::get<1>(it)).getInt();
                 if (aIndex == bIndex)
                   continue;
 
-                if (aIndex != -1 && bIndex == -1)
-                  return false;
-
-                if (aIndex == -1 && bIndex != -1)
-                  return true;
-
-                // A is earlier in the record type layout than B
                 if (aIndex < bIndex)
                   return first;
 
-                if (bIndex < aIndex)
+                if (aIndex > bIndex)
                   return !first;
               }
 
-              // Iterated the entire list and couldn't make a decision, all
-              // elements were likely the same. Return false, since the sort
-              // comparator should return false for equal elements.
-              return false;
+              // Iterated up until the end of the smallest member index list
+              // and found the indices equal up to that point, so select the
+              // member with the lowest index count: the "parent".
+              return memberIndicesA.size() < memberIndicesB.size();
            });
 
-  return llvm::cast<omp::MapInfoOp>(
+  return llvm::cast<mlir::omp::MapInfoOp>(
       mapInfo.getMembers()[indices.front()].getDefiningOp());
 }
@@ -2663,6 +2657,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
   auto mapOp = dyn_cast<omp::MapInfoOp>(mapData.MapClause[mapDataIndex]);
   int firstMemberIdx = getMapDataMemberIdx(
       mapData, getFirstOrLastMappedMemberPtr(mapOp, true));
+  // NOTE/TODO: Should perhaps use OriginalValue here instead of Pointers to
+  // avoid offset or any manipulations interfering with the calculation.
   lowAddr = builder.CreatePointerCast(mapData.Pointers[firstMemberIdx],
                                      builder.getPtrTy());
   int lastMemberIdx = getMapDataMemberIdx(
@@ -2680,17 +2676,8 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
                                 /*isSigned=*/false);
   combinedInfo.Sizes.push_back(size);
 
-  // TODO: This will need to be expanded to include the whole host of logic for
-  // the map flags that Clang currently supports (e.g. it should take the map
-  // flag of the parent map flag, remove the OMP_MAP_TARGET_PARAM and do some
-  // further case specific flag modifications). For the moment, it handles what
-  // we support as expected.
-  llvm::omp::OpenMPOffloadMappingFlags mapFlag =
-      llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO;
-
   llvm::omp::OpenMPOffloadMappingFlags memberOfFlag =
       ompBuilder.getMemberOfFlag(combinedInfo.BasePointers.size() - 1);
-  ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
 
   // This creates the initial MEMBER_OF mapping that consists of
   // the parent/top level container (same as above effectively, except
@@ -2699,6 +2686,12 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers(
   // only relevant if the structure in its totality is being mapped,
   // otherwise the above suffices.
   if (!parentClause.getPartialMap()) {
+    // TODO: This will need to be expanded to include the whole host of logic
+    // for the map flags that Clang currently supports (e.g. it should do some
+    // further case specific flag modifications). For the moment, it handles
+    // what we support as expected.
+    llvm::omp::OpenMPOffloadMappingFlags mapFlag = mapData.Types[mapDataIndex];
+    ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
     combinedInfo.Types.emplace_back(mapFlag);
     combinedInfo.DevicePointers.emplace_back(
         llvm::OpenMPIRBuilder::DeviceInfoTy::None);
@@ -2749,6 +2742,31 @@ static void processMapMembersWithParent(
 
     assert(memberDataIdx >= 0 && "could not find mapped member of structure");
 
+    // If we're currently mapping a pointer to a block of data, we must
+    // initially map the pointer, and then attach/bind the data with a
+    // subsequent map to the pointer. This segment of code generates the
+    // pointer mapping, which can in certain cases be optimised out as
+    // Clang currently does in its lowering. However, for the moment we do
+    // not do so, in part because we currently have substantially less
+    // information on the data being mapped at this stage.
+    if (checkIfPointerMap(memberClause)) {
+      auto mapFlag = llvm::omp::OpenMPOffloadMappingFlags(
+          memberClause.getMapType().value());
+      mapFlag &= ~llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
+      mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
+      ompBuilder.setCorrectMemberOfFlag(mapFlag, memberOfFlag);
+      combinedInfo.Types.emplace_back(mapFlag);
+      combinedInfo.DevicePointers.emplace_back(
+          llvm::OpenMPIRBuilder::DeviceInfoTy::None);
+      combinedInfo.Names.emplace_back(
+          LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
+      combinedInfo.BasePointers.emplace_back(
+          mapData.BasePointers[mapDataIndex]);
+      combinedInfo.Pointers.emplace_back(mapData.BasePointers[memberDataIdx]);
+      combinedInfo.Sizes.emplace_back(builder.getInt64(
+          moduleTranslation.getLLVMModule()->getDataLayout().getPointerSize()));
+    }
+
     // Same MemberOfFlag to indicate its link with parent and other members
     // of.
     auto mapFlag =
@@ -2764,7 +2782,12 @@
         mapData.DevicePointers[memberDataIdx]);
     combinedInfo.Names.emplace_back(
         LLVM::createMappingInformation(memberClause.getLoc(), ompBuilder));
-    combinedInfo.BasePointers.emplace_back(mapData.BasePointers[mapDataIndex]);
+    if (checkIfPointerMap(memberClause))
+      combinedInfo.BasePointers.emplace_back(
+          mapData.BasePointers[memberDataIdx]);
+    else
+      combinedInfo.BasePointers.emplace_back(
+          mapData.BasePointers[mapDataIndex]);
     combinedInfo.Pointers.emplace_back(mapData.Pointers[memberDataIdx]);
     combinedInfo.Sizes.emplace_back(mapData.Sizes[memberDataIdx]);
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
new file mode 100644
index 0000000000000..8c2d13fcfd638
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-record-type-mapping-host.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// This test checks that the offload sizes, map types, base pointers and
+// pointers provided to the OpenMP kernel argument structure are correct
+// when lowering to LLVM-IR from MLIR when performing explicit member
+// mapping of a record type that includes Fortran allocatables at various
+// locations of the record type's hierarchy.
+
+module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
+  llvm.func @omp_nested_derived_type_alloca_map(%arg0: !llvm.ptr) {
+    %0 = llvm.mlir.constant(4 : index) : i64
+    %1 = llvm.mlir.constant(1 : index) : i64
+    %2 = llvm.mlir.constant(2 : index) : i64
+    %3 = llvm.mlir.constant(0 : index) : i64
+    %4 = llvm.mlir.constant(6 : index) : i64
+    %5 = omp.map.bounds lower_bound(%3 : i64) upper_bound(%0 : i64) extent(%0 : i64) stride(%1 : i64) start_idx(%3 : i64) {stride_in_bytes = true}
+    %6 = llvm.getelementptr %arg0[0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>
+    %7 = llvm.getelementptr %6[0, 2] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>
+    %8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
+    %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) var_ptr_ptr(%8 : !llvm.ptr) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !llvm.ptr {name = ""}
+    %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"}
+    %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer", (f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<"_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer", (f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true}
+    omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+      omp.terminator
+    }
+    llvm.return
+  }
+}
+
+// CHECK: @.offload_sizes{{.*}} = private unnamed_addr constant [4 x i64] [i64 0, i64 48, i64 8, i64 20]
+// CHECK: @.offload_maptypes{{.*}} = private unnamed_addr constant [4 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675]
+
+// CHECK: define void @omp_nested_derived_type_alloca_map(ptr %[[ARG:.*]]) {
+
+// CHECK: %[[NESTED_DTYPE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer, ptr %[[ARG]], i32 0, i32 6
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_GEP:.*]] = getelementptr %_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer, ptr %[[NESTED_DTYPE_MEMBER_GEP]], i32 0, i32 2
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i32 0, i32 0
+// CHECK: %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD:.*]] = load ptr, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], align 8
+// CHECK: %[[ARR_OFFSET:.*]] = getelementptr inbounds i32, ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_LOAD]], i64 0
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], i64 1
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_2:.*]] = ptrtoint ptr %[[DTYPE_SIZE_SEGMENT_CALC_1]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_3:.*]] = ptrtoint ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]] to i64
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_4:.*]] = sub i64 %[[DTYPE_SIZE_SEGMENT_CALC_2]], %[[DTYPE_SIZE_SEGMENT_CALC_3]]
+// CHECK: %[[DTYPE_SIZE_SEGMENT_CALC_5:.*]] = sdiv exact i64 %[[DTYPE_SIZE_SEGMENT_CALC_4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+// CHECK: %[[OFFLOAD_SIZES:.*]] = getelementptr inbounds [4 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: store i64 %[[DTYPE_SIZE_SEGMENT_CALC_5]], ptr %[[OFFLOAD_SIZES]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: store ptr %[[ARG]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[OFFLOAD_PTRS]], align 8
+
+// CHECK: %[[BASE_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: store ptr %[[NESTED_ALLOCATABLE_MEMBER_BADDR_GEP]], ptr %[[BASE_PTRS]], align 8
+// CHECK: %[[OFFLOAD_PTRS:.*]] = getelementptr inbounds [4 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: store ptr %[[ARR_OFFSET]], ptr %[[OFFLOAD_PTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
index 6fe77bd228ef2..0db856b1b0fa4 100644
--- a/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-fortran-allocatable-types-host.mlir
@@ -1,9 +1,9 @@
 // RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
 
 // This test checks the offload sizes, map types and base pointers and pointers
-// provided to the OpenMP kernel argument structure are correct when lowering
-// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
-// alongside the omp.map.info, the test utilises mapping of array sections,
+// provided to the OpenMP kernel argument structure are correct when lowering
+// to LLVM-IR from MLIR when a fortran allocatable descriptor type is provided
+// alongside the omp.map.info, the test utilises mapping of array sections,
 // full arrays and individual allocated scalars.
 
 module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} {
@@ -59,9 +59,9 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: @[[FULL_ARR_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
 // CHECK: @[[ARR_SECT_GLOB:.*]] = internal global { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } undef
-// CHECK: @.offload_sizes = private unnamed_addr constant [9 x i64] [i64 0, i64 48, i64 0, i64 0, i64 48, i64 0, i64 0, i64 24, i64 4]
-// CHECK: @.offload_maptypes = private unnamed_addr constant [9 x i64] [i64 32, i64 281474976710657, i64 281474976710675, i64 32, i64 1125899906842625, i64 1125899906842643, i64 32, i64 1970324836974593, i64 1970324836974611]
-// CHECK: @.offload_mapnames = private constant [9 x ptr] [ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}]
+// CHECK: @.offload_sizes = private unnamed_addr constant [12 x i64] [i64 0, i64 48, i64 8, i64 0, i64 0, i64 48, i64 8, i64 0, i64 0, i64 24, i64 8, i64 4]
+// CHECK: @.offload_maptypes = private unnamed_addr constant [12 x i64] [i64 32, i64 281474976710659, i64 281474976710659, i64 281474976710675, i64 32, i64 1407374883553283, i64 1407374883553283, i64 1407374883553299, i64 32, i64 2533274790395907, i64 2533274790395907, i64 2533274790395923]
+// CHECK: @.offload_mapnames = private constant [12 x ptr] [ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}, ptr @{{.*}}]
 
 // CHECK: define void @_QQmain()
 // CHECK: %[[SCALAR_ALLOCA:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
@@ -93,53 +93,63 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: %[[SCALAR_DESC_SZ1:.*]] = sub i64 %[[SCALAR_DESC_SZ3]], %[[SCALAR_DESC_SZ2]]
 // CHECK: %[[SCALAR_DESC_SZ:.*]] = sdiv exact i64 %[[SCALAR_DESC_SZ1]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADPTRS]], align 8
-
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 0
 // CHECK: store i64 %[[FULL_ARR_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 1
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 2
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
 // CHECK: store ptr @_QFEfull_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 2
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 3
 // CHECK: store ptr %[[FULL_ARR_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 2
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 3
 // CHECK: store i64 %[[FULL_ARR_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 3
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
 // CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 3
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 4
 // CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 3
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 4
 // CHECK: store i64 %[[ARR_SECT_DESC_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 4
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
 // CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 4
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 6
 // CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
 // CHECK: store ptr @_QFEsect_arr, ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 5
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 7
 // CHECK: store ptr %[[ARR_SECT_PTR]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 5
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 7
 // CHECK: store i64 %[[ARR_SECT_SIZE]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 6
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 8
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [9 x i64], ptr %.offload_sizes, i32 0, i32 6
+// CHECK: %[[OFFLOADSIZES:.*]] = getelementptr inbounds [12 x i64], ptr %.offload_sizes, i32 0, i32 8
 // CHECK: store i64 %[[SCALAR_DESC_SZ]], ptr %[[OFFLOADSIZES]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 9
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 7
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 9
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADPTRS]], align 8
-// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_baseptrs, i32 0, i32 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 10
 // CHECK: store ptr %[[SCALAR_ALLOCA]], ptr %[[OFFLOADBASEPTRS]], align 8
-// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [9 x ptr], ptr %.offload_ptrs, i32 0, i32 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 10
+// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADPTRS]], align 8
+// CHECK: %[[OFFLOADBASEPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_baseptrs, i32 0, i32 11
+// CHECK: store ptr %[[SCALAR_BASE]], ptr %[[OFFLOADBASEPTRS]], align 8
+// CHECK: %[[OFFLOADPTRS:.*]] = getelementptr inbounds [12 x ptr], ptr %.offload_ptrs, i32 0, i32 11
 // CHECK: store ptr %[[SCALAR_PTR_LOAD]], ptr %[[OFFLOADPTRS]], align 8
diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
index 8c1182c839a25..8837b42f70a44 100644
--- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir
@@ -21,7 +21,7 @@ llvm.func @_QQmain() {
   %9 = llvm.getelementptr %4[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)>
   %10 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64)
   %11 = omp.map.info var_ptr(%9 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr
-  %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%6, %8, %11 : [3, -1], [2, 1], [1, -1] : !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true}
+  %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%6, %8, %11 : [3], [2, 1], [1] : !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true}
   omp.target map_entries(%6 -> %arg0, %8 -> %arg1, %11 -> %arg2, %12 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) {
     omp.terminator
   }
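
For illustration, the members clause change this patch makes to omptarget-nested-record-type-mapping-host.mlir above shows the practical effect of moving members_index from a DenseIntElementsAttr to an ArrayAttr of ArrayAttrs: per-member index lists are now ragged, so they no longer have to be padded with -1 to a common rectangular shape. A minimal before/after sketch taken directly from that test:

// Before: every index list padded with -1 to the length of the longest.
members(%6, %8, %11 : [3, -1], [2, 1], [1, -1] : !llvm.ptr, !llvm.ptr, !llvm.ptr)

// After: one ragged index list per member, no padding required.
members(%6, %8, %11 : [3], [2, 1], [1] : !llvm.ptr, !llvm.ptr, !llvm.ptr)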