diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index f28454075f1d3..8edec990eaaba 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, /// /// Fortran /// map(tofrom: array(2:5, 3:2)) -/// or -/// C++ -/// map(tofrom: array[1:4][2:3]) +/// /// We must calculate the initial pointer offset to pass across, this function /// performs this using bounds. /// +/// TODO/WARNING: This only supports Fortran's column major indexing currently +/// as is noted in the note below and comments in the function, we must extend +/// this function when we add a C++ frontend. /// NOTE: which while specified in row-major order it currently needs to be /// flipped for Fortran's column order array allocation and access (as /// opposed to C++'s row-major, hence the backwards processing where order is @@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, // with a pointer that's being treated like an array and we have the // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base // address (pointer pointing to the actual data) so we must caclulate the - // offset using a single index which the following two loops attempts to - // compute. - - // Calculates the size offset we need to make per row e.g. first row or - // column only needs to be offset by one, but the next would have to be - // the previous row/column offset multiplied by the extent of current row. + // offset using a single index which the following loop attempts to + // compute using the standard column-major algorithm e.g for a 3D array: // - // For example ([1][10][100]): + // ((((c_idx * b_len) + b_idx) * a_len) + a_idx) // - // - First row/column we move by 1 for each index increment - // - Second row/column we move by 1 (first row/column) * 10 (extent/size of - // current) for 10 for each index increment - // - Third row/column we would move by 10 (second row/column) * - // (extent/size of current) 100 for 1000 for each index increment - std::vector dimensionIndexSizeOffset{builder.getInt64(1)}; - for (size_t i = 1; i < bounds.size(); ++i) { - if (auto boundOp = dyn_cast_if_present( - bounds[i].getDefiningOp())) { - dimensionIndexSizeOffset.push_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getExtent()), - dimensionIndexSizeOffset[i - 1])); - } - } - - // Now that we have calculated how much we move by per index, we must - // multiply each lower bound offset in indexes by the size offset we - // have calculated in the previous and accumulate the results to get - // our final resulting offset. + // It is of note that it's doing column-major rather than row-major at the + // moment, but having a way for the frontend to indicate which major format + // to use or standardizing/canonicalizing the order of the bounds to compute + // the offset may be useful in the future when there's other frontends with + // different formats. + std::vector dimensionIndexSizeOffset; for (int i = bounds.size() - 1; i >= 0; --i) { if (auto boundOp = dyn_cast_if_present( bounds[i].getDefiningOp())) { - if (idx.empty()) - idx.emplace_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + if (i == ((int)bounds.size() - 1)) + idx.emplace_back( + moduleTranslation.lookupValue(boundOp.getLowerBound())); else idx.back() = builder.CreateAdd( - idx.back(), builder.CreateMul(moduleTranslation.lookupValue( - boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + builder.CreateMul(idx.back(), moduleTranslation.lookupValue( + boundOp.getExtent())), + moduleTranslation.lookupValue(boundOp.getLowerBound())); } } } diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir index a1e415c35e4b6..9640f03311af7 100644 --- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir @@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0 -// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8 -// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]] +// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]] // CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8 // CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null diff --git a/offload/test/offloading/fortran/descriptor-array-slice-map.f90 b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 new file mode 100644 index 0000000000000..69abb320adc35 --- /dev/null +++ b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 @@ -0,0 +1,61 @@ +! Offloading test which aims to test that an allocatable/descriptor type map +! will allow the appropriate slicing behaviour. +! REQUIRES: flang, amdgpu + +subroutine slice_writer(n, a, b, c) + implicit none + integer, intent(in) :: n + real(8), intent(in) :: a(n) + real(8), intent(in) :: b(n) + real(8), intent(out) :: c(n) + integer :: i + + !$omp target teams distribute parallel do + do i=1,n + c(i) = b(i) + a(i) + end do +end subroutine slice_writer + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + real(kind=8), allocatable :: a(:,:,:) + integer :: i, j, k, idx, idx1, idx2, idx3 + + i=50 + j=100 + k=2 + + allocate(a(1:i,1:j,1:k)) + + do idx1=1, i + do idx2=1, j + do idx3=1, k + a(idx1,idx2,idx3) = idx2 + end do + end do + end do + + do idx=1,k + !$omp target enter data map(alloc: a(1:i,:, idx)) + + !$omp target update to(a(1:i, 1:30, idx), & + !$omp& a(1:i, 61:100, idx)) + + call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx)) + call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx)) + + !$omp target update from(a(1:i, 31:60, idx)) + !$omp target exit data map(delete: a(1:i, :, idx)) + + print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx) + print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx) + enddo + + deallocate(a) +end program + +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130. +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130.