Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 19 additions & 36 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
///
/// Fortran
/// map(tofrom: array(2:5, 3:2))
/// or
/// C++
/// map(tofrom: array[1:4][2:3])
///
/// We must calculate the initial pointer offset to pass across, this function
/// performs this using bounds.
///
/// TODO/WARNING: This only supports Fortran's column major indexing currently
/// as is noted in the note below and comments in the function, we must extend
/// this function when we add a C++ frontend.
/// NOTE: which while specified in row-major order it currently needs to be
/// flipped for Fortran's column order array allocation and access (as
/// opposed to C++'s row-major, hence the backwards processing where order is
Expand Down Expand Up @@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
// with a pointer that's being treated like an array and we have the
// underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
// address (pointer pointing to the actual data) so we must calculate the
// offset using a single index which the following two loops attempts to
// compute.

// Calculates the size offset we need to make per row e.g. first row or
// column only needs to be offset by one, but the next would have to be
// the previous row/column offset multiplied by the extent of current row.
// offset using a single index which the following loop attempts to
// compute using the standard column-major algorithm e.g. for a 3D array:
//
// For example ([1][10][100]):
// ((((c_idx * b_len) + b_idx) * a_len) + a_idx)
//
// - First row/column we move by 1 for each index increment
// - Second row/column we move by 1 (first row/column) * 10 (extent/size of
// current) for 10 for each index increment
// - Third row/column we would move by 10 (second row/column) *
// (extent/size of current) 100 for 1000 for each index increment
std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
for (size_t i = 1; i < bounds.size(); ++i) {
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
bounds[i].getDefiningOp())) {
dimensionIndexSizeOffset.push_back(builder.CreateMul(
moduleTranslation.lookupValue(boundOp.getExtent()),
dimensionIndexSizeOffset[i - 1]));
}
}

// Now that we have calculated how much we move by per index, we must
// multiply each lower bound offset in indexes by the size offset we
// have calculated in the previous and accumulate the results to get
// our final resulting offset.
// It is of note that it's doing column-major rather than row-major at the
// moment, but having a way for the frontend to indicate which major format
// to use or standardizing/canonicalizing the order of the bounds to compute
// the offset may be useful in the future when there's other frontends with
// different formats.
std::vector<llvm::Value *> dimensionIndexSizeOffset;
for (int i = bounds.size() - 1; i >= 0; --i) {
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
bounds[i].getDefiningOp())) {
if (idx.empty())
idx.emplace_back(builder.CreateMul(
moduleTranslation.lookupValue(boundOp.getLowerBound()),
dimensionIndexSizeOffset[i]));
if (i == ((int)bounds.size() - 1))
idx.emplace_back(
moduleTranslation.lookupValue(boundOp.getLowerBound()));
else
idx.back() = builder.CreateAdd(
idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
boundOp.getLowerBound()),
dimensionIndexSizeOffset[i]));
builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
boundOp.getExtent())),
moduleTranslation.lookupValue(boundOp.getLowerBound()));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
// CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
// CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8
// CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
// CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]]
// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
// CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null
Expand Down
61 changes: 61 additions & 0 deletions offload/test/offloading/fortran/descriptor-array-slice-map.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
! Offloading test which aims to test that an allocatable/descriptor type map
! will allow the appropriate slicing behaviour.
! REQUIRES: flang, amdgpu

! Element-wise sum helper whose loop is offloaded with an OpenMP target
! construct.
!
! Arguments:
!   n - number of elements in each of the three arrays
!   a - first addend, explicit-shape array of n elements (read only)
!   b - second addend, explicit-shape array of n elements (read only)
!   c - result array of n elements; on return c(i) = b(i) + a(i)
subroutine slice_writer(n, a, b, c)
implicit none
integer, intent(in) :: n
real(8), intent(in) :: a(n)
real(8), intent(in) :: b(n)
real(8), intent(out) :: c(n)
integer :: i

! No explicit map clauses are given on this construct, so how a, b and c
! reach the device depends on the implicit mapping rules and on whatever the
! caller has already mapped (the caller in this file maps the enclosing
! array slice beforehand).
!$omp target teams distribute parallel do
do i=1,n
c(i) = b(i) + a(i)
end do
end subroutine slice_writer

! RUN: %libomptarget-compile-fortran-run-and-check-generic
! Test driver: fills a 50 x 100 x 2 allocatable array so that every element
! holds its own second-dimension index, then, for each slab along the third
! dimension, maps the slab to the device, transfers only selected column
! ranges, computes two output columns on the device via slice_writer and
! prints samples of the results for verification.
program main
implicit none
real(kind=8), allocatable :: a(:,:,:)
integer :: i, j, k, idx, idx1, idx2, idx3

! Extents of the first, second and third dimension of a.
i=50
j=100
k=2

allocate(a(1:i,1:j,1:k))

! Initialise each element to its second-dimension index, i.e.
! a(:, col, :) == col for every col in 1..j.
do idx1=1, i
do idx2=1, j
do idx3=1, k
a(idx1,idx2,idx3) = idx2
end do
end do
end do

! Handle one slab of the third dimension per iteration.
do idx=1,k
! Map the whole slab with the alloc map type (no host-to-device copy is
! requested here).
!$omp target enter data map(alloc: a(1:i,:, idx))

! Transfer only the two input column ranges, 1:30 and 61:100, to the device.
!$omp target update to(a(1:i, 1:30, idx), &
!$omp& a(1:i, 61:100, idx))

! Column 31 = column 61 + column 1, i.e. 61 + 1 = 62 for every row.
call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx))
! Column 60 = column 100 + column 30, i.e. 100 + 30 = 130 for every row.
call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx))

! Copy back only the output column range 31:60, then remove the slab mapping.
!$omp target update from(a(1:i, 31:60, idx))
!$omp target exit data map(delete: a(1:i, :, idx))

! Sample the first, second and last row of both written columns; the
! expected values are 62 (column 31) and 130 (column 60) for each slab.
print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx)
print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx)
enddo

deallocate(a)
end program

! CHECK: 62. 62. 62.
! CHECK: 130. 130. 130.
! CHECK: 62. 62. 62.
! CHECK: 130. 130. 130.