Skip to content

Commit 09318c6

Browse files
authored
[MLIR][OpenMP] Fix and simplify bounds offset calculation for 1-D GEP offsets (#165486)
Currently this is being calculated incorrectly and will result in incorrect index offsets in more complicated array slices. This PR tries to address it by refactoring and changing the calculation to be more correct.
1 parent 45b1a4b commit 09318c6

File tree

3 files changed

+81
-38
lines changed

3 files changed

+81
-38
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 19 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo,
40844084
///
40854085
/// Fortran
40864086
/// map(tofrom: array(2:5, 3:2))
4087-
/// or
4088-
/// C++
4089-
/// map(tofrom: array[1:4][2:3])
4087+
///
40904088
/// We must calculate the initial pointer offset to pass across, this function
40914089
/// performs this using bounds.
40924090
///
4091+
/// TODO/WARNING: This only supports Fortran's column major indexing currently
4092+
/// as is noted in the note below and comments in the function, we must extend
4093+
/// this function when we add a C++ frontend.
40934094
/// NOTE: which while specified in row-major order it currently needs to be
40944095
/// flipped for Fortran's column order array allocation and access (as
40954096
/// opposed to C++'s row-major, hence the backwards processing where order is
@@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
41254126
// with a pointer that's being treated like an array and we have the
41264127
// underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
41274128
// address (pointer pointing to the actual data) so we must caclulate the
4128-
// offset using a single index which the following two loops attempts to
4129-
// compute.
4130-
4131-
// Calculates the size offset we need to make per row e.g. first row or
4132-
// column only needs to be offset by one, but the next would have to be
4133-
// the previous row/column offset multiplied by the extent of current row.
4129+
// offset using a single index which the following loop attempts to
4130+
// compute using the standard column-major algorithm e.g for a 3D array:
41344131
//
4135-
// For example ([1][10][100]):
4132+
// ((((c_idx * b_len) + b_idx) * a_len) + a_idx)
41364133
//
4137-
// - First row/column we move by 1 for each index increment
4138-
// - Second row/column we move by 1 (first row/column) * 10 (extent/size of
4139-
// current) for 10 for each index increment
4140-
// - Third row/column we would move by 10 (second row/column) *
4141-
// (extent/size of current) 100 for 1000 for each index increment
4142-
std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
4143-
for (size_t i = 1; i < bounds.size(); ++i) {
4144-
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4145-
bounds[i].getDefiningOp())) {
4146-
dimensionIndexSizeOffset.push_back(builder.CreateMul(
4147-
moduleTranslation.lookupValue(boundOp.getExtent()),
4148-
dimensionIndexSizeOffset[i - 1]));
4149-
}
4150-
}
4151-
4152-
// Now that we have calculated how much we move by per index, we must
4153-
// multiply each lower bound offset in indexes by the size offset we
4154-
// have calculated in the previous and accumulate the results to get
4155-
// our final resulting offset.
4134+
// It is of note that it's doing column-major rather than row-major at the
4135+
// moment, but having a way for the frontend to indicate which major format
4136+
// to use or standardizing/canonicalizing the order of the bounds to compute
4137+
// the offset may be useful in the future when there's other frontends with
4138+
// different formats.
4139+
std::vector<llvm::Value *> dimensionIndexSizeOffset;
41564140
for (int i = bounds.size() - 1; i >= 0; --i) {
41574141
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
41584142
bounds[i].getDefiningOp())) {
4159-
if (idx.empty())
4160-
idx.emplace_back(builder.CreateMul(
4161-
moduleTranslation.lookupValue(boundOp.getLowerBound()),
4162-
dimensionIndexSizeOffset[i]));
4143+
if (i == ((int)bounds.size() - 1))
4144+
idx.emplace_back(
4145+
moduleTranslation.lookupValue(boundOp.getLowerBound()));
41634146
else
41644147
idx.back() = builder.CreateAdd(
4165-
idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
4166-
boundOp.getLowerBound()),
4167-
dimensionIndexSizeOffset[i]));
4148+
builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
4149+
boundOp.getExtent())),
4150+
moduleTranslation.lookupValue(boundOp.getLowerBound()));
41684151
}
41694152
}
41704153
}

mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
8181
// CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
8282
// CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8
8383
// CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
84-
// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
8584
// CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8
86-
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
85+
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]]
8786
// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
8887
// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
8988
// CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
! Offloading test which aims to test that an allocatable/descriptor type map
2+
! will allow the appropriate slicing behaviour.
3+
! REQUIRES: flang, amdgpu
4+
5+
subroutine slice_writer(n, a, b, c)
6+
implicit none
7+
integer, intent(in) :: n
8+
real(8), intent(in) :: a(n)
9+
real(8), intent(in) :: b(n)
10+
real(8), intent(out) :: c(n)
11+
integer :: i
12+
13+
!$omp target teams distribute parallel do
14+
do i=1,n
15+
c(i) = b(i) + a(i)
16+
end do
17+
end subroutine slice_writer
18+
19+
! RUN: %libomptarget-compile-fortran-run-and-check-generic
20+
program main
21+
implicit none
22+
real(kind=8), allocatable :: a(:,:,:)
23+
integer :: i, j, k, idx, idx1, idx2, idx3
24+
25+
i=50
26+
j=100
27+
k=2
28+
29+
allocate(a(1:i,1:j,1:k))
30+
31+
do idx1=1, i
32+
do idx2=1, j
33+
do idx3=1, k
34+
a(idx1,idx2,idx3) = idx2
35+
end do
36+
end do
37+
end do
38+
39+
do idx=1,k
40+
!$omp target enter data map(alloc: a(1:i,:, idx))
41+
42+
!$omp target update to(a(1:i, 1:30, idx), &
43+
!$omp& a(1:i, 61:100, idx))
44+
45+
call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx))
46+
call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx))
47+
48+
!$omp target update from(a(1:i, 31:60, idx))
49+
!$omp target exit data map(delete: a(1:i, :, idx))
50+
51+
print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx)
52+
print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx)
53+
enddo
54+
55+
deallocate(a)
56+
end program
57+
58+
! CHECK: 62. 62. 62.
59+
! CHECK: 130. 130. 130.
60+
! CHECK: 62. 62. 62.
61+
! CHECK: 130. 130. 130.

0 commit comments

Comments
 (0)