Skip to content

Commit cf5da43

Browse files
committed
[MLIR][OpenMP] Fix and simplify bounds offset calculation for 1-D GEP offsets
Currently this is being calculated incorrectly and will result in incorrect index offsets in more complicated array slices. This PR tries to address it by refactoring and changing the calculation.
1 parent 10afda0 commit cf5da43

File tree

3 files changed

+77
-35
lines changed

3 files changed

+77
-35
lines changed

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 15 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4125,46 +4125,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
41254125
// with a pointer that's being treated like an array and we have the
41264126
// underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
41274127
// address (pointer pointing to the actual data) so we must caclulate the
4128-
// offset using a single index which the following two loops attempts to
4129-
// compute.
4130-
4131-
// Calculates the size offset we need to make per row e.g. first row or
4132-
// column only needs to be offset by one, but the next would have to be
4133-
// the previous row/column offset multiplied by the extent of current row.
4128+
// offset using a single index which the following loop attempts to
4129+
// compute using the standard column-major algorihtm e.g for a 3D array:
41344130
//
4135-
// For example ([1][10][100]):
4131+
// ((((c-idx * b-len) + b-idx) * a-len) + a_idx)
41364132
//
4137-
// - First row/column we move by 1 for each index increment
4138-
// - Second row/column we move by 1 (first row/column) * 10 (extent/size of
4139-
// current) for 10 for each index increment
4140-
// - Third row/column we would move by 10 (second row/column) *
4141-
// (extent/size of current) 100 for 1000 for each index increment
4142-
std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
4143-
for (size_t i = 1; i < bounds.size(); ++i) {
4144-
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
4145-
bounds[i].getDefiningOp())) {
4146-
dimensionIndexSizeOffset.push_back(builder.CreateMul(
4147-
moduleTranslation.lookupValue(boundOp.getExtent()),
4148-
dimensionIndexSizeOffset[i - 1]));
4149-
}
4150-
}
4151-
4152-
// Now that we have calculated how much we move by per index, we must
4153-
// multiply each lower bound offset in indexes by the size offset we
4154-
// have calculated in the previous and accumulate the results to get
4155-
// our final resulting offset.
4133+
// It is of note that it's doing column-major rather than row-major at the
4134+
// moment, but having a way for the frontend to indicate which major format
4135+
// to use or standardizing/canonicalizing the order of the bounds to compute
4136+
// the offset may be useful in the future when there's other frontends with
4137+
// different formats.
4138+
std::vector<llvm::Value *> dimensionIndexSizeOffset;
41564139
for (int i = bounds.size() - 1; i >= 0; --i) {
41574140
if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
41584141
bounds[i].getDefiningOp())) {
4159-
if (idx.empty())
4160-
idx.emplace_back(builder.CreateMul(
4161-
moduleTranslation.lookupValue(boundOp.getLowerBound()),
4162-
dimensionIndexSizeOffset[i]));
4142+
if (i == ((int)bounds.size() - 1))
4143+
idx.emplace_back(
4144+
moduleTranslation.lookupValue(boundOp.getLowerBound()));
41634145
else
41644146
idx.back() = builder.CreateAdd(
4165-
idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
4166-
boundOp.getLowerBound()),
4167-
dimensionIndexSizeOffset[i]));
4147+
builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
4148+
boundOp.getExtent())),
4149+
moduleTranslation.lookupValue(boundOp.getLowerBound()));
41684150
}
41694151
}
41704152
}

mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
8181
// CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
8282
// CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8
8383
// CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
84-
// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
8584
// CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8
86-
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
85+
// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]]
8786
// CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
8887
// CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
8988
// CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
! Offloading test which aims to test that an allocatable/descriptor type map
2+
! will allow the appropriate slicing behaviour.
3+
! REQUIRES: flang, amdgpu
4+
5+
subroutine slice_writer(n, a, b, c)
6+
implicit none
7+
integer, intent(in) :: n
8+
real(8), intent(in) :: a(n)
9+
real(8), intent(in) :: b(n)
10+
real(8), intent(out) :: c(n)
11+
integer :: i
12+
13+
!$omp target teams distribute parallel do
14+
do i=1,n
15+
c(i) = b(i) + a(i)
16+
end do
17+
end subroutine slice_writer
18+
19+
! RUN: %libomptarget-compile-fortran-run-and-check-generic
20+
program main
21+
implicit none
22+
real(kind=8), allocatable :: a(:,:,:)
23+
integer :: i, j, k, idx, idx1, idx2, idx3
24+
25+
i=50
26+
j=100
27+
k=2
28+
29+
allocate(a(1:i,1:j,1:k))
30+
31+
do idx1=1, i
32+
do idx2=1, j
33+
do idx3=1, k
34+
a(idx1,idx2,idx3) = idx2
35+
end do
36+
end do
37+
end do
38+
39+
do idx=1,k
40+
!$omp target enter data map(alloc: a(1:i,:, idx))
41+
42+
!$omp target update to(a(1:i, 1:30, idx), &
43+
!$omp& a(1:i, 61:100, idx))
44+
45+
call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx))
46+
call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx))
47+
48+
!$omp target update from(a(1:i, 31:60, idx))
49+
!$omp target exit data map(delete: a(1:i, :, idx))
50+
51+
print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx)
52+
print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx)
53+
enddo
54+
55+
deallocate(a)
56+
end program
57+
58+
! CHECK: 62. 62. 62.
59+
! CHECK: 130. 130. 130.
60+
! CHECK: 62. 62. 62.
61+
! CHECK: 130. 130. 130.

0 commit comments

Comments
 (0)