[MLIR][OpenMP] Fix and simplify bounds offset calculation for 1-D GEP offsets

agozillon · agozillon · commit cf5da430f736 · 2025-10-28T18:39:55.000-05:00
Currently this is being calculated incorrectly and will result in incorrect index
offsets in more complicated array slices. This PR tries to address it by refactoring
and changing the calculation.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -4125,46 +4125,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation,
     // with a pointer that's being treated like an array and we have the
     // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base
     // address (pointer pointing to the actual data) so we must caclulate the
-    // offset using a single index which the following two loops attempts to
-    // compute.
-
-    // Calculates the size offset we need to make per row e.g. first row or
-    // column only needs to be offset by one, but the next would have to be
-    // the previous row/column offset multiplied by the extent of current row.
+    // offset using a single index which the following loop attempts to
+    // compute using the standard column-major algorihtm e.g for a 3D array:
     //
-    // For example ([1][10][100]):
+    // ((((c-idx * b-len) + b-idx) * a-len) + a_idx)
     //
-    //  - First row/column we move by 1 for each index increment
-    //  - Second row/column we move by 1 (first row/column) * 10 (extent/size of
-    //  current) for 10 for each index increment
-    //  - Third row/column we would move by 10 (second row/column) *
-    //  (extent/size of current) 100 for 1000 for each index increment
-    std::vector<llvm::Value *> dimensionIndexSizeOffset{builder.getInt64(1)};
-    for (size_t i = 1; i < bounds.size(); ++i) {
-      if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
-              bounds[i].getDefiningOp())) {
-        dimensionIndexSizeOffset.push_back(builder.CreateMul(
-            moduleTranslation.lookupValue(boundOp.getExtent()),
-            dimensionIndexSizeOffset[i - 1]));
-      }
-    }
-
-    // Now that we have calculated how much we move by per index, we must
-    // multiply each lower bound offset in indexes by the size offset we
-    // have calculated in the previous and accumulate the results to get
-    // our final resulting offset.
+    // It is of note that it's doing column-major rather than row-major at the
+    // moment, but having a way for the frontend to indicate which major format
+    // to use or standardizing/canonicalizing the order of the bounds to compute
+    // the offset may be useful in the future when there's other frontends with
+    // different formats.
+    std::vector<llvm::Value *> dimensionIndexSizeOffset;
     for (int i = bounds.size() - 1; i >= 0; --i) {
       if (auto boundOp = dyn_cast_if_present<omp::MapBoundsOp>(
               bounds[i].getDefiningOp())) {
-        if (idx.empty())
-          idx.emplace_back(builder.CreateMul(
-              moduleTranslation.lookupValue(boundOp.getLowerBound()),
-              dimensionIndexSizeOffset[i]));
+        if (i == ((int)bounds.size() - 1))
+          idx.emplace_back(
+              moduleTranslation.lookupValue(boundOp.getLowerBound()));
         else
           idx.back() = builder.CreateAdd(
-              idx.back(), builder.CreateMul(moduleTranslation.lookupValue(
-                                                boundOp.getLowerBound()),
-                                            dimensionIndexSizeOffset[i]));
+              builder.CreateMul(idx.back(), moduleTranslation.lookupValue(
+                                                boundOp.getExtent())),
+              moduleTranslation.lookupValue(boundOp.getLowerBound()));
       }
     }
   }
diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir
@@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a
 // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4
 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8
 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0
-// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1
 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8
-// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]]
+// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]]
 // CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8
 // CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
 // CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr %[[FULL_ARR_PTR]], null
diff --git a/offload/test/offloading/fortran/descriptor-array-slice-map.f90 b/offload/test/offloading/fortran/descriptor-array-slice-map.f90
@@ -0,0 +1,61 @@
+! Offloading test which aims to test that an allocatable/descriptor type map
+! will allow the appropriate slicing behaviour.
+! REQUIRES: flang, amdgpu
+
+subroutine slice_writer(n, a, b, c)
+    implicit none
+    integer, intent(in) :: n
+    real(8), intent(in) :: a(n)
+    real(8), intent(in) :: b(n)
+    real(8), intent(out) :: c(n)
+    integer :: i
+
+    !$omp target teams distribute parallel do
+    do i=1,n
+       c(i) = b(i) + a(i)
+    end do
+end subroutine slice_writer
+
+! RUN: %libomptarget-compile-fortran-run-and-check-generic
+program main
+    implicit none
+    real(kind=8), allocatable :: a(:,:,:)
+    integer :: i, j, k, idx, idx1, idx2, idx3
+
+    i=50
+    j=100
+    k=2
+
+    allocate(a(1:i,1:j,1:k))
+
+    do idx1=1, i
+        do idx2=1, j
+            do idx3=1, k
+                a(idx1,idx2,idx3) = idx2
+            end do
+        end do
+    end do
+
+    do idx=1,k
+        !$omp target enter data map(alloc: a(1:i,:, idx))
+
+        !$omp target update to(a(1:i, 1:30, idx), &
+        !$omp&                 a(1:i, 61:100, idx))
+
+        call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx))
+        call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx))
+
+        !$omp target update from(a(1:i, 31:60, idx))
+        !$omp target exit data map(delete: a(1:i, :, idx))
+
+        print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx)
+        print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx)
+    enddo
+
+    deallocate(a)
+end program
+
+! CHECK: 62. 62. 62.
+! CHECK: 130. 130. 130.
+! CHECK: 62. 62. 62.
+! CHECK: 130. 130. 130.