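Fix NDArray tests: move subview/insert_slice checks into a one-shot-bufferize test and update copy/linspace expectations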

fschlimb · fschlimb · commit f155fe1fa1e3 · 2025-03-26T17:45:31.000+01:00
diff --git a/include/imex/Conversion/Passes.h b/include/imex/Conversion/Passes.h
@@ -17,6 +17,7 @@
 
 #include "mlir/Pass/Pass.h"
 
+#include <imex/Conversion/ArithToVC/ArithToVC.h>
 #include <imex/Conversion/DropRegions/DropRegions.h>
 #include <imex/Conversion/GPUToGPUX/GPUToGPUX.h>
 #include <imex/Conversion/GPUToSPIRV/GPUToSPIRVPass.h>
diff --git a/test/Conversion/NDArrayToLinalg/NDArrayToLinalg.mlir b/test/Conversion/NDArrayToLinalg/NDArrayToLinalg.mlir
@@ -1,21 +1,5 @@
 // RUN: imex-opt --split-input-file --convert-ndarray-to-linalg %s -verify-diagnostics -o -| FileCheck %s
 
-// -----
-func.func @test_subview(%arg0: tensor<?xi64>) -> tensor<?xi64> {
-    %c0 = arith.constant 0 : index
-    %c3 = arith.constant 3 : index
-    %0 = ndarray.subview %arg0[%c0][%c3][%c3] : tensor<?xi64> to tensor<?xi64>
-    return %0 : tensor<?xi64>
-}
-// CHECK-LABEL: @test_subview
-// CHECK-SAME: ([[V:%.*]]: tensor<?xi64>) -> tensor<?xi64> {
-// CHECK-NEXT: [[C0:%.*]] = arith.constant
-// CHECK-NEXT: [[C1:%.*]] = arith.constant
-// CHECK-NEXT: [[V0:%.*]] = bufferization.to_memref [[V]] : tensor<?xi64> to memref<?xi64, strided<[?], offset: ?>>
-// CHECK-NEXT: [[S0:%.*]] = memref.subview [[V0]][[[C0]]] [[[C1]]] [[[C1]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
-// CHECK-NEXT: [[V1:%.*]] = bufferization.to_tensor [[S0]] restrict writable : memref<?xi64, strided<[?], offset: ?>>
-// CHECK-NEXT: return [[V1]] : tensor<?xi64>
-
 // -----
 func.func @test_linspace(%arg0: i64, %arg1: i64, %arg2: index) -> tensor<?xindex> {
     %0 = ndarray.linspace %arg0 %arg1 %arg2 false : (i64, i64, index) -> tensor<?xindex>
@@ -72,42 +56,6 @@ func.func @test_reshape2(%arg0: index) -> tensor<?x?xi64> {
 // CHECK: tensor.reshape
 // CHECK-SAME: -> tensor<?x?xi64>
 
-// -----
-func.func @test_insert_slice(%arg0: tensor<?xi64>, %arg1: tensor<?xi64>) {
-    %i0 = arith.constant 0 : index
-    %i1 = arith.constant 1 : index
-    %i3 = arith.constant 3 : index
-    ndarray.insert_slice %arg1 into %arg0[%i0] [%i3] [%i1] : tensor<?xi64> into tensor<?xi64>
-    return
-}
-// CHECK-LABEL: @test_insert_slice
-// CHECK-SAME: ([[V:%.*]]: tensor<?xi64>, [[VV:%.*]]: tensor<?xi64>) {
-// CHECK-NEXT: [[C0:%.*]] = arith.constant
-// CHECK-NEXT: [[C1:%.*]] = arith.constant
-// CHECK-NEXT: [[C3:%.*]] = arith.constant
-// CHECK-NEXT: [[V0:%.*]] = bufferization.to_memref [[VV]]
-// CHECK-NEXT: [[V1:%.*]] = bufferization.to_memref [[V]]
-// CHECK-NEXT: [[SV:%.*]] = memref.subview [[V1]][[[C0]]] [[[C3]]] [[[C1]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
-// CHECK: memref.copy [[V0]], [[SV]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
-
-// -----
-func.func @test_insert_slice_scalar(%arg0: tensor<?xi64>, %arg1: tensor<i64>) {
-    %i0 = arith.constant 0 : index
-    %i1 = arith.constant 1 : index
-    %i3 = arith.constant 3 : index
-    ndarray.insert_slice %arg1 into %arg0[%i0] [%i3] [%i1] : tensor<i64> into tensor<?xi64>
-    return
-}
-// CHECK-LABEL: @test_insert_slice_scalar
-// CHECK-SAME: ([[V:%.*]]: tensor<?xi64>, [[VV:%.*]]: tensor<i64>) {
-// CHECK-NEXT: [[C0:%.*]] = arith.constant
-// CHECK-NEXT: [[C1:%.*]] = arith.constant
-// CHECK-NEXT: [[C3:%.*]] = arith.constant
-// CHECK-NEXT: [[V0:%.*]] = bufferization.to_memref [[VV]]
-// CHECK-NEXT: [[V1:%.*]] = bufferization.to_memref [[V]]
-// CHECK-NEXT: [[SV:%.*]] = memref.subview [[V1]][[[C0]]] [[[C3]]] [[[C1]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
-// CHECK-NEXT: linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel"]} ins([[V0]] : memref<i64, strided<[], offset: ?>>) outs([[SV]] : memref<?xi64, strided<[?], offset: ?>>)
-
 // -----
 #GPUENV = #ndarray.envs<#region.gpu_env<device = "g">>
 func.func @test_env() -> (tensor<16x16xf32, #GPUENV>, tensor<256xf32, #GPUENV>) attributes {llvm.emit_c_interface} {
@@ -170,41 +118,36 @@ func.func @test_env() -> (tensor<16x16xf32, #GPUENV>, tensor<256xf32, #GPUENV>)
 // COM: CHECK-SAME: memref<?xi64, strided<[?], offset: ?>>
 
 // -----
-func.func @test_copy(%a: !ndarray.ndarray<?xi64>) -> !ndarray.ndarray<?xi64> {
-    %0 = ndarray.copy %a: !ndarray.ndarray<?xi64> -> !ndarray.ndarray<?xi64>
-    %1 = ndarray.copy %0: !ndarray.ndarray<?xi64> -> !ndarray.ndarray<?xi64, #region.gpu_env<device = "XeGPU">>
-    %2 = ndarray.copy %1: !ndarray.ndarray<?xi64, #region.gpu_env<device = "XeGPU">> -> !ndarray.ndarray<?xi64>
-    return %0 : !ndarray.ndarray<?xi64>
+func.func @test_copy(%a: tensor<?xi64>) -> tensor<?xi64> {
+    %0 = ndarray.copy %a: tensor<?xi64> -> tensor<?xi64>
+    %1 = ndarray.copy %0: tensor<?xi64> -> tensor<?xi64, #region.gpu_env<device = "XeGPU">>
+    %2 = ndarray.copy %1: tensor<?xi64, #region.gpu_env<device = "XeGPU">> -> tensor<?xi64>
+    return %0 : tensor<?xi64>
 }
-// CHECK-LABEL: func.func @test_copy
-// CHECK-NEXT: bufferization.to_tensor
-// CHECK-NEXT: arith.constant 0 : index
-// CHECK-NEXT: tensor.dim
-// CHECK-NEXT: memref.alloc
-// CHECK-NEXT: bufferization.to_memref
-// CHECK-NEXT: region.env_region "protect_copy_op"
-// CHECK-NEXT: memref.copy
-// CHECK-NEXT: }
-// CHECK-NEXT: bufferization.to_tensor
-// CHECK-NEXT: bufferization.to_memref
-// CHECK-NEXT: arith.constant 0 : index
-// CHECK-NEXT: tensor.dim
-// CHECK-NEXT: memref.alloc
-// CHECK-NEXT: bufferization.to_memref
-// CHECK-NEXT: region.env_region "gpu_copy_op"
-// CHECK-NEXT: memref.copy
-// CHECK-NEXT: }
-// CHECK-NEXT: bufferization.to_tensor
-// CHECK-NEXT: arith.constant 0 : index
-// CHECK-NEXT: tensor.dim
-// CHECK-NEXT: memref.alloc
-// CHECK-NEXT: bufferization.to_memref
-// CHECK-NEXT: region.env_region "gpu_copy_op"
-// CHECK-NEXT: memref.copy
-// CHECK-NEXT: }
-// CHECK-NEXT: bufferization.to_tensor
-// CHECK-NEXT: return
-// CHECK-SAME: memref<?xi64, strided<[?], offset: ?>>
+// CHECK-LABEL: func.func @test_copy(
+// CHECK-SAME: [[varg0:%.*]]: tensor<?xi64>) -> tensor<?xi64> {
+// CHECK-NEXT: [[vc0:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vdim:%.*]] = tensor.dim [[varg0]], [[vc0]] : tensor<?xi64>
+// CHECK-NEXT: [[valloc:%.*]] = memref.alloc([[vdim]]) {alignment = 8 : i64} : memref<?xi64>
+// CHECK-NEXT: [[v0:%.*]] = bufferization.to_memref [[varg0]] : tensor<?xi64> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: region.env_region "protect_copy_op" {
+// CHECK-NEXT: memref.copy [[v0]], [[valloc]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64>
+// CHECK: [[v1:%.*]] = bufferization.to_tensor [[valloc]] restrict writable : memref<?xi64> to tensor<?xi64>
+// CHECK-NEXT: [[vc0_0:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vdim_1:%.*]] = tensor.dim [[v1]], [[vc0_0]] : tensor<?xi64>
+// CHECK-NEXT: [[valloc_2:%.*]] = memref.alloc([[vdim_1]]) {alignment = 8 : i64} : memref<?xi64>
+// CHECK-NEXT: [[v2:%.*]] = bufferization.to_memref [[v1]] : tensor<?xi64> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: region.env_region "protect_copy_op" {
+// CHECK-NEXT: memref.copy [[v2]], [[valloc_2]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64>
+// CHECK: [[v3:%.*]] = bufferization.to_tensor [[valloc_2]] restrict writable : memref<?xi64> to tensor<?xi64, #region.gpu_env<device = "XeGPU">>
+// CHECK-NEXT: [[vc0_3:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vdim_4:%.*]] = tensor.dim [[v3]], [[vc0_3]] : tensor<?xi64, #region.gpu_env<device = "XeGPU">>
+// CHECK-NEXT: [[valloc_5:%.*]] = memref.alloc([[vdim_4]]) {alignment = 8 : i64} : memref<?xi64>
+// CHECK-NEXT: [[v4:%.*]] = bufferization.to_memref [[v3]] : tensor<?xi64, #region.gpu_env<device = "XeGPU">> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: region.env_region "protect_copy_op" {
+// CHECK-NEXT: memref.copy [[v4]], [[valloc_5]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64>
+// CHECK: [[v5:%.*]] = bufferization.to_tensor [[valloc_5]] restrict writable : memref<?xi64> to tensor<?xi64>
+// CHECK-NEXT: return [[v1]] : tensor<?xi64>
 
 // -----
 func.func @test_delete(%arg0: tensor<?xi64>) {
diff --git a/test/Dialect/NDArray/Extensions/mesh-spmdization.mlir b/test/Dialect/NDArray/Extensions/mesh-spmdization.mlir
@@ -49,44 +49,47 @@ func.func @test_cast_elemtypeop(%arg0: tensor<1024x1024xi64>) -> tensor<1024x102
 func.func @test_linspace() -> tensor<?xi64> {
     %c0 = arith.constant 0 : i64
     %c10 = arith.constant 10 : i64
-    // CHECK: [[vcst_0:%.*]] = arith.constant 4.000000e+00 : f64
-    // CHECK-NEXT: [[vcst:%.*]] = arith.constant 3.000000e+00 : f64
-    // CHECK-NEXT: [[vcst_1:%.*]] = arith.constant 7.000000e+00 : f64
-    // CHECK-NEXT: [[v0:%.*]] = ndarray.linspace [[vcst_0]] [[vcst_1]] [[vcst]] false : (f64, f64, f64) -> tensor<?xi64>
+    // CHECK-DAG: [[vcst_0:%.*]] = arith.constant 4.000000e+00 : f64
+    // CHECK-DAG: [[vcst:%.*]] = arith.constant 3 : index
+    // CHECK-DAG: [[vcst_1:%.*]] = arith.constant 7.000000e+00 : f64
+    // CHECK: [[v0:%.*]] = ndarray.linspace [[vcst_0]] [[vcst_1]] [[vcst]] false : (f64, f64, index) -> tensor<3xi64>
     %0 = ndarray.linspace %c0 %c10 %c10 false : (i64, i64, i64) -> tensor<?xi64>
     %s = mesh.sharding @mesh4 split_axes = [[0]] : !mesh.sharding
     %1 = mesh.shard %0 to %s : tensor<?xi64>
-    // CHECK-NEXT: return [[v0]] : tensor<?xi64>
+    // CHECK: [[cast:%.*]] = tensor.cast [[v0]] : tensor<3xi64> to tensor<?xi64>
+    // CHECK-NEXT: return [[cast]] : tensor<?xi64>
     return %1 : tensor<?xi64>
 }
 
 // CHECK-LABEL: @test_linspace_halos
 func.func @test_linspace_halos() -> tensor<?xi64> {
     %c0 = arith.constant 0 : i64
     %c10 = arith.constant 10 : i64
-    // CHECK: [[vcst:%.*]] = arith.constant 3.000000e+00 : f64
-    // CHECK-NEXT: [[vcst_0:%.*]] = arith.constant 7.000000e+00 : f64
-    // CHECK-NEXT: [[vcst_1:%.*]] = arith.constant 1.000000e+01 : f64
-    // CHECK-NEXT: [[v0:%.*]] = ndarray.linspace [[vcst]] [[vcst_1]] [[vcst_0]] false : (f64, f64, f64) -> tensor<?xi64>
+    // CHECK-DAG: [[vcst:%.*]] = arith.constant 3.000000e+00 : f64
+    // CHECK-DAG: [[vcst_0:%.*]] = arith.constant 7 : index
+    // CHECK-DAG: [[vcst_1:%.*]] = arith.constant 1.000000e+01 : f64
+    // CHECK: [[v0:%.*]] = ndarray.linspace [[vcst]] [[vcst_1]] [[vcst_0]] false : (f64, f64, index) -> tensor<7xi64>
     %0 = ndarray.linspace %c0 %c10 %c10 false : (i64, i64, i64) -> tensor<?xi64>
     %s = mesh.sharding @mesh4 split_axes = [[0]] halo_sizes = [1, 3]: !mesh.sharding
     %1 = mesh.shard %0 to %s : tensor<?xi64>
-    // CHECK-NEXT: return [[v0]] : tensor<?xi64>
+    // CHECK: [[cast:%.*]] = tensor.cast [[v0]] : tensor<7xi64> to tensor<?xi64>
+    // CHECK-NEXT: return [[cast]] : tensor<?xi64>
     return %1 : tensor<?xi64>
 }
 
 // CHECK-LABEL: @test_linspace_offsets
 func.func @test_linspace_offsets() -> tensor<?xi64> {
     %c0 = arith.constant 0 : i64
     %c10 = arith.constant 10 : i64
-    // CHECK: [[vcst:%.*]] = arith.constant 1.000000e+00 : f64
-    // CHECK-NEXT: [[vcst_0:%.*]] = arith.constant 5.000000e+00 : f64
-    // CHECK-NEXT: [[vcst_1:%.*]] = arith.constant 6.000000e+00 : f64
-    // CHECK-NEXT: [[v0:%.*]] = ndarray.linspace [[vcst_0]] [[vcst_1]] [[vcst]] false : (f64, f64, f64) -> tensor<?xi64>
+    // CHECK-DAG: [[vcst:%.*]] = arith.constant 1 : index
+    // CHECK-DAG: [[vcst_0:%.*]] = arith.constant 5.000000e+00 : f64
+    // CHECK-DAG: [[vcst_1:%.*]] = arith.constant 6.000000e+00 : f64
+    // CHECK-NEXT: [[v0:%.*]] = ndarray.linspace [[vcst_0]] [[vcst_1]] [[vcst]] false : (f64, f64, index) -> tensor<1xi64>
     %0 = ndarray.linspace %c0 %c10 %c10 false : (i64, i64, i64) -> tensor<?xi64>
     %s = mesh.sharding @mesh4 split_axes = [[0]] sharded_dims_offsets = [0, 0, 5, 6, 10]: !mesh.sharding
     %1 = mesh.shard %0 to %s : tensor<?xi64>
-    // CHECK-NEXT: return [[v0]] : tensor<?xi64>
+    // CHECK: [[cast:%.*]] = tensor.cast [[v0]] : tensor<1xi64> to tensor<?xi64>
+    // CHECK-NEXT: return [[cast]] : tensor<?xi64>
     return %1 : tensor<?xi64>
 }
 
diff --git a/test/Dialect/NDArray/Transforms/bufferize.mlir b/test/Dialect/NDArray/Transforms/bufferize.mlir
@@ -0,0 +1,53 @@
+// RUN: imex-opt --split-input-file --one-shot-bufferize="bufferize-function-boundaries=1" %s -verify-diagnostics -o -| FileCheck %s
+
+// -----
+func.func @test_subview(%arg0: tensor<?xi64>) -> tensor<?xi64> {
+    %c0 = arith.constant 0 : index
+    %c3 = arith.constant 3 : index
+    %0 = ndarray.subview %arg0[%c0][%c3][%c3] : tensor<?xi64> to tensor<?xi64>
+    return %0 : tensor<?xi64>
+}
+// CHECK-LABEL: func.func @test_subview(
+// CHECK-SAME: [[varg0:%.*]]: memref<?xi64, strided<[?], offset: ?>>) -> memref<?xi64, strided<[?], offset: ?>> {
+// CHECK-NEXT: [[vc0:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vc3:%.*]] = arith.constant 3 : index
+// CHECK-NEXT: [[vsubview:%.*]] = memref.subview [[varg0]][[[vc0]]] [[[vc3]]] [[[vc3]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: return [[vsubview]] : memref<?xi64, strided<[?], offset: ?>>
+
+
+// -----
+func.func @test_insert_slice(%arg0: tensor<?xi64>, %arg1: tensor<?xi64>) {
+    %i0 = arith.constant 0 : index
+    %i1 = arith.constant 1 : index
+    %i3 = arith.constant 3 : index
+    ndarray.insert_slice %arg1 into %arg0[%i0] [%i3] [%i1] : tensor<?xi64> into tensor<?xi64>
+    return
+}
+// CHECK-LABEL: func.func @test_insert_slice(
+// CHECK-SAME: [[varg0:%.*]]: memref<?xi64, strided<[?], offset: ?>>, [[varg1:%.*]]: memref<?xi64, strided<[?], offset: ?>>) {
+// CHECK-NEXT: [[vc0:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vc1:%.*]] = arith.constant 1 : index
+// CHECK-NEXT: [[vc3:%.*]] = arith.constant 3 : index
+// CHECK-NEXT: [[vsubview:%.*]] = memref.subview [[varg0]][[[vc0]]] [[[vc3]]] [[[vc1]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: memref.copy [[varg1]], [[vsubview]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: return
+
+
+// -----
+func.func @test_insert_slice_scalar(%arg0: tensor<?xi64>, %arg1: tensor<i64>) {
+    %i0 = arith.constant 0 : index
+    %i1 = arith.constant 1 : index
+    %i3 = arith.constant 3 : index
+    ndarray.insert_slice %arg1 into %arg0[%i0] [%i3] [%i1] : tensor<i64> into tensor<?xi64>
+    return
+}
+// CHECK-LABEL: func.func @test_insert_slice_scalar(
+// CHECK-SAME: [[varg0:%.*]]: memref<?xi64, strided<[?], offset: ?>>, [[varg1:%.*]]: memref<i64, strided<[], offset: ?>>) {
+// CHECK-NEXT: [[vc0:%.*]] = arith.constant 0 : index
+// CHECK-NEXT: [[vc1:%.*]] = arith.constant 1 : index
+// CHECK-NEXT: [[vc3:%.*]] = arith.constant 3 : index
+// CHECK-NEXT: [[vsubview:%.*]] = memref.subview [[varg0]][[[vc0]]] [[[vc3]]] [[[vc1]]] : memref<?xi64, strided<[?], offset: ?>> to memref<?xi64, strided<[?], offset: ?>>
+// CHECK-NEXT: linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel"]} ins([[varg1]] : memref<i64, strided<[], offset: ?>>) outs([[vsubview]] : memref<?xi64, strided<[?], offset: ?>>) {
+// CHECK-NEXT: ^bb0([[vin:%.*]]: i64, [[vout:%.*]]: i64):
+// CHECK-NEXT: linalg.yield [[vin]] : i64
+// CHECK: return
diff --git a/test/imex-runner/dist_to_fusion.mlir b/test/imex-runner/dist_to_fusion.mlir