Skip to content

Auto-inserted deallocation of GPU memory leads to crash #664

@fschlimb

Description

@fschlimb

The following IR does not work with the pipeline below: the returned pointer has already been deallocated. It works fine when generation of the gpu-dealloc op/call is omitted.

// RUN: %python_executable %imex_runner --requires=l0-runtime -i %s --pass-pipeline-file=%p/ptensor-gpu.pp --runner imex-cpu-runner -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%levelzero_runtime --filecheck --O3
// RUN: %python_executable %imex_runner --requires=sycl-runtime -i %s --pass-pipeline-file=%p/ptensor-gpu.pp \
// RUN:                                        --runner imex-cpu-runner -e main \
// RUN:                                        --entry-point-result=void \
// RUN:                                        --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%sycl_runtime --filecheck
#map = affine_map<(d0, d1) -> (d0, d1)>
module {
  // Print helpers taking unranked tensors. Only declared here (no bodies);
  // @printMemrefF32 is declared but not called in this module.
  func.func private @printMemrefI32(tensor<*xi32>)
  func.func private @printMemrefF32(tensor<*xf32>)
  // Entry point: calls @ddpt_jit and prints only its second result (%0#1).
  func.func @main() {
    %0:4 = call @ddpt_jit() : () -> (memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>)
    // Convert the returned memref back to a tensor and erase its rank so it
    // matches the signature of @printMemrefI32.
    %1 = bufferization.to_tensor %0#1 : memref<?x?xi32, strided<[?, ?], offset: ?>>
    %cast = tensor.cast %1 : tensor<?x?xi32> to tensor<*xi32>
    // NOTE(review): presumably this read of %0#1 is where the already
    // deallocated buffer described in the report is touched - confirm.
    call @printMemrefI32(%cast) : (tensor<*xi32>) -> ()
    // CHECK: Unranked Memref base@ = {{(0x)?[-9a-f]*}}
    return
  }
  // Produces four results:
  //   #0 and #2: the same dynamically-shaped cast of an empty 0x0 i32 buffer,
  //   #1:        a dynamically-shaped cast of a 16x16 i32 buffer filled with 0,
  //   #3:        a 2-element index buffer with both entries set to 0.
  func.func @ddpt_jit() -> (memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>) attributes {llvm.emit_c_interface} {
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c0_i32 = arith.constant 0 : i32
    // Fill a 16x16 tensor with the constant 0 using a parallel linalg.generic
    // that has no inputs and yields %c0_i32 for every element.
    %0 = tensor.empty() : tensor<16x16xi32>
    %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%0 : tensor<16x16xi32>) {
    ^bb0(%out: i32):
      linalg.yield %c0_i32 : i32
    } -> tensor<16x16xi32>
    // Zero-sized tensor; its memref form is returned twice (results #0 and #2).
    %2 = tensor.empty() : tensor<0x0xi32>
    %3 = bufferization.to_memref %2 : memref<0x0xi32>
    %cast = memref.cast %3 : memref<0x0xi32> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    // Memref form of the zero-filled 16x16 tensor; returned as result #1.
    %4 = bufferization.to_memref %1 : memref<16x16xi32>
    %cast_0 = memref.cast %4 : memref<16x16xi32> to memref<?x?xi32, strided<[?, ?], offset: ?>>
    // Explicitly allocated 2-element index buffer, both entries set to 0;
    // returned as result #3.
    %alloc = memref.alloc() {alignment = 8 : i64} : memref<2xindex>
    memref.store %c0, %alloc[%c0] : memref<2xindex>
    memref.store %c0, %alloc[%c1] : memref<2xindex>
    return %cast, %cast_0, %cast, %alloc : memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<?x?xi32, strided<[?, ?], offset: ?>>, memref<2xindex>
  }
}
builtin.module(
// TOSA lowering and early cleanup
    func.func(tosa-make-broadcastable)
    func.func(tosa-to-linalg)
    func.func(tosa-to-tensor)
    canonicalize
    linalg-fuse-elementwise-ops
    arith-expand
    memref-expand
// Bufferization passes
    arith-bufferize
    func-bufferize
    func.func(empty-tensor-to-alloc-tensor)
    func.func(scf-bufferize)
    func.func(tensor-bufferize)
    func.func(bufferization-bufferize)
    func.func(linalg-bufferize)
    func.func(linalg-detensorize)
    func.func(tensor-bufferize)
    func.func(finalizing-bufferize)
    imex-remove-temporaries
// Lower linalg ops to parallel loops and fuse them
    func.func(convert-linalg-to-parallel-loops)
    func.func(scf-parallel-loop-fusion)
// GPU
    func.func(imex-add-outer-parallel-loop)
    func.func(gpu-map-parallel-loops)
    func.func(convert-parallel-loops-to-gpu)
// NOTE(review): insert-gpu-allocs is presumably the pass that introduces the
// GPU alloc/dealloc ops discussed in this issue - confirm.
// insert-gpu-allocs pass can have client-api = opencl or vulkan args
    func.func(insert-gpu-allocs{client-api=opencl})
    canonicalize
    normalize-memrefs
// Unstride memrefs does not seem to be needed.
//  func.func(unstride-memrefs)
    func.func(lower-affine)
    gpu-kernel-outlining
    canonicalize
    cse
// The following set-spirv-* passes can have client-api = opencl or vulkan args
    set-spirv-capabilities{client-api=opencl}
    gpu.module(set-spirv-abi-attrs{client-api=opencl})
    canonicalize
    fold-memref-alias-ops
// SPIR-V conversion and serialization
    imex-convert-gpu-to-spirv
    spirv.module(spirv-lower-abi-attrs
             spirv-update-vce)
    func.func(llvm-request-c-wrappers)
    serialize-spirv
// Final lowering to the LLVM dialect
    convert-gpu-to-gpux
    convert-func-to-llvm
    convert-math-to-llvm
    convert-gpux-to-llvm
    expand-strided-metadata
    lower-affine
    finalize-memref-to-llvm
    reconcile-unrealized-casts)

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions