Address review comments.

mshahneo · mshahneo · commit 8e83c9ca3c70 · 2025-08-06T20:54:37.000Z
diff --git a/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/LevelZeroRuntimeWrappers.cpp
@@ -12,12 +12,12 @@
 
 #include "llvm/ADT/Twine.h"
 
+#include "level_zero/ze_api.h"
 #include <cassert>
 #include <deque>
 #include <exception>
 #include <functional>
 #include <iostream>
-#include <level_zero/ze_api.h>
 #include <limits>
 #include <unordered_set>
 #include <vector>
diff --git a/mlir/test/Integration/GPU/LEVELZERO/gpu-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/LEVELZERO/gpu-addf32-to-spirv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_levelzero_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
@@ -26,7 +26,9 @@ module @add attributes {gpu.container_module} {
     memref.copy %arg0, %memref_0 : memref<2x2x2xf32> to memref<2x2x2xf32>
     %memref_2 = gpu.alloc host_shared () : memref<2x2x2xf32>
     %2 = gpu.wait async
+
     %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref_0 : memref<2x2x2xf32>, %mem : memref<2x2x2xf32>, %memref_2 : memref<2x2x2xf32>)
+
     gpu.wait [%3]
     %alloc = memref.alloc() : memref<2x2x2xf32>
     memref.copy %memref_2, %alloc : memref<2x2x2xf32> to memref<2x2x2xf32>
diff --git a/mlir/test/Integration/GPU/LEVELZERO/gpu-addi64-to-spirv.mlir b/mlir/test/Integration/GPU/LEVELZERO/gpu-addi64-to-spirv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_levelzero_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
@@ -26,7 +26,9 @@ module @add attributes {gpu.container_module} {
     memref.copy %arg0, %memref_0 : memref<3x3xi64> to memref<3x3xi64>
     %memref_2 = gpu.alloc host_shared () : memref<3x3xi64>
     %2 = gpu.wait async
+
     %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c3, %c3, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<3x3xi64>, %mem : memref<3x3xi64>, %memref_2 : memref<3x3xi64>)
+
     gpu.wait [%3]
     %alloc = memref.alloc() : memref<3x3xi64>
     memref.copy %memref_2, %alloc : memref<3x3xi64> to memref<3x3xi64>
diff --git a/mlir/test/Integration/GPU/LEVELZERO/gpu-memcpy-addf32-to-spirv.mlir b/mlir/test/Integration/GPU/LEVELZERO/gpu-memcpy-addf32-to-spirv.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(gpu-async-region),spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(gpu-async-region),spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
 // RUN:   --shared-libs=%mlir_levelzero_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
@@ -26,7 +26,9 @@ module @add attributes {gpu.container_module} {
     %memref_0 = gpu.alloc () : memref<2x2x2xf32>
     gpu.memcpy %memref_0, %arg1 : memref<2x2x2xf32>, memref<2x2x2xf32>
     %memref_1 = gpu.alloc () : memref<2x2x2xf32>
+
     gpu.launch_func @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref : memref<2x2x2xf32>, %memref_0 : memref<2x2x2xf32>, %memref_1 : memref<2x2x2xf32>)
+
     %alloc = memref.alloc() : memref<2x2x2xf32>
     gpu.memcpy %alloc, %memref_1 : memref<2x2x2xf32>, memref<2x2x2xf32>
     gpu.dealloc %memref_1 : memref<2x2x2xf32>
diff --git a/mlir/test/Integration/GPU/LEVELZERO/gpu-reluf32-to-spirv.mlir b/mlir/test/Integration/GPU/LEVELZERO/gpu-reluf32-to-spirv.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,reconcile-unrealized-casts)' \
 // RUN: | mlir-runner \
-// RUN:   --shared-libs=%mlir_sycl_runtime \
+// RUN:   --shared-libs=%mlir_levelzero_runtime \
 // RUN:   --shared-libs=%mlir_runner_utils \
 // RUN:   --entry-point-result=void \
 // RUN: | FileCheck %s
@@ -41,11 +41,16 @@ module @relu attributes {gpu.container_module} {
     memref.copy %arg0, %memref : memref<4x5xf32> to memref<4x5xf32>
     %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
     %2 = gpu.wait async
+
     %3 = gpu.launch_func async [%2]  @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
+
     gpu.wait [%3]
     %memref_1 = gpu.alloc host_shared () : memref<4x5xf32>
     %4 = gpu.wait async
-    %5 = gpu.launch_func async [%4]  @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32, %memref_1 : memref<4x5xf32>)
+
+    %5 = gpu.launch_func async [%4]  @test_kernel_0::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref_0 : memref<4x5xi1>, %memref : memref<4x5xf32>, %cst : f32,
+
+    %memref_1 : memref<4x5xf32>)
     gpu.wait [%5]
     %alloc = memref.alloc() : memref<4x5xf32>
     memref.copy %memref_1, %alloc : memref<4x5xf32> to memref<4x5xf32>