Address review comments.

mshahneo · mshahneo · commit b24f09b2fcb9 · 2025-10-17T17:56:38.000Z
diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -1,4 +1,4 @@
-//===- Passes.h - GPU pipeline entry points----------------------===//
+//===- Passes.h - GPU pipeline entry points--------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp
@@ -50,7 +50,7 @@ void buildCommonPassPipeline(
     xevmTargetOptions.cmdOptions = options.cmdOptions;
     pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions));
   }
-  pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
+  pm.addPass(createLowerAffinePass());
   pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
 }
 
@@ -84,14 +84,14 @@ void buildGpuPassPipeline(OpPassManager &pm,
         createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
   }
   pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addPass(createReconcileUnrealizedCastsPass());
 }
 
 //===----------------------------------------------------------------------===//
 // Host Post-GPU pipeline
 //===----------------------------------------------------------------------===//
 void buildHostPostPipeline(OpPassManager &pm,
                            const mlir::gpu::GPUToXeVMPipelineOptions &options) {
-  pm.addPass(createReconcileUnrealizedCastsPass());
   pm.addPass(createSCFToControlFlowPass());
   pm.addPass(memref::createExpandStridedMetadataPass());
   {
diff --git a/mlir/test/Integration/Dialect/XeGPU/WG/simple_gemm.mlir b/mlir/test/Integration/Dialect/XeGPU/WG/simple_gemm.mlir
@@ -26,7 +26,9 @@ module @gemm attributes {gpu.container_module} {
     gpu.memcpy %B_gpu, %B : memref<256x256xf16>, memref<256x256xf16>
     %C_gpu = gpu.alloc () : memref<256x256xf32>
     gpu.memcpy %C_gpu, %C : memref<256x256xf32>, memref<256x256xf32>
-    // NOTE: Here we can't use [8, 64] wi threads following the SG thread layout of [8, 4]. Because runtime will linearize the x dimension first (we need y dimension to be linearized first).
+    // NOTE: Here we can't use [8, 64] wi threads following
+    // the SG thread layout of [8, 4], because the runtime will linearize
+    // the x dimension first (we need the y dimension to be linearized first).
     // So just use linearized thread layout of [512, 1] wi threads.
     gpu.launch_func  @test_kernel::@test_kernel blocks in (%c1, %c1, %c1) threads in (%c512, %c1, %c1) args(%A_gpu : memref<256x256xf16>, %B_gpu : memref<256x256xf16>, %C_gpu : memref<256x256xf32>)
     gpu.wait // Wait for the kernel to finish.

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-//===- Passes.h - GPU pipeline entry points----------------------===//`
	`1`	`+//===- Passes.h - GPU pipeline entry points--------------------------------===//`
`2`	`2`	`//`
`3`	`3`	`// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.`
`4`	`4`	`// See https://llvm.org/LICENSE.txt for license information.`
Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@ void buildCommonPassPipeline(`
`50`	`50`	`xevmTargetOptions.cmdOptions = options.cmdOptions;`
`51`	`51`	`pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions));`
`52`	`52`	`}`
`53`		`- pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());`
	`53`	`+ pm.addPass(createLowerAffinePass());`
`54`	`54`	`pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());`
`55`	`55`	`}`
`56`	`56`
`@@ -84,14 +84,14 @@ void buildGpuPassPipeline(OpPassManager &pm,`
`84`	`84`	`createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));`
`85`	`85`	`}`
`86`	`86`	`pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());`
	`87`	`+ pm.addPass(createReconcileUnrealizedCastsPass());`
`87`	`88`	`}`
`88`	`89`
`89`	`90`	`//===----------------------------------------------------------------------===//`
`90`	`91`	`// Host Post-GPU pipeline`
`91`	`92`	`//===----------------------------------------------------------------------===//`
`92`	`93`	`void buildHostPostPipeline(OpPassManager &pm,`
`93`	`94`	`const mlir::gpu::GPUToXeVMPipelineOptions &options) {`
`94`		`- pm.addPass(createReconcileUnrealizedCastsPass());`
`95`	`95`	`pm.addPass(createSCFToControlFlowPass());`
`96`	`96`	`pm.addPass(memref::createExpandStridedMetadataPass());`
`97`	`97`	`{`