Skip to content

Commit b24f09b

Browse files
committed
Address review comments.
1 parent 14487e3 commit b24f09b

File tree

3 files changed

+6
-4
lines changed

3 files changed

+6
-4
lines changed

mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- Passes.h - GPU pipeline entry points----------------------===//
1+
//===- Passes.h - GPU pipeline entry points--------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ void buildCommonPassPipeline(
5050
xevmTargetOptions.cmdOptions = options.cmdOptions;
5151
pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions));
5252
}
53-
pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
53+
pm.addPass(createLowerAffinePass());
5454
pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
5555
}
5656

@@ -84,14 +84,14 @@ void buildGpuPassPipeline(OpPassManager &pm,
8484
createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
8585
}
8686
pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
87+
pm.addPass(createReconcileUnrealizedCastsPass());
8788
}
8889

8990
//===----------------------------------------------------------------------===//
9091
// Host Post-GPU pipeline
9192
//===----------------------------------------------------------------------===//
9293
void buildHostPostPipeline(OpPassManager &pm,
9394
const mlir::gpu::GPUToXeVMPipelineOptions &options) {
94-
pm.addPass(createReconcileUnrealizedCastsPass());
9595
pm.addPass(createSCFToControlFlowPass());
9696
pm.addPass(memref::createExpandStridedMetadataPass());
9797
{

mlir/test/Integration/Dialect/XeGPU/WG/simple_gemm.mlir

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ module @gemm attributes {gpu.container_module} {
2626
gpu.memcpy %B_gpu, %B : memref<256x256xf16>, memref<256x256xf16>
2727
%C_gpu = gpu.alloc () : memref<256x256xf32>
2828
gpu.memcpy %C_gpu, %C : memref<256x256xf32>, memref<256x256xf32>
29-
// NOTE: Here we can't use [8, 64] wi threads following the SG thread layout of [8, 4]. Because runtime will linearize the x dimension first (we need y dimension to be linearized first).
29+
// NOTE: Here we can't use [8, 64] wi threads following
30+
// the SG thread layout of [8, 4], because the runtime will linearize
31+
// the x dimension first (we need y dimension to be linearized first).
3032
// So just use linearized thread layout of [512, 1] wi threads.
3133
gpu.launch_func @test_kernel::@test_kernel blocks in (%c1, %c1, %c1) threads in (%c512, %c1, %c1) args(%A_gpu : memref<256x256xf16>, %B_gpu : memref<256x256xf16>, %C_gpu : memref<256x256xf32>)
3234
gpu.wait // Wait for the kernel to finish.

0 commit comments

Comments
 (0)