Add GPUToXeVM lowering pipeline pass.

mshahneo · mshahneo · commit 82f34dd9c37c · 2025-10-14T21:15:43.000Z
It's the default GPU to XeVM lowering pipeline. It starts by lowering GPU
code to the specified compilation target (default is fatbin),
then lowers the host code.
If XeGPU ops are used, it expects the MLIR code to have
XeGPU ops already embedded in gpu code.
diff --git a/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h b/mlir/include/mlir/Dialect/GPU/Pipelines/Passes.h
@@ -1,4 +1,4 @@
-//===- Passes.h - GPU NVVM pipeline entry points --------------------------===//
+//===- Passes.h - GPU NVVM/XeVM pipeline entry points ---------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -60,6 +60,47 @@ struct GPUToNVVMPipelineOptions
       llvm::cl::init(false)};
 };
 
+/// Command-line configurable options for the GPU to XeVM lowering pipeline.
+struct GPUToXeVMPipelineOptions
+    : public PassPipelineOptions<GPUToXeVMPipelineOptions> {
+  // General lowering controls: index width, calling conventions, output kind.
+  PassOptions::Option<int64_t> indexBitWidth{
+      *this, "index-bitwidth",
+      llvm::cl::desc("Bitwidth of the index type (host & device)"),
+      llvm::cl::init(64)}; // Feeds the boolean use64bitIndex device option.
+  PassOptions::Option<bool> kernelBarePtrCallConv{
+      *this, "kernel-bare-ptr-calling-convention",
+      llvm::cl::desc("Use bare pointer calling convention for device kernels"),
+      llvm::cl::init(false)};
+  PassOptions::Option<bool> hostBarePtrCallConv{
+      *this, "host-bare-ptr-calling-convention",
+      llvm::cl::desc("Use bare pointer calling convention for host launches"),
+      llvm::cl::init(false)};
+  PassOptions::Option<std::string> binaryFormat{
+      *this, "binary-format",
+      llvm::cl::desc("Final GPU binary emission format (e.g. fatbin)"),
+      llvm::cl::init("fatbin")};
+  // Forwarded to the xevm-attach-target (GpuXeVMAttachTarget) pass options.
+  PassOptions::Option<std::string> xevmModuleMatcher{
+      *this, "xevm-module-matcher",
+      llvm::cl::desc("Regex to match gpu.module names for XeVM target attach"),
+      llvm::cl::init("")};
+  PassOptions::Option<std::string> zebinTriple{
+      *this, "zebin-triple", llvm::cl::desc("Target triple for XeVM codegen"),
+      llvm::cl::init("spirv64-unknown-unknown")};
+  PassOptions::Option<std::string> zebinChip{
+      *this, "zebin-chip", llvm::cl::desc("Target chip (e.g. pvc, bmg)"),
+      llvm::cl::init("bmg")};
+  PassOptions::Option<unsigned> optLevel{
+      *this, "opt-level",
+      llvm::cl::desc("Optimization level for attached target/codegen"),
+      llvm::cl::init(2)};
+  PassOptions::Option<std::string> cmdOptions{
+      *this, "igc-cmd-options",
+      llvm::cl::desc("Additional downstream compiler command line options"),
+      llvm::cl::init("")}; // Also passed to the gpu-module-to-binary pass.
+};
+
 //===----------------------------------------------------------------------===//
 // Building and Registering.
 //===----------------------------------------------------------------------===//
@@ -70,8 +111,15 @@ struct GPUToNVVMPipelineOptions
 void buildLowerToNVVMPassPipeline(OpPassManager &pm,
                                   const GPUToNVVMPipelineOptions &options);
 
+/// Adds the GPU to XeVM pipeline to the given pass manager. Transforms main
+/// dialects into XeVM targets. Begins with GPU code regions, then handles host
+/// code.
+void buildLowerToXeVMPassPipeline(OpPassManager &pm,
+                                  const GPUToXeVMPipelineOptions &options);
+
 /// Register all pipelines for the `gpu` dialect.
 void registerGPUToNVVMPipeline();
+void registerGPUToXeVMPipeline();
 
 } // namespace gpu
 } // namespace mlir
diff --git a/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt b/mlir/lib/Dialect/GPU/Pipelines/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIRGPUPipelines
   GPUToNVVMPipeline.cpp
+  GPUToXeVMPipeline.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -12,11 +13,14 @@ add_mlir_dialect_library(MLIRGPUPipelines
   MLIRLinalgTransforms
   MLIRAffineToStandard
   MLIRGPUToNVVMTransforms
   MLIRIndexToLLVM
   MLIRMathToLLVM
   MLIRNVGPUToNVVM
   MLIRNVVMToLLVM
   MLIRReconcileUnrealizedCasts
   MLIRSCFToControlFlow
   MLIRVectorToSCF
+  MLIRXeGPUTransforms
+  MLIRXeGPUToXeVM
+  MLIRXeVMToLLVM
 )
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp
@@ -0,0 +1,138 @@
+//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass pipeline that lowers GPU code to XeVM, usable
+// as a general sink pipeline. If XeGPU ops are used, it expects the MLIR code
+// to have XeGPU ops already embedded in gpu code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
+#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
+#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
+#include "mlir/Conversion/IndexToLLVM/IndexToLLVM.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
+#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
+#include "mlir/Conversion/Passes.h"
+#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
+#include "mlir/Conversion/VectorToSCF/VectorToSCF.h"
+#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h"
+#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h"
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/Dialect/GPU/Pipelines/Passes.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
+#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Pass/PassOptions.h"
+#include "mlir/Target/LLVM/XeVM/Target.h"
+#include "mlir/Transforms/Passes.h"
+
+using namespace mlir;
+
+namespace {
+//===----------------------------------------------------------------------===//
+// Common pipeline
+//===----------------------------------------------------------------------===//
+/// Appends the passes that run at `builtin.module` scope before any
+/// device-specific lowering: a CSE run, then the XeVM attach-target pass
+/// configured from `options`.
+void buildCommonPassPipeline(
+    OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+  pm.addPass(createCSEPass());
+
+  // Mirror the pipeline-level target settings onto the attach-target pass.
+  GpuXeVMAttachTargetOptions attachOptions;
+  attachOptions.cmdOptions = options.cmdOptions;
+  attachOptions.optLevel = options.optLevel;
+  attachOptions.chip = options.zebinChip;
+  attachOptions.triple = options.zebinTriple;
+  attachOptions.moduleMatcher = options.xevmModuleMatcher;
+  pm.addPass(createGpuXeVMAttachTarget(attachOptions));
+}
+
+//===----------------------------------------------------------------------===//
+// GPUModule-specific stuff.
+//===----------------------------------------------------------------------===//
+/// Appends the device-side lowering passes, each nested on `gpu.module` ops.
+///
+/// Order as added: XeGPU workgroup-to-subgroup distribution, affine lowering,
+/// XeGPU blocking, layout propagation, subgroup distribution, loop-invariant
+/// code motion and vector linearization (with canonicalize/CSE cleanups in
+/// between), followed by the XeGPU -> XeVM, GPU -> LLVM-SPV and
+/// XeVM -> LLVM conversions.
+///
+/// \param pm      Pass manager the nested passes are appended to.
+/// \param options Pipeline options; only `indexBitWidth` is consumed here.
+void buildGpuPassPipeline(OpPassManager &pm,
+                          const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createLowerAffinePass());
+  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUPropagateLayout());
+  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize());
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass());
+  ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions;
+  // `use64bitIndex` is an on/off switch, so derive it from the requested
+  // width explicitly. Assigning the bit width directly would turn every
+  // non-zero width (e.g. 32) into `true`.
+  const int64_t indexBitWidth = options.indexBitWidth;
+  gpuToLLVMSPVOptions.use64bitIndex = indexBitWidth == 64;
+  pm.addNestedPass<gpu::GPUModuleOp>(
+      createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions));
+  pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeVMToLLVMPass());
+  pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
+}
+
+//===----------------------------------------------------------------------===//
+// Host Post-GPU pipeline
+//===----------------------------------------------------------------------===//
+/// Appends the host-side lowering that follows the device lowering, ending
+/// with serialization of the GPU modules.
+///
+/// \param pm      Pass manager the passes are appended to.
+/// \param options Pipeline options; supplies the bare-pointer calling
+///                convention flags, the binary format and the extra compiler
+///                command line options.
+void buildHostPostPipeline(OpPassManager &pm,
+                           const mlir::gpu::GPUToXeVMPipelineOptions &options) {
+  pm.addNestedPass<func::FuncOp>(LLVM::createLLVMRequestCWrappersPass());
+  pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass());
+  pm.addPass(createReconcileUnrealizedCastsPass());
+  pm.addPass(createConvertVectorToSCFPass());
+  pm.addPass(createSCFToControlFlowPass());
+  pm.addPass(memref::createExpandStridedMetadataPass());
+  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
+  {
+    GpuToLLVMConversionPassOptions gpuToLLVMOptions;
+    gpuToLLVMOptions.hostBarePtrCallConv = options.hostBarePtrCallConv;
+    gpuToLLVMOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv;
+    pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOptions));
+  }
+  // Affine ops must be lowered before the generic LLVM conversion: the ops
+  // produced by lower-affine would otherwise never be converted to LLVM.
+  pm.addPass(createLowerAffinePass());
+  pm.addPass(createConvertToLLVMPass());
+  // gpu-module-to-binary
+  {
+    GpuModuleToBinaryPassOptions gpuToModuleBinOptions;
+    gpuToModuleBinOptions.compilationTarget = options.binaryFormat;
+    gpuToModuleBinOptions.cmdOptions = options.cmdOptions;
+    pm.addPass(createGpuModuleToBinaryPass(gpuToModuleBinOptions));
+  }
+  pm.addPass(createReconcileUnrealizedCastsPass());
+}
+} // namespace
+
+/// Assembles the complete GPU to XeVM lowering pipeline on `pm` in three
+/// stages: shared module-level setup, device (gpu.module) lowering, and the
+/// host lowering that follows it.
+void mlir::gpu::buildLowerToXeVMPassPipeline(
+    OpPassManager &pm, const GPUToXeVMPipelineOptions &options) {
+  // Stage 1: module-wide preparation shared by host and device code.
+  buildCommonPassPipeline(pm, options);
+  // Stage 2: lowering of the code nested inside gpu.module ops.
+  buildGpuPassPipeline(pm, options);
+  // Stage 3: host-side lowering, run after the device code is lowered.
+  buildHostPostPipeline(pm, options);
+}
+
+/// Registers the GPU to XeVM lowering pipeline under the
+/// `gpu-lower-to-xevm-pipeline` name, built by buildLowerToXeVMPassPipeline.
+void mlir::gpu::registerGPUToXeVMPipeline() {
+  static constexpr char pipelineArg[] = "gpu-lower-to-xevm-pipeline";
+  static constexpr char pipelineDescription[] =
+      "The default GPU to XeVM lowering pipeline. It starts by lowering GPU "
+      "code to the specified compilation target (default is fatbin) then "
+      "lowers the host code.";
+  PassPipelineRegistration<GPUToXeVMPipelineOptions>(
+      pipelineArg, pipelineDescription, buildLowerToXeVMPassPipeline);
+}
diff --git a/mlir/lib/RegisterAllPasses.cpp b/mlir/lib/RegisterAllPasses.cpp
@@ -98,4 +98,5 @@ void mlir::registerAllPasses() {
   sparse_tensor::registerSparseTensorPipelines();
   tosa::registerTosaToLinalgPipelines();
   gpu::registerGPUToNVVMPipeline();
+  gpu::registerGPUToXeVMPipeline();
 }

Original file line number	Diff line number	Diff line change
`@@ -98,4 +98,5 @@ void mlir::registerAllPasses() {`
`98`	`98`	`sparse_tensor::registerSparseTensorPipelines();`
`99`	`99`	`tosa::registerTosaToLinalgPipelines();`
`100`	`100`	`gpu::registerGPUToNVVMPipeline();`
	`101`	`+ gpu::registerGPUToXeVMPipeline();`
`101`	`102`	`}`