Linter fix + add annotate_module for gluon

mieshkiwrk · mieshkiwrk · commit 024f495fee62 · 2025-12-01T09:30:38.000Z
diff --git a/python/src/gluon_ir.cc b/python/src/gluon_ir.cc
@@ -900,12 +900,14 @@ void init_gluon_ir(py::module &&m) {
              return self.create<ttag::ArriveBarrierOp>(memDesc, count);
            })
       .def("create_prefetch",
-           [](GluonOpBuilder &self, Value tensorDesc, std::vector<Value> &offsets,
-              bool isVolatile) {
+           [](GluonOpBuilder &self, Value tensorDesc,
+              std::vector<Value> &offsets, bool isVolatile) {
              // Get the base pointer from tensor descriptor
-             auto makeTensorDescOp = tensorDesc.getDefiningOp<triton::MakeTensorDescOp>();
+             auto makeTensorDescOp =
+                 tensorDesc.getDefiningOp<triton::MakeTensorDescOp>();
              if (!makeTensorDescOp) {
-               throw std::runtime_error("Expected tensor descriptor from MakeTensorDescOp");
+               throw std::runtime_error(
+                   "Expected tensor descriptor from MakeTensorDescOp");
              }
 
              Value base = makeTensorDescOp.getBase();
@@ -918,7 +920,8 @@ void init_gluon_ir(py::module &&m) {
              // variadic of 64-bit signless integer, but got 'i32'
              SmallVector<Value> i64Shape;
              for (auto shapeVal : shape) {
-               auto i64Val = self.create<arith::ExtSIOp>(self.getBuilder().getI64Type(), shapeVal);
+               auto i64Val = self.create<arith::ExtSIOp>(
+                   self.getBuilder().getI64Type(), shapeVal);
                i64Shape.push_back(i64Val);
              }
 
@@ -944,11 +947,14 @@ void init_gluon_ir(py::module &&m) {
              // Empty mask
              Value maskVal = Value();
 
-             auto tensorPtrOp = self.create<mlir::triton::MakeTensorPtrOp>(base, /*shape*/i64Shape, strides, offsets,
-                                                 /*tensor_shape*/blockShape, order);
+             auto tensorPtrOp = self.create<mlir::triton::MakeTensorPtrOp>(
+                 base, /*shape*/ i64Shape, strides, offsets,
+                 /*tensor_shape*/ blockShape, order);
 
              auto op = self.create<ttgi::PrefetchOp>(
-                  /*base*/tensorPtrOp.getResult(), maskVal, tt::CacheModifier::NONE, tt::EvictionPolicy::NORMAL, isVolatile);
+                 /*base*/ tensorPtrOp.getResult(), maskVal,
+                 tt::CacheModifier::NONE, tt::EvictionPolicy::NORMAL,
+                 isVolatile);
              return op.getOperation();
            })
       // Example for passing block_ptr
@@ -961,7 +967,8 @@ void init_gluon_ir(py::module &&m) {
       //        Value maskVal = Value();
       //
       //        self.create<ttgi::PrefetchOp>(
-      //            ptr, maskVal, tt::CacheModifier::NONE, tt::EvictionPolicy::NORMAL, isVolatile);
+      //            ptr, maskVal, tt::CacheModifier::NONE,
+      //            tt::EvictionPolicy::NORMAL, isVolatile);
       //      })
       ;
 
diff --git a/python/src/ir.cc b/python/src/ir.cc
@@ -649,10 +649,8 @@ void init_triton_ir(py::module &&m) {
                return py::none();
              return py::str(ret.getValue().str());
            })
-      .def("set_attr",
-         [](Operation &self, const std::string &name, Attribute &attr) {
-           self.setAttr(name, attr);
-         });
+      .def("set_attr", [](Operation &self, const std::string &name,
+                          Attribute &attr) { self.setAttr(name, attr); });
 
   // dynamic_attr is used to transfer ownership of the MLIR context to the
   // module
@@ -1534,7 +1532,7 @@ void init_triton_ir(py::module &&m) {
            })
       .def("create_descriptor_store",
            [](TritonOpBuilder &self, Value desc, Value value,
-              std::vector<Value> &indices) -> Operation* {//void {
+              std::vector<Value> &indices) -> Operation * { // void {
              auto op = self.create<DescriptorStoreOp>(desc, value, indices);
              return op.getOperation();
            })
diff --git a/python/triton/experimental/gluon/language/intel/xpu/__init__.py b/python/triton/experimental/gluon/language/intel/xpu/__init__.py
@@ -1,4 +1,3 @@
 from . import xe
 
 __all__ = ["xe"]
-
diff --git a/python/triton/experimental/gluon/language/intel/xpu/xe.py b/python/triton/experimental/gluon/language/intel/xpu/xe.py
@@ -2,23 +2,19 @@
 
 from typing import List, Tuple, Sequence
 from dataclasses import dataclass
-import triton.language.core as tl_core
 
 import triton.experimental.gluon.language._core as ttgl
 from triton.experimental.gluon.language._layouts import DotOperandLayout
 from triton.experimental.gluon.language.intel._layouts import IntelDPASLayout
 from triton.experimental.gluon.language._core import builtin, _unwrap_if_constexpr
 from triton.language.core import ir, constexpr, tensor_descriptor_base, block_type, tensor, tuple
 
-
 # load_tensor_descriptor = builtin(tl_core.load_tensor_descriptor)
 # store_tensor_descriptor = builtin(tl_core.store_tensor_descriptor)
 
-
 __all__ = ["make_tensor_descriptor", "dot_fma"]
 
 
-
 class tensor_descriptor(tensor_descriptor_base):
     """A descriptor representing a tensor in global memory."""
 
@@ -31,12 +27,9 @@ def __init__(self, handle, shape: List[tensor], strides: List[tensor], block_typ
         self.strides = tuple(strides)
         self.layout = layout
 
-        self.type = tensor_descriptor_type(
-            block_type,
-            shape_type=self.shape.type,
-            strides_type=self.strides.type,
-            layout=self.layout, # comment
-        )
+        self.type = tensor_descriptor_type(block_type, shape_type=self.shape.type, strides_type=self.strides.type,
+                                           layout=self.layout,  # comment
+                                           )
 
     def _flatten_ir(self, handles: List[ir.value]) -> None:
         handles.append(self.handle)
@@ -72,14 +65,14 @@ def store(self, offsets: Sequence[constexpr | tensor], value: tensor, is_2d_bloc
         return op
 
     @builtin
-    def prefetch(self, offsets: Sequence[constexpr | tensor], mask=None, cache=None, evict=None, is_volatile=False, is_2d_block=False, _semantic=None):
+    def prefetch(self, offsets: Sequence[constexpr | tensor], mask=None, cache=None, evict=None, is_volatile=False,
+                 is_2d_block=False, _semantic=None):
         # TODO: handle other ttig.prefetch params
         # ptr is just temporary, support for tensor descriptor is needed
         # calculate offsets like tt.advance
         # maybe add support for mask, seems optional
         # also 2d block attr and others
         #return _semantic.builder.create_prefetch(ptr.handle, False)
-
         """
         pyton/triton/language/semantic.py @ load:1077 (TritonSemantic)
         cache_modifier: str, eviction_policy: str
@@ -98,7 +91,6 @@ def prefetch(self, offsets: Sequence[constexpr | tensor], mask=None, cache=None,
         return op
 
 
-
 @dataclass(eq=True)
 class tensor_descriptor_type(ttgl.base_type):
     """The type for a tensor descriptor."""
@@ -137,9 +129,8 @@ def mangle(self) -> str:
 
 
 @builtin
-def make_tensor_descriptor(ptr: ttgl.tensor, shape: List[int], strides: List[int],
-                          block_shape: List[int], layout: IntelDPASLayout,
-                          _semantic=None) -> tensor_descriptor:
+def make_tensor_descriptor(ptr: ttgl.tensor, shape: List[int], strides: List[int], block_shape: List[int],
+                           layout: IntelDPASLayout, _semantic=None) -> tensor_descriptor:
     # Unwrap constexpr if needed
     layout = _unwrap_if_constexpr(layout)
 
@@ -168,20 +159,16 @@ def make_tensor_descriptor(ptr: ttgl.tensor, shape: List[int], strides: List[int
     shape_tuple = ttgl.tuple(shape_tensors, shape_type)
     strides_tuple = ttgl.tuple(stride_tensors, strides_type)
 
-    desc_type = tensor_descriptor_type(block_type, shape_type, strides_type, layout) #, shape_handles)
+    desc_type = tensor_descriptor_type(block_type, shape_type, strides_type, layout)  #, shape_handles)
 
     # Create the descriptor
     padding = _semantic._str_to_padding_option("zero")
-    desc_handle = _semantic.builder.create_make_tensor_descriptor(
-        desc_type._to_ir(_semantic.builder),
-        ptr_handle,
-        shape_handles,
-        stride_handles,
-        padding
-    )
+    desc_handle = _semantic.builder.create_make_tensor_descriptor(desc_type._to_ir(_semantic.builder), ptr_handle,
+                                                                  shape_handles, stride_handles, padding)
 
     return tensor_descriptor(desc_handle, shape_tuple, strides_tuple, block_type, layout)
 
+
 @builtin
 def dot_fma(a, b, acc, _semantic=None):
     assert isinstance(a, tensor), "a must be a tensor"
@@ -199,4 +186,3 @@ def dot_fma(a, b, acc, _semantic=None):
 
     handle = _semantic.dot(a, b, acc, input_precision=None, max_num_imprecise_acc=None, out_dtype=acc.dtype).handle
     return tensor(handle, acc.type)
-
diff --git a/third_party/intel/backend/compiler.py b/third_party/intel/backend/compiler.py
@@ -29,7 +29,7 @@ class XPUOptions:
     num_ctas: int = 1
     num_stages: int = 2
     cluster_dims: tuple = (1, 1, 1)
-    warp_size: int = 16 #32 # TODO:[mdziado]
+    warp_size: int = 16  #32 # TODO:[mdziado]
     optimize_epilogue: bool = False
     enable_fp_fusion: bool = True
     launch_cooperative_grid: bool = False
@@ -306,8 +306,15 @@ def make_ttgir(cls, mod, metadata, opt, properties):
         metadata["cluster_dims"] = (cluster_info.clusterDimX, cluster_info.clusterDimY, cluster_info.clusterDimZ)
         return mod
 
-    def gluon_to_ttgir(self, src, metadata, options):
-        mod = src
+    def gluon_to_ttgir(self, mod, metadata, options):
+        pm = ir.pass_manager(mod.context)
+        pm.enable_debug()
+
+        module_opts = intel.passes.ttgpuir.AnnotateModuleOptions()
+        self.annotate_module(module_opts, self.properties, options)
+        intel.passes.ttgpuir.add_triton_annotate_module(pm, module_opts)
+        pm.run(mod, 'annotate_module')
+
         pm = ir.pass_manager(mod.context)
         pm.enable_debug()
 
diff --git a/third_party/intel/lib/Dialect/Triton/Transforms/TensorDescToBlockPointer.cpp b/third_party/intel/lib/Dialect/Triton/Transforms/TensorDescToBlockPointer.cpp
@@ -1,13 +1,13 @@
 #include "intel/include/Dialect/Triton/Transforms/Passes.h"
 #include "intel/include/Dialect/TritonGEN/IR/TritonGENDialect.h"
+#include "intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
 #include "intel/include/Utils/Utility.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Verifier.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "triton/Dialect/Triton/IR/Dialect.h"
-#include "intel/include/Dialect/TritonIntelGPU/IR/Dialect.h"
-#include "triton/Dialect/TritonGPU/IR/Dialect.h"
 #include "triton/Dialect/Triton/IR/Types.h"
+#include "triton/Dialect/TritonGPU/IR/Dialect.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
@@ -143,7 +143,8 @@ struct TritonIntelTensorDescToBlockPointer
       auto tensorType = RankedTensorType::get(
           SmallVector<int64_t>(sizes.begin(), sizes.end()),
           pointerType.getPointeeType(), encoding);
-      auto resultType = mlir::triton::PointerType::get(tensorType, pointerType.getAddressSpace());
+      auto resultType = mlir::triton::PointerType::get(
+          tensorType, pointerType.getAddressSpace());
 
       auto makeTensorPtr = builder.create<tt::MakeTensorPtrOp>(
           loc, resultType, base, shape, strides, offsets,
@@ -278,8 +279,9 @@ struct TritonIntelTensorDescToBlockPointer
           /*padding*/ std::nullopt, op.getCache(), op.getEvict(),
           /*volatile*/ false);
       if (blockIOAttr) {
-          auto* loadOpInst = loadOp.getDefiningOp();
-          loadOpInst->setAttr(ttgi::TritonIntelGPUDialect::getBlockIOAttrName(), blockIOAttr);
+        auto *loadOpInst = loadOp.getDefiningOp();
+        loadOpInst->setAttr(ttgi::TritonIntelGPUDialect::getBlockIOAttrName(),
+                            blockIOAttr);
       }
       LLVM_DEBUG(llvm::dbgs().indent(2) << loadOp << "\n");
       op.replaceAllUsesWith(loadOp);
@@ -288,7 +290,8 @@ struct TritonIntelTensorDescToBlockPointer
           loc, ptr, op.getSrc(), boundaryCheck, tt::CacheModifier::NONE,
           tt::EvictionPolicy::NORMAL);
       if (blockIOAttr) {
-          storeOp->setAttr(ttgi::TritonIntelGPUDialect::getBlockIOAttrName(), blockIOAttr);
+        storeOp->setAttr(ttgi::TritonIntelGPUDialect::getBlockIOAttrName(),
+                         blockIOAttr);
       }
       LLVM_DEBUG(llvm::dbgs().indent(2) << storeOp << "\n");
     }

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -1,4 +1,3 @@` |
| `1` | `1` | `from . import xe` |
| `2` | `2` |  |
| `3` | `3` | `__all__ = ["xe"]` |
| `4` |  | `-` |