@@ -312,87 +312,6 @@ LogicalResult TensorDescType::verify(
   return success();
 }
 
-// If tensor descriptor has a layout attribute it is used in SIMT mode.
-// In this mode, the distributed vector shape is determined as follows:
-// Definitions:
-//        lane_data_size = lane_data[0] × lane_data[1]
-//        subgroup_size = lane_layout[0] × lane_layout[1]
-//        distribution_unit_size = subgroup_size × lane_data_size
-// ---------------------------------------------------------------------
-// Case 1: Regular loads/stores.
-// ---------------------------------------------------------------------
-// The following conditions must be met:
-//        * tensor_desc[0] == lane_layout[0]
-// Distributed vector is a 1D vector with shape:
-//        [chunk_size]
-// ---------------------------------------------------------------------
-// Case 2: Block loads/stores.
-// ---------------------------------------------------------------------
-// Additional definitions:
-//        tensor_size = tensor_desc[0] * .. * tensor_desc[r-1] * array_length
-//        n_distribution_units = tensor_size / distribution_unit_size
-//        fragment_size = n_distribution_units * lane_data_size
-// Given above definitions, the following conditions must be met:
-//        * tensor_desc[0] % (lane_layout[0] × lane_data[0]) == 0
-//        * tensor_desc[1] % (lane_layout[1] × lane_data[1]) == 0
-// Distributed vector is a 1D vector with shape:
-//        [fragment_size]
-FailureOr<VectorType> getDistributedVectorType(xegpu::TensorDescType tdescTy) {
-  auto layout = llvm::dyn_cast_if_present<LayoutAttr>(tdescTy.getLayout());
-  // This only works for a subgroup-level layout, which carries only
-  // lane_layout and lane_data, and is used to distribute SIMD code into
-  // SIMT code.
-  if (!layout || !layout.isSgLayout())
-    return failure();
-
-  SmallVector<int64_t> laneData(layout.getLaneData().asArrayRef());
-  SmallVector<int64_t> laneLayout(layout.getLaneLayout().asArrayRef());
-  auto tdescShape = tdescTy.getShape();
-  auto elementType = tdescTy.getElementType();
-
-  // Compute sgSize by multiplying the elements of laneLayout,
-  // e.g. for a 2D layout, sgSize = laneLayout[0] * laneLayout[1];
-  // for a 1D layout, sgSize = laneLayout[0].
-  auto sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(), 1,
-                                std::multiplies<int64_t>());
-  // Case 1: regular loads/stores.
-  auto scatterAttr = tdescTy.getEncodingAsScatterTensorDescAttr();
-  if (scatterAttr) {
-    auto chunkSize = scatterAttr.getChunkSize().getInt();
-    // Verify that the first dimension of the tensor descriptor shape is
-    // distributable.
-    assert(tdescShape[0] == laneLayout[0] &&
-           "tensor descriptor shape is not distributable");
-    return VectorType::get({chunkSize}, elementType);
-  }
-
-  // Case 2: block loads/stores.
-  // Check if the tensor descriptor shape is distributable.
-  int64_t tensorSize = 1;
-  for (auto [tdescDim, laneDim, laneDataDim] :
-       llvm::zip_equal(tdescShape, laneLayout, laneData)) {
-    assert((tdescDim % (laneDim * laneDataDim) == 0) &&
-           "tensor descriptor shape is not distributable");
-    tensorSize *= tdescDim;
-  }
-  // tensorSize must be adjusted for array_length.
-  tensorSize *= tdescTy.getArrayLength();
-
-  return VectorType::get({tensorSize / sgSize}, elementType);
-}
-
-// Helper to get the distributed vector type for a given vector type according
-// to a given LayoutAttr.
-FailureOr<VectorType> getDistributedVectorType(VectorType originalType,
-                                               LayoutAttr layout) {
-  auto shape = originalType.getShape();
-  auto helperTdescTy = xegpu::TensorDescType::get(
-      shape, originalType.getElementType(),
-      /*array_length=*/1, /*boundary_check=*/true,
-      /*memory_space=*/xegpu::MemorySpace::Global, layout);
-  return xegpu::getDistributedVectorType(helperTdescTy);
-}
-
 } // namespace xegpu
 } // namespace mlir
 
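The Case 2 arithmetic in the removed comments can be checked with a small standalone sketch. The 32x16 descriptor shape, lane_layout of [1, 16], and lane_data of [1, 1] below are hypothetical example values chosen only to illustrate the formulas, not values taken from the patch:

#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

// Walks through the Case 2 (block load/store) formulas with example values:
// a 32x16 tensor descriptor, lane_layout [1, 16], lane_data [1, 1].
int main() {
  std::vector<int64_t> tdescShape = {32, 16};
  std::vector<int64_t> laneLayout = {1, 16};
  std::vector<int64_t> laneData = {1, 1};
  int64_t arrayLength = 1;

  // subgroup_size = lane_layout[0] × lane_layout[1] = 16
  int64_t sgSize = std::accumulate(laneLayout.begin(), laneLayout.end(),
                                   int64_t{1}, std::multiplies<int64_t>());

  // tensor_size = tensor_desc[0] * tensor_desc[1] * array_length = 512,
  // checking each dimension for distributability along the way.
  int64_t tensorSize = 1;
  for (size_t i = 0; i < tdescShape.size(); ++i) {
    assert(tdescShape[i] % (laneLayout[i] * laneData[i]) == 0);
    tensorSize *= tdescShape[i];
  }
  tensorSize *= arrayLength;

  // fragment_size = n_distribution_units * lane_data_size, which reduces to
  // tensor_size / subgroup_size = 512 / 16 = 32, i.e. each lane receives a
  // 1D vector of 32 elements (vector<32xf16> for an f16 descriptor).
  assert(tensorSize / sgSize == 32);
  return 0;
}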
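The second removed overload merely wraps the vector shape in a throwaway TensorDescType so that both entry points share the same distribution logic. A hypothetical call site, assuming an OpBuilder named builder and a subgroup-level LayoutAttr named layout (lane_layout [1, 16], lane_data [1, 1]) are already in scope:

// Hypothetical usage, not part of the patch; `builder` and `layout` are
// assumed to exist in the surrounding code.
VectorType srcTy = VectorType::get({32, 16}, builder.getF16Type());
FailureOr<VectorType> distTy = xegpu::getDistributedVectorType(srcTy, layout);
if (succeeded(distTy)) {
  // With the layout above, distTy->getShape() is {32}, i.e. vector<32xf16>.
}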