rebase

nsmithtt · nsmithtt · commit b2e0a2b17033 · 2026-03-05T20:26:55.000Z
diff --git a/lib/Dialect/D2M/Transforms/GridSelection.cpp b/lib/Dialect/D2M/Transforms/GridSelection.cpp
@@ -552,9 +552,9 @@ static void insertViewForTTNNDRAMTensor(Value operand,
       fakeShardedShape, metalTensor.getElementType(), viewOutputLayout);
 
   builder.setInsertionPointAfter(castOp);
-  auto viewOp = builder.create<d2m::ViewLayoutOp>(
-      castOp.getLoc(), viewOutputTensor, castOp.getResult(),
-      AffineMapAttr::get(reblockMap));
+  auto viewOp = d2m::ViewLayoutOp::create(builder, castOp.getLoc(),
+                                          viewOutputTensor, castOp.getResult(),
+                                          AffineMapAttr::get(reblockMap));
   castOp.getResult().replaceAllUsesExcept(viewOp.getResult(), viewOp);
 }
 
@@ -580,18 +580,20 @@ static void optimizeTTNNMetalLayoutCastOpGrid(
 
   builder.setInsertionPointAfter(castOp);
 
-  auto newViewLayoutOp = builder.create<d2m::ViewLayoutOp>(
-      castOp.getLoc(), newTensorType, castOp.getResult(), gridRemapping);
+  auto newViewLayoutOp =
+      d2m::ViewLayoutOp::create(builder, castOp.getLoc(), newTensorType,
+                                castOp.getResult(), gridRemapping);
 
   // Reblock it back to original shape to preserve IR correctness.
   auto viewOutputType = utils::reblockTensor(
       newTensorType, outputLayout.getGridShape(outputType));
   auto reblockMap = ttmlir::utils::calculateReblockMap(
       newTensorType.getShape(), viewOutputType.getShape(),
       builder.getContext());
-  auto revertingView = builder.create<d2m::ViewLayoutOp>(
-      castOp.getLoc(), viewOutputType, newViewLayoutOp.getResult(), reblockMap,
-      /*reinterpretLayout=*/false);
+  auto revertingView =
+      d2m::ViewLayoutOp::create(builder, castOp.getLoc(), viewOutputType,
+                                newViewLayoutOp.getResult(), reblockMap,
+                                /*reinterpretLayout=*/false);
 
   castOp.getResult().replaceAllUsesExcept(revertingView.getResult(),
                                           newViewLayoutOp);
@@ -1244,223 +1246,6 @@ recreateGenericOp(d2m::GenericOp genericOp,
   }
 }
 
-static bool hasTTNNOperands(d2m::GenericOp genericOp) {
-  for (Value operand : genericOp.getInputsAndOutputs()) {
-    if (operand.getDefiningOp<ttir::TTNNMetalLayoutCastOp>()) {
-      return true;
-    }
-    // Check if view operand's input is the result of a TTNNMetalLayoutCastOp.
-    if (auto view = operand.getDefiningOp<d2m::ViewLayoutOp>();
-        view && view.getInput().getDefiningOp<ttir::TTNNMetalLayoutCastOp>()) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// Computes the expected TTNN generic output grid shape for the given tensor.
-static llvm::SmallVector<llvm::SmallVector<int64_t>>
-computeTTNNGenericGridShapes(GenericOp genericOp,
-                             ArrayRef<int64_t> targetSquareGridShape) {
-
-  auto optimalOperandGrids = llvm::SmallVector<llvm::SmallVector<int64_t>>(
-      genericOp.getInputsAndOutputs().size());
-
-  // Determine dim size constraints based on L1 operands. L1 operands are
-  // assumed fixed and already legal; DRAM operand streams are aligned to match
-  // L1 shapes.
-  auto maybeConstrainedDims = genericOp.computeGridDimConstraints(
-      [&](ttcore::MetalLayoutAttr baseMetalLayout, bool isOutputOperand) {
-        return baseMetalLayout.getMemorySpace() ==
-               ttcore::MemorySpace::DeviceL1;
-      });
-  // this should be guaranteed if GenericOp verification is working.
-  TT_assertv(maybeConstrainedDims.has_value(),
-             "GenericOp dim constraints are cannot be satisfied.");
-  auto constrainedDims = maybeConstrainedDims.value();
-
-  auto indexingMaps = genericOp.getIndexingMapsValue();
-  auto getConstrainedDims = [&](int64_t operandIdx) {
-    return indexingMaps[operandIdx].compose(constrainedDims);
-  };
-  auto allDimsConstrained = [&](int64_t operandIdx) {
-    return llvm::all_of(getConstrainedDims(operandIdx),
-                        [](int64_t dim) { return dim != 0; });
-  };
-
-  // Set all grid shapes according to constraints
-  OpBuilder builder(genericOp->getContext());
-  for (auto [operandIdx, operand] :
-       llvm::enumerate(genericOp.getInputsAndOutputs())) {
-
-    auto constrainedDims = getConstrainedDims(operandIdx);
-    // if all dims are constrained, use the constrained dims.
-    if (allDimsConstrained(operandIdx)) {
-      optimalOperandGrids[operandIdx] = getConstrainedDims(operandIdx);
-    } else {
-      // if not all dims are constrained, shard to an optimal grid.
-      auto metalTensorType =
-          mlir::cast<mlir::RankedTensorType>(operand.getType());
-      auto baseMetalLayout =
-          mlir::cast<ttcore::MetalLayoutAttr>(metalTensorType.getEncoding());
-      auto constrainedDims = getConstrainedDims(operandIdx);
-
-      // Compute constrained target grid shape as min of targetSquareGridShape
-      // and constrainedDims (if constrainedDim > 0), else use
-      // targetSquareGridShape.
-      llvm::SmallVector<int64_t> constrainedTargetGridShape =
-          llvm::to_vector(targetSquareGridShape);
-      if (constrainedDims.size() == targetSquareGridShape.size()) {
-        for (size_t i = 0; i < targetSquareGridShape.size(); ++i) {
-          if (constrainedDims[i] > 0) {
-            constrainedTargetGridShape[i] =
-                std::min(constrainedDims[i], targetSquareGridShape[i]);
-          }
-        }
-      }
-
-      llvm::SmallVector<int64_t> physicalShape;
-      // If operand is DRAM interleaved operand that is the result of a
-      // ttnn->metal cast, we must generate a view of the underlying ttnn tensor
-      // _without_ padding, as the underlying tensor also is unpadded.
-      bool isNonPaddableTTNNDRAMOperand =
-          operand.getDefiningOp<ttir::TTNNMetalLayoutCastOp>() &&
-          baseMetalLayout.getMemorySpace() == ttcore::MemorySpace::DeviceDRAM &&
-          baseMetalLayout.getMemoryLayout() ==
-              ttcore::TensorMemoryLayout::Interleaved;
-      if (isNonPaddableTTNNDRAMOperand) {
-        llvm::SmallVector<int64_t> tileShape;
-        if (auto tileType = mlir::dyn_cast<ttcore::TileType>(
-                metalTensorType.getElementType())) {
-          tileShape = llvm::to_vector(tileType.getShape());
-        } else {
-          tileShape = llvm::to_vector(ttcore::TileType::getDefaultShape());
-        }
-        physicalShape = baseMetalLayout.getPhysicalShape(tileShape);
-      } else {
-        physicalShape =
-            computePhysicalShape(baseMetalLayout, metalTensorType,
-                                 constrainedTargetGridShape, builder);
-      }
-
-      optimalOperandGrids[operandIdx] = computeOptimalGrid(
-          metalTensorType, physicalShape, constrainedTargetGridShape);
-    }
-  }
-
-  return optimalOperandGrids;
-}
-
-// Finds and erases all unit reblocking views inserted by TTIRToD2M,
-// passing each view's input as the new operands.
-static void eraseUnitGridReblockingViews(d2m::GenericOp genericOp) {
-  // Use vector here to avoid invalidating iterator with erasures.
-  auto operands = llvm::to_vector(genericOp.getInputsAndOutputs());
-  for (Value operand : operands) {
-    if (auto viewOp = operand.getDefiningOp<d2m::ViewLayoutOp>()) {
-      auto originalOperand = viewOp.getInput();
-      viewOp.getResult().replaceAllUsesWith(originalOperand);
-      viewOp.erase();
-    }
-  }
-}
-
-// TTNN DRAM interleaved tensors are represented as having a 1x1 grid. This
-// leads to the genericOp having a worker grid of 1x1 since it must match the
-// output tensor grid. This is obviously not optimal. We match genericOps that
-// have TTNN DRAM interleaved tensors as operands and:
-// 1. Compute the "optimal" grid for the tensor as if it were a regular Metal
-// sharded tensor.
-// 2. Insert a view layout op to represent the tensor with the "optimal" grid.
-// 3. Update the genericOp to use the view output as an operand.
-//
-// Note the cast op is NOT erased as it represents the canonical layout mapping
-// between TTNN and Metal layouts.
-//
-// For a given TTNN DRAM interleaved tensor, we end up with the following
-// representations:
-// 1. The canonical translation of the TTNN tensor to a Metal tensor, having
-// a metal layout, DRAM memory space, and a 1x1 grid.
-//
-// 2. The "reblocked" version of tensor 1, having a metal layout, DRAM memory
-// space, an inferred grid, and an index map to index into the original
-// tensor.
-//
-// A view layout op  is used here so that the Allocator pass retains
-// ownership of stream insertion and buffer count selection.
-static llvm::SmallVector<llvm::SmallVector<int64_t>>
-insertTTNNDRAMViews(d2m::GenericOp genericOp,
-                    ArrayRef<int64_t> targetSquareGridShape) {
-
-  eraseUnitGridReblockingViews(genericOp);
-
-  auto optimalOperandGrids =
-      computeTTNNGenericGridShapes(genericOp, targetSquareGridShape);
-
-  OpBuilder builder(genericOp->getContext());
-  for (auto [operandIdx, operand] :
-       llvm::enumerate(genericOp.getInputsAndOutputs())) {
-    auto metalTensor = mlir::cast<mlir::RankedTensorType>(operand.getType());
-    auto baseMetalLayout =
-        mlir::cast<ttcore::MetalLayoutAttr>(metalTensor.getEncoding());
-    if (baseMetalLayout.getMemorySpace() != ttcore::MemorySpace::DeviceDRAM) {
-      continue;
-    }
-
-    // Do not "restream" metal -> ttnn -> metal sequences. This happens when the
-    // output of a generic is the input to another generic. The output is
-    // already streamed, but the cast back to ttnn silently erases the index
-    // map. Instead, we just forward the already streamed metal tensor to the
-    // current generic.
-    auto castOp = operand.getDefiningOp<ttir::TTNNMetalLayoutCastOp>();
-    TT_assertv(
-        castOp,
-        "If one d2m.generic operand is from TTNN, they must all be from TTNN.");
-    auto producerCastOp =
-        castOp.getInput().getDefiningOp<ttir::TTNNMetalLayoutCastOp>();
-    if (producerCastOp) {
-      castOp.getResult().replaceAllUsesExcept(producerCastOp.getInput(),
-                                              producerCastOp);
-      continue;
-    }
-
-    // TTNN DRAM interleaved tensors are represented as having a 1x1 grid.
-    llvm::SmallVector<int64_t> unitGridShape{1, 1};
-    llvm::SmallVector<int64_t> unShardedShapeWithGrid =
-        baseMetalLayout.getDeviceShape(unitGridShape,
-                                       ttcore::TileType::getDefaultShape());
-
-    llvm::SmallVector<int64_t> fakeShardedShape =
-        baseMetalLayout.getDeviceShape(optimalOperandGrids[operandIdx],
-                                       ttcore::TileType::getDefaultShape());
-
-    auto reblockMap = ttmlir::utils::calculateReblockMap(
-        unShardedShapeWithGrid, fakeShardedShape, builder.getContext());
-    auto viewOutputLayout = ttcore::MetalLayoutAttr::get(
-        builder.getContext(), baseMetalLayout.getLogicalShape(),
-        baseMetalLayout.getOobVal(), ttcore::MemorySpace::DeviceDRAM,
-        ttcore::TensorMemoryLayout::Interleaved,
-        baseMetalLayout.getCollapsedIntervals(),
-        baseMetalLayout.getDimAlignments());
-
-    auto viewOutputTensor = mlir::RankedTensorType::get(
-        fakeShardedShape, metalTensor.getElementType(), viewOutputLayout);
-
-    builder.setInsertionPointAfter(castOp);
-    auto viewOp =
-        d2m::ViewLayoutOp::create(builder, castOp.getLoc(), viewOutputTensor,
-                                  castOp.getResult(), reblockMap);
-    castOp.getResult().replaceAllUsesExcept(viewOp.getResult(), viewOp);
-  }
-
-  TT_assertv(llvm::all_of(optimalOperandGrids,
-                          [](const llvm::SmallVector<int64_t> &grid) {
-                            return !grid.empty();
-                          }),
-             "Optimal grids must be populated for all operands.");
-  return optimalOperandGrids;
-}
-
 // Assign optimized grids to all ToLayoutOps feeding into a GenericOp by
 // computing the optimal grid per tensor independently, mirroring the old
 // TTIRToD2M behavior.
diff --git a/lib/Target/TTKernel/TTKernelToCpp.cpp b/lib/Target/TTKernel/TTKernelToCpp.cpp
@@ -264,7 +264,7 @@ void dprint(Arg &&arg, ArgV&&... argv) {
       auto experimentalPackUntilizeLLKs =
           StringRef(experimental_pack_untilize_llks_generated,
                     experimental_pack_untilize_llks_generated_len);
-      builder->create<emitc::VerbatimOp>(loc, experimentalPackUntilizeLLKs);
+      emitc::VerbatimOp::create(*builder, loc, experimentalPackUntilizeLLKs);
     }
 
     if (hasCall("experimental::get_noc_multicast_addr")) {

Original file line number	Diff line number	Diff line change
`@@ -264,7 +264,7 @@ void dprint(Arg &&arg, ArgV&&... argv) {`
`264`	`264`	`auto experimentalPackUntilizeLLKs =`
`265`	`265`	`StringRef(experimental_pack_untilize_llks_generated,`
`266`	`266`	`experimental_pack_untilize_llks_generated_len);`
`267`		`- builder->create<emitc::VerbatimOp>(loc, experimentalPackUntilizeLLKs);`
	`267`	`+ emitc::VerbatimOp::create(*builder, loc, experimentalPackUntilizeLLKs);`
`268`	`268`	`}`
`269`	`269`
`270`	`270`	`if (hasCall("experimental::get_noc_multicast_addr")) {`