@@ -635,22 +635,37 @@ index 0000000..dc33501
635635+ SdyDialect
636636+ SdyTransformsPropagationShardingProjection
637637+ )
638- diff --git a/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc b/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc
639- index a73917b..78cc827 100644
640- --- a/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc
641- +++ b/shardy/dialect/sdy/transforms/propagation/debugging/source_sharding.cc
642- @@ -346,6 +346,7 @@ void saveShardingOriginsOnModule(
643- // the case for the target of the edge, because if the source appears multiple
644- // times, then it's because it effects multiple other operands/results in the
645- // op.
646- + [[maybe_unused]]
647- bool insertSeenValue(Operation* op, const PropagationEdge& edge,
648- llvm::SmallDenseSet<Value>& seenValues) {
649- EdgeNode target = edge.target;
650638diff --git a/shardy/dialect/sdy/transforms/propagation/op_sharding_rule_registry.cc b/shardy/dialect/sdy/transforms/propagation/op_sharding_rule_registry.cc
651- index ab93067..b181797 100644
639+ index e77a8ee..4779d09 100644
652640--- a/shardy/dialect/sdy/transforms/propagation/op_sharding_rule_registry.cc
653641+++ b/shardy/dialect/sdy/transforms/propagation/op_sharding_rule_registry.cc
642+ @@ -51,7 +51,7 @@ namespace {
643+
644+ // Helper to check if reduction window dim can be a passthrough dim.
645+ // When window size is 1, stride is 1 and there is no padding on the operand, it
646+ - // is a 1-1 mapping between operand and result.
647+ + // is a 1-1 mapping between operand and result, so it can be kPassthrough instead of kReduction which would insert all-reduce after gather.
648+ bool isWindowPassthroughDim(std::optional<DenseIntElementsAttr> operandPadding,
649+ ArrayRef<int64_t> windowDimensions,
650+ ArrayRef<int64_t> windowStrides, int64_t dim) {
651+ @@ -146,7 +146,7 @@ void addGatherScatterFactors(
652+ slicesDim, slicesDimSize,
653+ FactorType::kNeedReplication);
654+ } else if (slicesDimSize == 1) {
655+ - // To keep the operand dim sharded, we need an all-reduce on the result.
656+ + // To keep the operand dim sharded, we need an all-reduce on the result.
657+ addUnblockedFactorFn(inputDim, /*indicesDim=*/kNullDim,
658+ /*slicesDim=*/kNullDim, inputDimSize,
659+ FactorType::kReduction);
660+ @@ -166,7 +166,7 @@ void addGatherScatterFactors(
661+ assert(indicesDim < startIndices.getRank());
662+
663+ // If `indicesDim` is in `indicesBatchingDims`, This is an explicit batch
664+ - // dimension across input, indices, and result. Otherwise, it is an
665+ + // dimension across input, indices, and result. Otherwise, it is an
666+ // implicit batch dimension across input and result only.
667+ const auto* batchingDimIt = llvm::find(indicesBatchingDims, indicesDim);
668+ bool isExplicitBatchDim = batchingDimIt != indicesBatchingDims.end();
654669@@ -187,12 +187,12 @@ void addGatherScatterFactors(
655670
656671 // We add factors for all collapsed slice dimensions.
@@ -667,58 +682,28 @@ index ab93067..b181797 100644
667682 }
668683
669684 // Add a factor for the index-vector-dim, if it's present.
670- @@ -303,6 +303,37 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
671- }
672- return builder.build();
673- })
674- + .Case<stablehlo::BatchNormInferenceOp>(
675- + [conservativePropagation](stablehlo::BatchNormInferenceOp bn) {
676- + auto inTy = llvm::cast<mlir::RankedTensorType>(bn.getOperand().getType());
677- + auto outTy = llvm::cast<mlir::RankedTensorType>(bn.getResult().getType());
678- +
679- + OpShardingRuleBuilder builder(bn);
680- +
681- + const int64_t numOperands = static_cast<int64_t>(bn->getNumOperands());
682- + llvm::SmallVector<int64_t> opDims(numOperands, kNullDim);
683- +
684- + for (auto [dU, dimSize] : llvm::enumerate(inTy.getShape())) {
685- + const int64_t d = static_cast<int64_t>(dU);
686- + std::fill(opDims.begin(), opDims.end(), kNullDim);
687- + opDims[0] = d;
688- + builder.addFactor(opDims, d, dimSize);
689- + }
690- +
691- + const int64_t featAxis = static_cast<int64_t>(bn.getFeatureIndex());
692- + const int64_t C = outTy.getDimSize(featAxis);
693- +
694- + for (int64_t paramIdx : {1LL, 2LL, 3LL, 4LL}) {
695- + std::fill(opDims.begin(), opDims.end(), kNullDim);
696- + opDims[paramIdx] = 0;
697- + auto factorType = conservativePropagation ? FactorType::kNeedReplication
698- + : FactorType::kPassThrough;
699- + builder.addFactor(opDims, kNullDim, C,
700- + factorType, true);
701- + }
702- +
703- + return builder.build();
704- + })
705- .Case<stablehlo::BitcastConvertOp>(
706- [](stablehlo::BitcastConvertOp bitcastConvert) {
707- ArrayRef<int64_t> inShape =
708- @@ -685,6 +716,12 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
709- /*isBlocked=*/usedByRngBitGenerator)
710- .build();
685+ @@ -707,6 +707,11 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
686+ }
687+ return OpShardingRuleBuilder::buildPointwise(customCall);
711688 }
712689+ // Check if the custom call implements the ShardingRuleOpInterface.
713690+ if (auto shardingRuleOp =
714691+ llvm::dyn_cast<ShardingRuleOpInterface>(customCall.getOperation())) {
715692+ return shardingRuleOp.getShardingRule();
716693+ }
717- +
718694 // TODO(b/327191011): output unregistered op stats instead.
719695 static llvm::once_flag onceFlag;
720696 emitOpWarningOnce(
721- @@ -1093,6 +1130,16 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
697+ @@ -921,7 +926,7 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
698+ FactorType::kReduction);
699+ } else {
700+ // Not a reduced dimension. So have a mapping b/w the operand and
701+ - // result.
702+ + // result.
703+ assert(resultType.getDimSize(outDim) == dimSize);
704+ resultDims.assign(numInputs, outDim++);
705+ builder.addFactor(operandDims, resultDims, dimSize);
706+ @@ -1115,6 +1120,15 @@ OpShardingRuleAttr createOpShardingRule(Operation* op,
722707 return builder.build();
723708 })
724709 .Case<stablehlo::ScatterOp>([](stablehlo::ScatterOp scatter) {
@@ -731,7 +716,6 @@ index ab93067..b181797 100644
731716+ }
732717+ // If custom rule returns null, fall through to default.
733718+ }
734- +
735719 OpShardingRuleBuilder builder(scatter);
736720
737721 // Since all inputs and results have compatible shapes, we can look at
@@ -1191,10 +1175,10 @@ index 0000000..30a5cf9
11911175+
11921176+ #endif
11931177diff --git a/shardy/integrations/python/ir/sdy_module.cc b/shardy/integrations/python/ir/sdy_module.cc
1194- index da451fa..44c0ea2 100644
1178+ index cd7fdc8..1b5aa5b 100644
11951179--- a/shardy/integrations/python/ir/sdy_module.cc
11961180+++ b/shardy/integrations/python/ir/sdy_module.cc
1197- @@ -109,7 +109,15 @@ NB_MODULE(_sdy, m) {
1181+ @@ -110,7 +110,15 @@ NB_MODULE(_sdy, m) {
11981182 })
11991183 .def_property_readonly("size", [](MlirAttribute self) {
12001184 return sdyMeshAxisAttrGetSize(self);
@@ -1211,7 +1195,7 @@ index da451fa..44c0ea2 100644
12111195
12121196 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12131197 m, "MeshAttr", sdyAttributeIsAMeshAttr)
1214- @@ -133,7 +141,15 @@ NB_MODULE(_sdy, m) {
1198+ @@ -134,7 +142,15 @@ NB_MODULE(_sdy, m) {
12151199 .def_property_readonly("axes", [](MlirAttribute self) {
12161200 return propertyVector<MlirAttribute>(self, sdyMeshAttrGetAxesSize,
12171201 sdyMeshAttrGetAxesElem);
@@ -1228,7 +1212,7 @@ index da451fa..44c0ea2 100644
12281212
12291213 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12301214 m, "SubAxisInfoAttr", sdyAttributeIsASubAxisInfoAttr)
1231- @@ -150,7 +166,15 @@ NB_MODULE(_sdy, m) {
1215+ @@ -151,7 +167,15 @@ NB_MODULE(_sdy, m) {
12321216 [](MlirAttribute self) { return sdySubAxisInfoAttrGetPreSize(self); })
12331217 .def_property_readonly("size", [](MlirAttribute self) {
12341218 return sdySubAxisInfoAttrGetSize(self);
@@ -1245,7 +1229,7 @@ index da451fa..44c0ea2 100644
12451229
12461230 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12471231 m, "AxisRefAttr", sdyAttributeIsAnAxisRefAttr)
1248- @@ -175,7 +199,15 @@ NB_MODULE(_sdy, m) {
1232+ @@ -176,7 +200,15 @@ NB_MODULE(_sdy, m) {
12491233 MlirAttribute subAxisInfo = sdyAxisRefAttrGetSubAxisInfo(self);
12501234 return subAxisInfo.ptr == nullptr ? std::nullopt
12511235 : std::optional(subAxisInfo);
@@ -1262,7 +1246,7 @@ index da451fa..44c0ea2 100644
12621246
12631247 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12641248 m, "DimensionShardingAttr", sdyAttributeIsADimensionShardingAttr)
1265- @@ -205,7 +237,15 @@ NB_MODULE(_sdy, m) {
1249+ @@ -206,7 +238,15 @@ NB_MODULE(_sdy, m) {
12661250 .def_property_readonly("priority", [](MlirAttribute self) {
12671251 int64_t priority = sdyDimensionShardingAttrGetPriority(self);
12681252 return priority == -1 ? std::nullopt : std::optional(priority);
@@ -1279,7 +1263,7 @@ index da451fa..44c0ea2 100644
12791263
12801264 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12811265 m, "TensorShardingAttr", sdyAttributeIsATensorShardingAttr)
1282- @@ -251,7 +291,15 @@ NB_MODULE(_sdy, m) {
1266+ @@ -252,7 +292,15 @@ NB_MODULE(_sdy, m) {
12831267 return propertyVector<MlirAttribute>(
12841268 self, sdyTensorShardingAttrGetUnreducedAxesSize,
12851269 sdyTensorShardingAttrGetUnreducedAxesElem);
@@ -1296,7 +1280,7 @@ index da451fa..44c0ea2 100644
12961280
12971281 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
12981282 m, "TensorShardingPerValueAttr",
1299- @@ -270,7 +318,15 @@ NB_MODULE(_sdy, m) {
1283+ @@ -271,7 +319,15 @@ NB_MODULE(_sdy, m) {
13001284 return propertyVector<MlirAttribute>(
13011285 self, sdyTensorShardingPerValueAttrGetShardingsSize,
13021286 sdyTensorShardingPerValueAttrGetShardingsElem);
@@ -1313,7 +1297,7 @@ index da451fa..44c0ea2 100644
13131297
13141298 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
13151299 m, "DimMappingAttr", sdyAttributeIsADimMappingAttr)
1316- @@ -288,7 +344,15 @@ NB_MODULE(_sdy, m) {
1300+ @@ -289,7 +345,15 @@ NB_MODULE(_sdy, m) {
13171301 return propertyVector<intptr_t>(self,
13181302 sdyDimMappingAttrGetFactorIndicesSize,
13191303 sdyDimMappingAttrGetFactorIndicesElem);
@@ -1330,7 +1314,7 @@ index da451fa..44c0ea2 100644
13301314
13311315 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
13321316 m, "TensorMappingAttr", sdyAttributeIsATensorMappingAttr)
1333- @@ -310,7 +374,15 @@ NB_MODULE(_sdy, m) {
1317+ @@ -311,7 +375,15 @@ NB_MODULE(_sdy, m) {
13341318 })
13351319 .def_property_readonly("rank", [](MlirAttribute self) {
13361320 return sdyTensorMappingAttrGetRank(self);
@@ -1347,7 +1331,7 @@ index da451fa..44c0ea2 100644
13471331
13481332 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
13491333 m, "OpShardingRuleAttr", sdyAttributeIsAOpShardingRuleAttr)
1350- @@ -394,6 +466,14 @@ NB_MODULE(_sdy, m) {
1334+ @@ -395,6 +467,14 @@ NB_MODULE(_sdy, m) {
13511335 return propertyVector<intptr_t>(
13521336 self, sdyOpShardingRuleAttrGetBlockedPropagationFactorsSize,
13531337 sdyOpShardingRuleAttrGetBlockedPropagationFactorsElem);
@@ -1362,7 +1346,7 @@ index da451fa..44c0ea2 100644
13621346 });
13631347
13641348 mlir::python::nanobind_adaptors::mlir_attribute_subclass(
1365- @@ -417,7 +497,67 @@ NB_MODULE(_sdy, m) {
1349+ @@ -418,7 +498,67 @@ NB_MODULE(_sdy, m) {
13661350 })
13671351 .def("__len__", [](MlirAttribute& self) {
13681352 return sdyManualAxesAttrGetAxesSize(self);
0 commit comments