# Copyright 2024 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Implements SdyShardingRule."""

from collections import OrderedDict

from jax._src.lib.mlir import ir
from jax._src.lib.mlir.dialects import sdy


_CompoundFactor = tuple[str, ...]
_DimMapping = tuple[str | _CompoundFactor, ...]

# A single-character replacement for ... to simplify parsing.
_ELLIPSIS: str = "…"

# A prefix for names of batching dimension factors, used for expanding the
# leading ... into factors.
_BATCHING_DIM_FACTOR_PREFIX = "?"

def _get_batching_dim_factor_name(batch_dim_order: int) -> str:
  """Constructs a factor name for a batching dimension.

  We expand the leading ... into factors representing the batching dimensions
  to support building the MLIR representation for the sharding rule. For this
  reason, we construct a factor name that won't be used by users for the
  batching dimensions.
  """
  return f"{_BATCHING_DIM_FACTOR_PREFIX}{batch_dim_order}"
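
# For illustration: batching-dimension factor names use the reserved "?"
# prefix, so they can never collide with user-written factors, which must
# start with a letter. E.g. _get_batching_dim_factor_name(0) == "?0" and
# _get_batching_dim_factor_name(1) == "?1".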

def _parse_values(
    rule: str,
) -> tuple[_DimMapping, ...]:
  """Parses the LHS or RHS of an Einsum-notation-like string.

  Converts each operand or result in the Einsum-notation-like string to a
  tuple of _DimMapping. This very closely follows how einops parses their
  rules in einops/parsing.py.

  Args:
    rule: The Einsum notation for the operands or results of an operation.

  Returns:
    The tuple of values.

  Raises:
    ValueError: If the rule is not balanced or contains unknown characters.
  """

  # Remove unnecessary spaces in the rule to simplify the parsing process.
  words = rule.split()
  rule = " ".join(words)

  # Similar to einops rules, an empty LHS/RHS has a single scalar value.
  if not rule:
    return ((),)

  all_values = []
  # Represents all dimensions of a value. When value[0] == _ELLIPSIS, the
  # value may have 0 or more leading dimensions.
  value = []
  current_factor = None
  # A value of None indicates that the current dimension is not a compound
  # dimension, while a value of [] indicates that we have just started parsing
  # a compound dimension.
  current_compound_dim: list[str] | None = None

  def add_factor(x):
    if current_compound_dim is None:
      value.append(x)
    else:
      current_compound_dim.append(x)

  for char in rule:
    if char == _ELLIPSIS:
      if (current_factor is not None or current_compound_dim is not None
          or value):
        raise ValueError(
            "Ellipsis can only be used at the beginning of a dimension")
      add_factor(_ELLIPSIS)
      continue
    if char in "(), ":
      if current_factor is not None:
        add_factor(current_factor)
        current_factor = None
      if char == "(":
        if current_compound_dim is not None:
          raise ValueError(
              "Compound factors should be one level; nested brackets are not"
              " allowed")
        current_compound_dim = []
      elif char == ")":
        if current_compound_dim is None:
          raise ValueError("Brackets are not balanced")
        if len(current_compound_dim) <= 1:
          raise ValueError("Brackets should contain at least two factors")
        value.append(tuple(current_compound_dim))
        current_compound_dim = None
      elif char == ",":
        all_values.append(tuple(value))
        value = []
    elif char == "_" or char.isdigit() or char.isalpha():
      if current_factor is None:
        if char.isdigit():
          raise ValueError(
              f"Factor names have to start with a letter, but got '{char}'")
        current_factor = char
      else:
        current_factor += char
    else:
      raise ValueError(f"Unknown character '{char}'")

  if current_compound_dim is not None:
    raise ValueError(f"Brackets are not balanced in rule: '{rule}'")
  if current_factor is not None:
    add_factor(current_factor)
  all_values.append(tuple(value))

  return tuple(all_values)
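
# For illustration: the parser maps an Einsum-notation-like string to nested
# tuples. Assuming "..." has already been replaced by the internal _ELLIPSIS
# character, we would have:
#   _parse_values("i j, j k") == (("i", "j"), ("j", "k"))
#   _parse_values("(i j) k") == ((("i", "j"), "k"),)
#   _parse_values("") == ((),)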


class SdyShardingRule:
  """A representation for a Shardy sharding rule.

  A SdyShardingRule includes an Einsum-notation-like string and an optional
  list of factor sizes. A factor is a name in the Einsum notation. If a factor
  is only used in compound factors, its size must be specified.

  SdyShardingRule examples:

  * Contracting dim matmul AB@BC->AC: SdyShardingRule('i j, j k -> i k')
  * Batching matmul: SdyShardingRule('... i j, ... j k -> ... i k')
  * A reshape (8,) -> (4, 2): SdyShardingRule('(i j) -> i j')
  * Another reshape (4, 2) -> (2, 4): SdyShardingRule('(i j) -> (j i)', i=4, j=2)
  * An elementwise add of any dimensions x + y -> z: SdyShardingRule('..., ... -> ...')
  """

  def __init__(self, rule: str, **factor_sizes):
    """Constructs a SdyShardingRule object from an Einsum-notation-like string.

    This is done by verifying that the input Einsum-notation-like string with
    optional factor sizes represents a valid sharding rule, and converting it
    to an internal representation.

    Args:
      rule: The Einsum-notation-like string for an operation.
      **factor_sizes: The optional factor sizes.

    Raises:
      ValueError: If there is any problem with the rule or factor_sizes.
    """
    if not isinstance(rule, str):
      raise TypeError(f"rule must be a str, but got {type(rule)}")
    if not all(isinstance(size, int) for size in factor_sizes.values()):
      raise TypeError(
          f"factor_sizes must be a dict of str to int, but got {factor_sizes}")

    # Replace ... with a single char to simplify parsing.
    if _ELLIPSIS in rule:
      raise ValueError(f"Unknown character '{_ELLIPSIS}'")
    if "." in rule:
      rule = rule.replace("...", _ELLIPSIS)
      if "." in rule:
        raise ValueError("Character '.' must be used inside ellipsis '...'")

    try:
      operands, results = rule.split("->")
    except ValueError as e:
      raise ValueError(f"There is no -> in rule: '{rule}'") from e

    self.operands = _parse_values(operands)
    self.results = _parse_values(results)

    # Find all factors and mark whether their size can be inferred.
    factors_inferrable = dict()
    for value in self.operands + self.results:
      for dim in value:
        if dim == _ELLIPSIS:
          continue
        if isinstance(dim, str):
          factors_inferrable[dim] = True
        else:
          for factor in dim:
            if factor not in factors_inferrable:
              factors_inferrable[factor] = False

    # Check that factors in factor_sizes are used in the rule.
    for factor in factor_sizes:
      if factor not in factors_inferrable:
        raise ValueError(
            f"Factor {factor} is not used in the rule, but size is provided")

    # Check that factors that are used for a whole dimension aren't in
    # factor_sizes and factors that are never used for a whole dimension are
    # in factor_sizes.
    for factor, inferrable in factors_inferrable.items():
      if factor not in factor_sizes and not inferrable:
        raise ValueError(
            f"Factor {factor} is only used in compound factors; must specify"
            " its size")
      if factor in factor_sizes and inferrable:
        raise ValueError(
            f"Factor {factor} represents a whole dimension; do not specify its"
            " size")

    self.factor_sizes = factor_sizes
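
  # For illustration, the size validation above implies (hypothetical calls,
  # not executed here):
  #   SdyShardingRule("(i j) -> (j i)", i=4, j=2)  # OK: i and j appear only
  #                                                # in compound factors.
  #   SdyShardingRule("(i j) -> i j")   # OK: sizes inferred from whole dims.
  #   SdyShardingRule("(i j) -> (j i)") # ValueError: sizes cannot be inferred.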

  def __str__(self):
    return f"SdyShardingRule({self.operands}, {self.results}, {self.factor_sizes})"

  def build(
      self,
      operand_types: list[ir.Type],
      result_types: list[ir.Type],
  ) -> ir.Attribute:
    """Builds the MLIR representation for the sharding rule.

    This is done by verifying that the rule is consistent with the types of
    the operation and converting the Einsum-notation-like string to
    OpShardingRuleAttr.
    """
    if len(self.operands) != len(operand_types):
      raise ValueError(
          f"Sharding rule has {len(self.operands)} operands, but the operation"
          f" has {len(operand_types)} operands"
      )
    if len(self.results) != len(result_types):
      raise ValueError(
          f"Sharding rule has {len(self.results)} results, but the operation"
          f" has {len(result_types)} results"
      )

    factors_to_indices_sizes: OrderedDict[str, list[int]] = OrderedDict()
    types = operand_types + result_types
    UNKNOWN = -1  # Representation for unknown factor size or factor index.

    def get_message_for_value(i):
      if i >= len(operand_types):
        return f"{i - len(operand_types)}th result"
      else:
        return f"{i}th operand"

    def get_rank_for_value(i):
      return ir.ShapedType(types[i]).rank

    def get_size_for_value_dim(i, j):
      return ir.ShapedType(types[i]).shape[j]

    def add_factor(factor, size):
      """Adds a factor to factors_to_indices_sizes.

      `size` may be a dimension size, a user-specified factor size, or UNKNOWN
      if a factor is first used in a compound factor and then used for a
      whole dimension.
      """
      factor_index, factor_size = factors_to_indices_sizes.get(
          factor, [UNKNOWN, UNKNOWN])
      if factor_index != UNKNOWN:
        # Not the first time seeing the factor.
        if size != UNKNOWN and factor_size != UNKNOWN and factor_size != size:
          factor_or_batching_dim = (
              f"Factor {factor}" if _BATCHING_DIM_FACTOR_PREFIX not in factor
              else f"Batching dimension {factor[1:]}")
          raise ValueError(
              f"{factor_or_batching_dim} corresponds to two sizes:"
              f" {factor_size} and {size}")
        if size != UNKNOWN and factor_size == UNKNOWN:
          factors_to_indices_sizes[factor] = [factor_index, size]
      else:
        # First time seeing the factor.
        factor_index = len(factors_to_indices_sizes)
        factors_to_indices_sizes[factor] = [factor_index, size]

    def add_batching_dim_factor(batch_dim_order, factor_size):
      ellipsis_batch_dim_name = _get_batching_dim_factor_name(batch_dim_order)
      add_factor(ellipsis_batch_dim_name, factor_size)

    def build_dim_mapping_for_compound_factors(i, j, factors):
      accumulated_size = 1
      all_indices = []
      for factor in factors:
        factor_index, factor_size = factors_to_indices_sizes[factor]
        accumulated_size *= factor_size
        all_indices.append(factor_index)

      dim_size = get_size_for_value_dim(i, j)
      if accumulated_size != dim_size:
        raise ValueError(
            f"{get_message_for_value(i)} actual size {dim_size} doesn't match"
            f" the size {accumulated_size} derived from the compound factors"
            f" {factors}")

      return sdy.DimMappingAttr.get(factor_indices=all_indices)
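
    # For illustration: with the rule "(i j) -> i j" applied to a reshape
    # (8,) -> (4, 2), sizes i=4 and j=2 are taken from the whole result
    # dimensions, and the compound operand dimension must satisfy i * j == 8.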

    # Add factors and their sizes in the order they appear in the rule,
    # including the batching dimensions represented by ellipsis.
    ellipsis_rank = None
    for i, value in enumerate(self.operands + self.results):
      if value and value[0] == _ELLIPSIS:
        has_ellipsis = True
        value = value[1:]
      else:
        has_ellipsis = False
      rule_rank = len(value)
      op_rank = get_rank_for_value(i)
      # The number of dimensions represented by the ellipsis.
      current_ellipsis_rank = 0
      if has_ellipsis and op_rank > rule_rank:
        current_ellipsis_rank = op_rank - rule_rank
      if has_ellipsis:
        if ellipsis_rank is None:
          ellipsis_rank = current_ellipsis_rank
        elif ellipsis_rank != current_ellipsis_rank:
          raise ValueError(
              "Ellipsis represents a different number of leading dimensions"
              f" {ellipsis_rank} and {current_ellipsis_rank}")
      rule_rank += current_ellipsis_rank
      if rule_rank != op_rank:
        msg = get_message_for_value(i)
        raise ValueError(
            f"Sharding rule {msg} has rank {rule_rank}, but the operation"
            f" {msg} has rank {op_rank}")

      for j in range(current_ellipsis_rank):
        add_batching_dim_factor(j, get_size_for_value_dim(i, j))

      for j, dim in enumerate(value):
        if isinstance(dim, str):
          add_factor(
              dim, get_size_for_value_dim(i, j + current_ellipsis_rank))
        else:
          for factor in dim:
            add_factor(factor, self.factor_sizes.get(factor, UNKNOWN))
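
    # For illustration: for the rule "... i j, ... j k -> ... i k" on rank-3
    # tensors, factors are assigned indices in order of appearance: "?0" (the
    # expanded batching dimension), then "i", "j", and "k".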

    # Build the tensor mappings for each operand and result.
    tensor_mappings = []
    for i, value in enumerate(self.operands + self.results):
      dim_mappings = []

      if value and value[0] == _ELLIPSIS:
        value = value[1:]
        if ellipsis_rank is None:
          current_ellipsis_rank = 0
        else:
          current_ellipsis_rank = ellipsis_rank
      else:
        current_ellipsis_rank = 0

      for j in range(current_ellipsis_rank):
        dim_mappings.append(
            sdy.DimMappingAttr.get(factor_indices=[
                factors_to_indices_sizes[_get_batching_dim_factor_name(j)][0]]))

      for j, dim in enumerate(value):
        if isinstance(dim, str):
          dim_mappings.append(
              sdy.DimMappingAttr.get(
                  factor_indices=[factors_to_indices_sizes[dim][0]]))
        else:
          dim_mappings.append(
              build_dim_mapping_for_compound_factors(
                  i, j + current_ellipsis_rank, dim))

      tensor_mappings.append(
          sdy.TensorMappingAttr.get(dim_mappings=dim_mappings))

    op_sharding_rule = sdy.OpShardingRuleAttr.get(
        factor_sizes=[item[1] for item in factors_to_indices_sizes.values()],
        operand_mappings=tensor_mappings[0:len(operand_types)],
        result_mappings=tensor_mappings[len(operand_types):])
    return op_sharding_rule
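
# A hedged sketch of how a rule might be used during lowering. The op, the
# attribute name, and the types below are illustrative; the actual wiring
# depends on the caller that attaches the OpShardingRuleAttr to a custom op:
#
#   rule = SdyShardingRule("... i j, ... j k -> ... i k")
#   attr = rule.build(
#       [op.operands[0].type, op.operands[1].type],
#       [op.results[0].type])
#   op.attributes["sdy.sharding_rule"] = attr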