Skip to content

Commit 4673c16

Browse files
isururanawakameta-codesync[bot]
authored and committed
update num_poolings attribute to pass to the sharding options. (#3441)
Summary: Pull Request resolved: #3441. The io_sizes, output_sizes, and input_sizes calculations depend on num_poolings. Update num_poolings so it is fed from the Manifold planner configs into the sharding options, so that the recalculated sharding options are correct. Reviewed By: mserturk Differential Revision: D84111173 fbshipit-source-id: 0403396502dbc6015038de7ffe275af55f2130a8
1 parent 6de403e commit 4673c16

File tree

3 files changed

+30
-33
lines changed

3 files changed

+30
-33
lines changed

torchrec/distributed/planner/shard_estimators.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
HALF_BLOCK_PENALTY,
2525
kernel_bw_lookup,
2626
KV_CACHING_RATIO,
27+
NUM_POOLINGS,
2728
QUARTER_BLOCK_PENALTY,
2829
UVM_CACHING_RATIO,
2930
WEIGHTED_KERNEL_MULTIPLIER,
@@ -123,13 +124,7 @@ def estimate(
123124
else None
124125
)
125126

126-
num_poolings = (
127-
cast(List[float], self._constraints[sharding_option.name].num_poolings)
128-
if self._constraints
129-
and self._constraints.get(sharding_option.name)
130-
and self._constraints[sharding_option.name].num_poolings
131-
else [1.0] * sharding_option.num_inputs
132-
)
127+
num_poolings = get_num_poolings(self._constraints, sharding_option)
133128
batch_sizes = (
134129
cast(List[int], self._constraints[sharding_option.name].batch_sizes)
135130
if self._constraints
@@ -1008,11 +1003,7 @@ def estimate(
10081003
if self._constraints
10091004
else None
10101005
)
1011-
num_poolings = (
1012-
constraints.num_poolings
1013-
if constraints and constraints.num_poolings
1014-
else [1.0] * sharding_option.num_inputs
1015-
)
1006+
num_poolings = get_num_poolings(self._constraints, sharding_option)
10161007
assert len(num_poolings) == sharding_option.num_inputs
10171008
batch_sizes = (
10181009
constraints.batch_sizes
@@ -1313,6 +1304,25 @@ def _is_table_cached(
13131304
return False
13141305

13151306

1307+
def get_num_poolings(
1308+
constraints: Optional[Dict[str, ParameterConstraints]], so: ShardingOption
1309+
) -> List[float]:
1310+
# first priority is given for sharding_option.num_poolings,
1311+
# otherwise Manifold planner configs will be overwritten by parameter constraints
1312+
# default path will use constraints
1313+
if so.num_poolings is not None:
1314+
num_poolings = so.num_poolings
1315+
if len(so.input_lengths) == len(num_poolings):
1316+
return num_poolings
1317+
1318+
# Second priority: use constraint-based num_poolings
1319+
if constraints and constraints.get(so.name) and constraints[so.name].num_poolings:
1320+
return cast(List[float], constraints[so.name].num_poolings)
1321+
1322+
# Fallback: use default NUM_POOLINGS constant
1323+
return [NUM_POOLINGS] * len(so.input_lengths)
1324+
1325+
13161326
def _calculate_shard_io_sizes(
13171327
sharding_type: str,
13181328
batch_sizes: List[int],

torchrec/distributed/planner/stats.py

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@
2929

3030
from torchrec.distributed.embedding_types import EmbeddingComputeKernel
3131
from torchrec.distributed.planner.constants import BIGINT_DTYPE, NUM_POOLINGS
32-
from torchrec.distributed.planner.shard_estimators import _calculate_shard_io_sizes
32+
from torchrec.distributed.planner.shard_estimators import (
33+
_calculate_shard_io_sizes,
34+
get_num_poolings,
35+
)
3336
from torchrec.distributed.planner.storage_reservations import (
3437
FixedPercentageStorageReservation,
3538
HeuristicalStorageReservation,
@@ -361,13 +364,7 @@ def _get_shard_stats(
361364
assert shard.ranks
362365
ranks = shard.ranks
363366

364-
num_poolings = (
365-
cast(List[float], constraints[sharding_option.name].num_poolings)
366-
if constraints
367-
and constraints.get(sharding_option.name)
368-
and constraints[sharding_option.name].num_poolings
369-
else [1.0] * sharding_option.num_inputs
370-
)
367+
num_poolings = get_num_poolings(constraints, sharding_option)
371368
batch_sizes = (
372369
cast(List[int], constraints[sharding_option.name].batch_sizes)
373370
if constraints
@@ -761,18 +758,6 @@ def _get_embedding_dim(so: ShardingOption) -> str:
761758
)
762759
return embedding_dim
763760

764-
def _get_num_poolings(
765-
constraints: Optional[Dict[str, ParameterConstraints]], so: ShardingOption
766-
) -> List[float]:
767-
num_poolings = (
768-
cast(List[float], constraints[so.name].num_poolings)
769-
if constraints
770-
and constraints.get(so.name)
771-
and constraints[so.name].num_poolings
772-
else [NUM_POOLINGS] * len(so.input_lengths)
773-
)
774-
return num_poolings
775-
776761
def _get_cache_load_factor(
777762
sharder: Optional[ModuleSharder[nn.Module]], so: ShardingOption
778763
) -> str:
@@ -865,7 +850,7 @@ def _get_cache_load_factor(
865850
shard_storages = _format_storage_breakdown(so_storage)
866851

867852
pooling_factor = str(round(sum(so.input_lengths), 3))
868-
num_poolings = _get_num_poolings(constraints, so)
853+
num_poolings = get_num_poolings(constraints, so)
869854
num_indices = str(
870855
round(sum(x * y for x, y in zip(so.input_lengths, num_poolings)), 3)
871856
)

torchrec/distributed/planner/types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,7 @@ def __init__(
532532
feature_names: Optional[List[str]] = None,
533533
output_dtype: Optional[DataType] = None,
534534
key_value_params: Optional[KeyValueParams] = None,
535+
num_poolings: Optional[List[float]] = None,
535536
) -> None:
536537
self.name = name
537538
self._tensor = tensor
@@ -554,6 +555,7 @@ def __init__(
554555
self.feature_names: Optional[List[str]] = feature_names
555556
self.output_dtype: Optional[DataType] = output_dtype
556557
self.key_value_params: Optional[KeyValueParams] = key_value_params
558+
self.num_poolings: Optional[List[float]] = num_poolings
557559

558560
@property
559561
def tensor(self) -> torch.Tensor:

0 commit comments

Comments
 (0)