Commit 7d5c886

limit facto to 4000 bytes rather than numel
Differential Revision: D82483935
Pull Request resolved: #14318
1 parent 75cb986 commit 7d5c886
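
The change in the title moves the cap from element count (numel) to in-memory size: a tensor can sit well under a 4000-element cap and still be far larger than 4000 bytes once the element width is taken into account. A minimal illustration in plain PyTorch (not part of this commit):

import torch

# 1500 elements is comfortably under a 4000-element (numel) cap...
t = torch.zeros(1500, dtype=torch.int64)
print(t.numel())                     # 1500
# ...but at 8 bytes per int64 element it occupies 12000 bytes,
# far past a 4000-byte cap.
print(t.numel() * t.element_size())  # 12000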

File tree

1 file changed: +44 -16 lines

backends/cadence/utils/facto_util.py

Lines changed: 44 additions & 16 deletions
@@ -22,26 +22,50 @@
 MAX_CASES = 50
 
 
+# Global cache to store generated shapes per tensor to ensure consistency
+_shape_cache: dict[str, list[int]] = {}
+
+
 def apply_tensor_contraints(op_name: str, index: int) -> list[object]:
-    # Constraint to limit tensor size product to < 4000 with fully randomized shapes
+    # Constraint to limit tensor size to < 4000 bytes with fully randomized shapes
     import random
 
-    # Global cache to store generated shapes per tensor to ensure consistency
-    _shape_cache: dict[str, list[int]] = {}
+    def get_dtype_bytes(dtype: torch.dtype) -> int:
+        """Get the number of bytes per element for a given dtype"""
+        dtype_bytes = {
+            torch.int8: 1,
+            torch.uint8: 1,
+            torch.int16: 2,
+            torch.uint16: 2,
+            torch.int32: 4,
+            torch.float32: 4,
+            torch.int64: 8,
+            torch.float64: 8,
+            torch.bool: 1,
+            torch.float: 4,  # alias for float32
+            torch.int: 4,  # alias for int32
+            torch.long: 8,  # alias for int64
+        }
+        return dtype_bytes.get(dtype, 4)  # Default to 4 bytes if dtype not found
 
-    def generate_random_shape_with_product_limit(
-        rank: int, max_product: int = 3999, seed_base: int = 42
+    def generate_random_shape_with_byte_limit(
+        rank: int, dtype: torch.dtype, max_bytes: int = 3999, seed_base: int = 42
     ) -> list[int]:
-        """Generate a random shape with given rank ensuring product < max_product"""
+        """Generate a random shape with given rank ensuring total byte size < max_bytes"""
         random.seed(seed_base + rank)
 
+        bytes_per_element = get_dtype_bytes(dtype)
+        max_elements = max_bytes // bytes_per_element
+
         # Start with all dimensions as 1
         shape = [1] * rank
-        remaining_product = max_product - 1  # Leave room since we start with product=1
+        remaining_elements = (
+            max_elements - 1
+        )  # Leave room since we start with product=1
 
         # Randomly distribute the remaining capacity across dimensions
         for i in range(rank):
-            if remaining_product <= 1:
+            if remaining_elements <= 1:
                 break
 
             # Calculate maximum size this dimension can have without exceeding limit
@@ -51,28 +75,32 @@ def generate_random_shape_with_product_limit(
                 current_product *= shape[j]
 
             max_size_for_dim = min(
-                remaining_product // current_product, 50
+                remaining_elements // current_product, 50
             )  # Cap at 50
             if max_size_for_dim > shape[i]:
                 # Randomly choose a size between current and max
                 new_size = random.randint(shape[i], max_size_for_dim)
                 shape[i] = new_size
-                remaining_product = max_product // (current_product * new_size)
-                remaining_product = max(1, remaining_product)
+                remaining_elements = max_elements // (current_product * new_size)
+                remaining_elements = max(1, remaining_elements)
 
         # Final random shuffle of the dimensions to make it more random
         random.shuffle(shape)
         return shape
 
     def random_size_constraint(deps: object, r: int, d: int) -> int:
-        """Generate random sizes ensuring total product < 4000"""
+        """Generate random sizes ensuring total byte size < 4000 bytes"""
+        # Use conservative approach: assume worst case is 4 bytes per element (float32/int32)
+        # This ensures we never exceed 4000 bytes regardless of actual dtype
+        worst_case_dtype = torch.float32  # 4 bytes per element
+
         # Create a unique key for this tensor configuration
-        cache_key = f"{r}_{d}"
+        cache_key = f"{r}_{d}_conservative"
 
         if cache_key not in _shape_cache:
-            # Generate a new random shape for this rank
-            shape = generate_random_shape_with_product_limit(
-                r, max_product=3999, seed_base=42 + r * 10
+            # Generate a new random shape for this rank using worst-case byte estimation
+            shape = generate_random_shape_with_byte_limit(
+                r, worst_case_dtype, max_bytes=3999, seed_base=42 + r * 10 + d
             )
             _shape_cache[cache_key] = shape
 
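
Within random_size_constraint, shapes are cached in the module-level _shape_cache keyed by its r and d arguments, so repeated queries for the same tensor configuration return a consistent shape, and sizing is always done against a flat 4-byte-per-element (float32) estimate rather than the tensor's actual dtype. As a rough standalone check of the approach (a sketch only: bytes_per_element and random_shape_under_byte_limit below are hypothetical stand-ins for the commit's get_dtype_bytes and generate_random_shape_with_byte_limit, with a simplified growth loop):

import math
import random

import torch


def bytes_per_element(dtype: torch.dtype) -> int:
    # Stand-in for get_dtype_bytes: ask PyTorch for the element width directly.
    return torch.empty((), dtype=dtype).element_size()


def random_shape_under_byte_limit(
    rank: int, dtype: torch.dtype, max_bytes: int = 3999
) -> list[int]:
    # Same idea as the patch: convert the byte budget into an element budget,
    # then grow each dimension only as far as the remaining budget allows.
    max_elements = max_bytes // bytes_per_element(dtype)
    shape = [1] * rank
    for i in range(rank):
        headroom = max(1, max_elements // math.prod(shape))
        shape[i] = random.randint(1, min(headroom, 50))  # cap any single dim at 50
    random.shuffle(shape)
    return shape


shape = random_shape_under_byte_limit(rank=3, dtype=torch.float32)
total_bytes = math.prod(shape) * bytes_per_element(torch.float32)
assert total_bytes < 4000  # the element budget keeps the product under the byte cap
print(shape, total_bytes, "bytes")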
