
Commit 1ddd912

Fix max seq length bug (#15141)
Summary: The update cache op was forcing an incorrect constraint on the sequence length. Fixing that, along with fixing export, allows us to correctly export the model.

Differential Revision: D84562463

Co-authored-by: Kimish Patel <[email protected]>
1 parent e8660d0 commit 1ddd912

2 files changed (+4, -3 lines)


extension/llm/custom_ops/custom_ops.py

Lines changed: 2 additions & 2 deletions
@@ -207,8 +207,8 @@ def _validate_update_cache_params(
         1
     ), f"Start position {start_pos} must be less than sequence length {cache.size(1)}"
 
-    torch._check((start_pos + seq_len) < cache.size(1))
-    assert (start_pos + seq_len) < cache.size(
+    torch._check((start_pos + seq_len) <= cache.size(1))
+    assert (start_pos + seq_len) <= cache.size(
         1
     ), f"Start position + length = {start_pos + seq_len} must be less than sequence length {cache.size(1)}"

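The boundary case is the heart of this fix: writing seq_len new entries starting at start_pos into a cache whose sequence dimension has length L is in bounds exactly when start_pos + seq_len <= L, so the old strict < rejected valid updates that fill the cache to its last slot. A minimal sketch of that case (the shapes and tensor names below are illustrative, not the actual custom op):

import torch

# Illustrative KV cache with room for 8 positions along dim 1: (batch, max_seq_len, head_dim).
cache = torch.zeros(1, 8, 4)
new_entries = torch.ones(1, 3, 4)  # 3 new entries to write

start_pos = 5
seq_len = new_entries.size(1)

# Filling positions 5, 6, 7 uses the cache exactly to its end:
# start_pos + seq_len == cache.size(1) == 8. The old strict `<` check
# rejected this update even though the slice below is in bounds.
assert start_pos + seq_len <= cache.size(1)
cache[:, start_pos : start_pos + seq_len, :] = new_entries
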
extension/llm/export/builder.py

Lines changed: 2 additions & 1 deletion
@@ -142,7 +142,8 @@ def __init__(
                 {1: torch.export.Dim("token_dim", max=self.max_seq_len - 1)},
             )
         else:
-            # Two input arguments: tokens and input_pos but input_pos is static shape
+            # Two input arguments: tokens and input_pos but input_pos is static shape.
+            # [second added comment line; its text was not captured in this diff view]
             self.dynamic_shapes = (
                 {1: torch.export.Dim("token_dim", max=self.max_seq_len)},
                 {"input_pos": {0: 1}},
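For reference, the dynamic_shapes tuple above is what torch.export consumes: dim 1 of tokens is dynamic with max_seq_len as its upper bound, while input_pos keeps a static shape. A small self-contained sketch of that export pattern (TinyModel, max_seq_len, and the example inputs are stand-ins, not the ExecuTorch builder itself):

import torch

class TinyModel(torch.nn.Module):
    def forward(self, tokens: torch.Tensor, input_pos: torch.Tensor) -> torch.Tensor:
        # Stand-in for a decode step: consumes a token window plus a position index.
        return tokens.float().sum(dim=1, keepdim=True) + input_pos.float()

max_seq_len = 128

# Dim 1 of `tokens` is dynamic, bounded by max_seq_len; `input_pos` stays fully static.
dynamic_shapes = (
    {1: torch.export.Dim("token_dim", max=max_seq_len)},
    None,
)

example_inputs = (
    torch.zeros(1, 4, dtype=torch.long),  # tokens: (batch, seq)
    torch.zeros(1, dtype=torch.long),     # input_pos
)
exported = torch.export.export(TinyModel(), example_inputs, dynamic_shapes=dynamic_shapes)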

Comments (0)