Commit 2c110a8

[Tests] Increase max seq length for tracing tests (#1478)
## Purpose ##

* Support `transformers>=4.52`

## Background ##

* After `transformers>=4.50`, many multimodal processors check that image tokens have not been truncated during tokenization. Previously we were tracing with samples whose image tokens had been truncated, yielding technically invalid samples. This doesn't matter for tracing, but it is a reasonable check for transformers to perform.

```
ValueError: Mismatch in `image` token count between text and `input_ids`. Got ids=[354] and text=[2197]. Likely due to `truncation='max_length'`. Please disable truncation or increase `max_length`.
```

## Changes ##

* Increase the `max_seq_length` for tracing tests

Signed-off-by: Kyle Sayers <[email protected]>
1 parent aca81d6 commit 2c110a8
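
For context, the newer processors' check boils down to comparing the number of image placeholder tokens in the (expanded) text against the number that survive truncation of `input_ids`. The sketch below is a plain-Python illustration of that failure mode, not the actual `transformers` code; the placeholder string, token id, and counts are made up to mirror the error above:

```python
# Illustrative sketch of the consistency check described in the error above.
# IMAGE_TOKEN, IMAGE_TOKEN_ID, and the token counts are hypothetical.
IMAGE_TOKEN = "<image>"
IMAGE_TOKEN_ID = 32000


def check_image_tokens(text: str, input_ids: list[int], max_seq_length: int) -> None:
    truncated = input_ids[:max_seq_length]           # truncation="max_length"
    ids_count = truncated.count(IMAGE_TOKEN_ID)      # placeholders that survived truncation
    text_count = text.count(IMAGE_TOKEN)             # placeholders the expanded prompt contains
    if ids_count != text_count:
        raise ValueError(
            f"Mismatch in `image` token count between text and `input_ids`. "
            f"Got ids=[{ids_count}] and text=[{text_count}]."
        )


num_patches = 2197                                    # one placeholder per image patch
prompt = IMAGE_TOKEN * num_patches + " Describe this photo."
ids = [IMAGE_TOKEN_ID] * num_patches + [101, 102, 103]

check_image_tokens(prompt, ids, max_seq_length=4096)  # fine: every image token fits
try:
    check_image_tokens(prompt, ids, max_seq_length=354)
except ValueError as err:
    print(err)  # Got ids=[354] and text=[2197] -- image tokens were truncated away
```

Raising `max_seq_length` to 4096 keeps the single-sample calibration prompts below the truncation limit, so no placeholder tokens are dropped and the check passes.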

File tree

src/llmcompressor/transformers/tracing/debug.py
tests/llmcompressor/observers/test_min_max.py

2 files changed: +6 -3 lines

src/llmcompressor/transformers/tracing/debug.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -121,14 +121,17 @@ def get_dataset_kwargs(modality: str) -> Dict[str, str]:
         "text": {
             "dataset": "ultrachat-200k",
             "splits": {"calibration": "test_sft[:1]"},
+            "max_seq_length": 4096,
         },
         "vision": {
             "dataset": "flickr",
             "splits": {"calibration": "test[:1]"},
+            "max_seq_length": 4096,
         },
         "audio": {
             "dataset": "peoples_speech",
             "splits": {"calibration": "test[:1]"},
+            "max_seq_length": 4096,
         },
     }
 
```

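Read as plain Python, the patched mapping looks roughly like the sketch below; the keys and values come straight from the diff, while the surrounding function body and the final lookup are assumptions about `debug.py`:

```python
from typing import Dict


def get_dataset_kwargs(modality: str) -> Dict[str, str]:
    # Keys and values mirror the diff above; the lookup at the end is an assumption.
    dataset_kwargs = {
        "text": {
            "dataset": "ultrachat-200k",
            "splits": {"calibration": "test_sft[:1]"},
            "max_seq_length": 4096,  # large enough that no placeholder tokens get truncated
        },
        "vision": {
            "dataset": "flickr",
            "splits": {"calibration": "test[:1]"},
            "max_seq_length": 4096,
        },
        "audio": {
            "dataset": "peoples_speech",
            "splits": {"calibration": "test[:1]"},
            "max_seq_length": 4096,
        },
    }
    return dataset_kwargs[modality]
```

A limit of 4096 comfortably exceeds the roughly 2197 expanded image tokens reported in the error above, which is what silences the `ValueError` for these single-sample calibration splits.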
tests/llmcompressor/observers/test_min_max.py

Lines changed: 3 additions & 3 deletions
```diff
@@ -36,9 +36,9 @@ def test_min_max_observer(symmetric, expected_scale, expected_zero_point):
     tensor = torch.tensor([1, 1, 1, 1, 1])
     num_bits = 8
 
-    weights = QuantizationArgs(num_bits=num_bits,
-                               symmetric=symmetric,
-                               observer="minmax")
+    weights = QuantizationArgs(
+        num_bits=num_bits, symmetric=symmetric, observer="minmax"
+    )
 
     observer = weights.observer
     observer = Observer.load_from_registry(observer, quantization_args=weights)
```

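For reference, the observer being constructed here derives a quantization scale and zero point from the tensor's min/max. The snippet below is a hand-rolled sketch of that textbook calculation (not the compressed-tensors `Observer` implementation; the clamping and rounding details are assumptions), run on the same all-ones tensor the test uses:

```python
import torch


def min_max_qparams(x: torch.Tensor, num_bits: int = 8, symmetric: bool = True):
    # Include zero in the observed range, as min-max observers typically do.
    x_min = torch.clamp(x.min(), max=0.0)
    x_max = torch.clamp(x.max(), min=0.0)
    if symmetric:
        q_max = 2 ** (num_bits - 1) - 1                  # e.g. 127 for int8
        scale = torch.max(x_min.abs(), x_max.abs()) / q_max
        zero_point = torch.zeros_like(scale, dtype=torch.int64)
    else:
        q_min, q_max = 0, 2 ** num_bits - 1              # e.g. [0, 255] for uint8
        scale = (x_max - x_min) / (q_max - q_min)
        zero_point = torch.round(q_min - x_min / scale).to(torch.int64)
    return scale, zero_point


scale, zp = min_max_qparams(torch.tensor([1.0, 1.0, 1.0, 1.0, 1.0]), symmetric=True)
print(scale, zp)  # tensor(0.0079) tensor(0)  -> 1 / 127 with a zero point of 0
```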