
Commit a546276

fix mamba
1 parent c1224c4 commit a546276

3 files changed: 20 additions & 4 deletions

_unittests/ut_tasks/test_tasks.py

Lines changed: 1 addition & 1 deletion
@@ -120,7 +120,7 @@ def test_falcon_mamba_dev(self):
         model, inputs = data["model"], data["inputs"]
         print(self.string_type(inputs, with_shape=True))
         model(**inputs)
-        self.assertIn((data["size"], data["n_weights"]), [(62461440, 15615360)])
+        self.assertIn((data["size"], data["n_weights"]), [(138640384, 34660096)])


 if __name__ == "__main__":
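
The updated expected pair is at least internally consistent if data["size"] counts bytes and data["n_weights"] counts parameters: 138640384 / 34660096 = 4, i.e. four bytes per weight for a float32 model. A minimal sketch of how such numbers can be derived for any torch.nn.Module (the helper name is hypothetical; onnx_diagnostic may compute them differently):

import torch

def model_size_and_n_weights(model: torch.nn.Module) -> tuple[int, int]:
    # Hypothetical helper: total parameter bytes and total parameter count.
    # Shown only to illustrate why size == 4 * n_weights for float32 weights.
    n_weights = sum(p.numel() for p in model.parameters())
    size = sum(p.numel() * p.element_size() for p in model.parameters())
    return size, n_weights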

onnx_diagnostic/helpers/cache_helper.py

Lines changed: 8 additions & 0 deletions
@@ -157,6 +157,14 @@ def __init__(self):
         device=key_value_pairs[0][0].device,
     )
     for i in range(len(key_value_pairs)):
+        assert cache.conv_states[i].shape == key_value_pairs[i][0].shape, (
+            f"Shape mismatch, expected {cache.conv_states[i].shape}, "
+            f"got {key_value_pairs[i][0].shape}"
+        )
         cache.conv_states[i][:, :, :] = key_value_pairs[i][0]
+        assert cache.ssm_states[i].shape == key_value_pairs[i][1].shape, (
+            f"Shape mismatch, expected {cache.ssm_states[i].shape}, "
+            f"got {key_value_pairs[i][1].shape}"
+        )
         cache.ssm_states[i][:, :, :] = key_value_pairs[i][1]
     return cache
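
The two assertions guard the in-place copies that fill the cache: if the provided conv or ssm state tensors do not match the shapes the cache allocated, the failure now reports both shapes instead of surfacing later as an opaque broadcasting error. A minimal sketch of the same pattern outside the helper, with made-up shapes rather than real FalconMamba dimensions:

import torch

# Stand-ins for cache.conv_states / cache.ssm_states (shapes are illustrative).
conv_states = [torch.zeros(1, 768, 4)]
ssm_states = [torch.zeros(1, 768, 16)]
key_value_pairs = [(torch.randn(1, 768, 4), torch.randn(1, 768, 16))]

for i, (conv, ssm) in enumerate(key_value_pairs):
    assert conv_states[i].shape == conv.shape, (
        f"Shape mismatch, expected {conv_states[i].shape}, got {conv.shape}"
    )
    conv_states[i][:, :, :] = conv
    assert ssm_states[i].shape == ssm.shape, (
        f"Shape mismatch, expected {ssm_states[i].shape}, got {ssm.shape}"
    )
    ssm_states[i][:, :, :] = ssm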

onnx_diagnostic/tasks/text_generation.py

Lines changed: 11 additions & 3 deletions
@@ -88,6 +88,15 @@ def get_inputs(
     cache_length = torch.export.Dim("cache_length", min=1, max=4096)

     if config is not None and config.__class__.__name__ == "FalconMambaConfig":
+        seq_length_multiple = 8
+        sequence_length = (
+            (sequence_length + seq_length_multiple)
+            // seq_length_multiple
+            * seq_length_multiple
+        )
+        # sequence_inc = seq_length_multiple
+        sequence_length2 = seq_length_multiple
+
     shapes = {
         "input_ids": {0: batch, 1: torch.export.Dim.DYNAMIC},
         "attention_mask": {
@@ -110,9 +119,8 @@ def get_inputs(
         attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
             torch.int64
         ),
-        cache_position=torch.arange(0, sequence_length + sequence_length2)
-        .to(torch.int64)
-        .expand((batch_size, -1)),
+        cache_position=torch.arange(0, kwargs["conv_kernel"]).to(torch.int64),
+        # .expand((batch_size, -1))
         cache_params=make_mamba_cache(
             [
                 (
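
For FalconMamba, sequence_length is rounded up to a multiple of seq_length_multiple = 8 and sequence_length2 is pinned to 8, presumably so the dummy inputs line up with what the Mamba convolution expects; cache_position likewise becomes a 1-D arange over kwargs["conv_kernel"] entries, with the per-batch expand left commented out. Note that (n + m) // m * m as written bumps a value that is already a multiple of m up to the next one. A small sketch of just that arithmetic, independent of the rest of get_inputs:

def round_up(n: int, m: int = 8) -> int:
    # Same arithmetic as the FalconMamba branch above; values already a
    # multiple of m are pushed to the next multiple (30 -> 32, 32 -> 40).
    return (n + m) // m * m

assert round_up(30) == 32
assert round_up(32) == 40
assert round_up(1) == 8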
