
Commit e05a02a (merge, 2 parents: eab0a77 + 6ca5f73)

fix two things

4 files changed: +5 additions, -4 deletions


CHANGELOGS.rst

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ Change Logs
 +++++

 * :pr:`287`: adds input ``'inputs_prompt'`` to test a LLM, meant to be used during validation
+* :pr:`288`: add .contiguous in torch.cond branch (attention patch for sdpa implementation)
 * :pr:`286`: adds variable to track random nodes in models

 0.8.0

_doc/technical/plot_generate.py

Lines changed: 1 addition & 1 deletion
@@ -186,7 +186,7 @@ def simple_generate_with_cache(
 # seen earlier for a torch model.
 # Let's ask first the function to return the session to avoid creating on the second call.

-_res, session = onnx_generate(
+_res, session, _feeds = onnx_generate(
     model_name, inputs.input_ids, 2, max_new_tokens=2, return_session=True
 )

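The change above is breaking for callers: with ``return_session=True``, ``onnx_generate`` now returns three values instead of two. A minimal sketch of adapting a caller, assuming the third value is the feed dictionary of the last inference step (the diff shows the new arity, not the content of ``_feeds``) and assuming the import path and model id used below:

import torch
from onnx_diagnostic.helpers.rt_helper import onnx_generate  # import path assumed

model_name = "arnir0/Tiny-LLM"              # placeholder model id, an assumption
input_ids = torch.randint(0, 1000, (1, 8))  # dummy prompt token ids

# return_session=True now yields (result, session, feeds).
res, session, _feeds = onnx_generate(
    model_name, input_ids, 2, max_new_tokens=2, return_session=True
)
# `session` can be kept to avoid rebuilding the ONNX model on a second call
# (the point of the comment in plot_generate.py); `_feeds` is assumed to hold
# the inputs of the last run, handy for debugging a generation step.
print(res.shape, type(session).__name__, sorted(_feeds))

The same two-to-three unpacking fix appears again in the unit test below.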
_unittests/ut_helpers/test_rt_helper.py

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ def test_onnx_generate(self):
         )

         print("-- test_onnx_generate: generate")
-        res, session = onnx_generate(
+        res, session, _feeds = onnx_generate(
             model_name, input_ids[:1], 2, max_new_tokens=10, return_session=True
         )
         n_inputs = input_ids.shape[1]

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 2 additions & 2 deletions
@@ -1452,7 +1452,7 @@ def patched_sdpa_attention_forward(
             scale=scaling,
             is_causal=True,
             **sdpa_kwargs,
-        ),
+        ).contiguous(),
         lambda query, key, value: torch.nn.functional.scaled_dot_product_attention(
             query,
             key,
@@ -1461,7 +1461,7 @@ def patched_sdpa_attention_forward(
             scale=scaling,
             is_causal=False,
             **sdpa_kwargs,
-        ),
+        ).contiguous(),
         [query, key, value],
     )
     attn_output = attn_output.transpose(1, 2).contiguous()
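Why the added ``.contiguous()``: ``torch.cond`` traces both branches and expects them to return tensors with matching metadata, and the two SDPA paths (``is_causal=True`` vs ``is_causal=False``) can come back with different memory layouts depending on which kernel serves the call, which breaks the export. Forcing ``.contiguous()`` inside each branch normalizes the layout of both outputs. A standalone sketch of the patched pattern, not the library code itself (shapes and scaling are arbitrary):

import torch
import torch.nn.functional as F

def sdpa_under_cond(query, key, value, use_causal, scaling=0.25):
    # Both branches end in .contiguous() so torch.cond sees identically
    # laid-out outputs whichever kernel serves each path.
    return torch.cond(
        use_causal,
        lambda q, k, v: F.scaled_dot_product_attention(
            q, k, v, scale=scaling, is_causal=True
        ).contiguous(),
        lambda q, k, v: F.scaled_dot_product_attention(
            q, k, v, scale=scaling, is_causal=False
        ).contiguous(),
        [query, key, value],
    )

q = k = v = torch.randn(2, 4, 8, 16)
out = sdpa_under_cond(q, k, v, torch.tensor(True))
print(out.shape)  # torch.Size([2, 4, 8, 16])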
