4 changes: 2 additions & 2 deletions src/transformers/integrations/executorch.py
@@ -325,14 +325,14 @@ def export(
                 "input_ids": input_ids,
                 "cache_position": cache_position
                 if cache_position is not None
-                else torch.arange(input_ids.shape[-1], dtype=torch.long, model=model_device),
+                else torch.arange(input_ids.shape[-1], dtype=torch.long, device=model_device),
             }
         else:  # inputs_embeds
             input_kwargs = {
                 "inputs_embeds": inputs_embeds,
                 "cache_position": cache_position
                 if cache_position is not None
-                else torch.arange(inputs_embeds.shape[1], dtype=torch.long, model=model_device),
+                else torch.arange(inputs_embeds.shape[1], dtype=torch.long, device=model_device),
             }

         exported_program = torch.export.export(
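The fix above corrects a typo'd keyword: torch.arange accepts device=, not model=, so the fallback that builds cache_position whenever the caller leaves it as None raised a TypeError at export time. A minimal standalone sketch of the fixed default, where input_ids, model_device, and cache_position stand in for the values computed earlier in export():

import torch

# Hypothetical stand-ins for values available inside export().
input_ids = torch.tensor([[5, 6, 7]], dtype=torch.long)
model_device = input_ids.device
cache_position = None

# Fixed fallback: positions 0..seq_len-1 on the model's device.
# `model=model_device` was invalid; torch.arange only takes `device=`.
if cache_position is None:
    cache_position = torch.arange(input_ids.shape[-1], dtype=torch.long, device=model_device)

print(cache_position)  # tensor([0, 1, 2])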
4 changes: 2 additions & 2 deletions tests/models/gemma/test_modeling_gemma.py
@@ -461,8 +461,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
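Each test now exports the module with a single placeholder token at cache position 0 instead of the full prompt, so the graph is traced with the (1, 1) shape used for token-by-token decoding; the real prompt is still supplied to TorchExportableModuleWithStaticCache.generate, which replays it through the exported program. The same two-line change repeats in the gemma2, llama, olmo, phi3, qwen2, and qwen3 tests below. A hedged sketch of the pattern, assuming model and prompt_token_ids are prepared earlier in the test as in the surrounding code:

import torch

from transformers.integrations.executorch import (
    TorchExportableModuleForDecoderOnlyLM,
    TorchExportableModuleWithStaticCache,
)

# `model` and `prompt_token_ids` are assumed set up by the test fixture:
# a decoder-only LM configured for a static cache, and a tokenized prompt.
exportable_module = TorchExportableModuleForDecoderOnlyLM(model)

# Trace with one dummy token at cache position 0; the exported program
# then matches the single-token decode signature.
exported_program = exportable_module.export(
    input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
    cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
)

# The full prompt is consumed here, one token per call into the exported
# program, with the static KV cache carrying state between calls.
ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
    exported_program=exported_program,
    prompt_token_ids=prompt_token_ids,
    max_new_tokens=20,
)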
4 changes: 2 additions & 2 deletions tests/models/gemma2/test_modeling_gemma2.py
@@ -367,8 +367,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
4 changes: 2 additions & 2 deletions tests/models/llama/test_modeling_llama.py
@@ -351,8 +351,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
4 changes: 2 additions & 2 deletions tests/models/olmo/test_modeling_olmo.py
@@ -385,8 +385,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
4 changes: 2 additions & 2 deletions tests/models/phi3/test_modeling_phi3.py
@@ -412,8 +412,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
4 changes: 2 additions & 2 deletions tests/models/qwen2/test_modeling_qwen2.py
@@ -304,8 +304,8 @@ def test_export_static_cache(self):
             "2.7.0"
         )  # Due to https://github.com/pytorch/pytorch/issues/150994
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
             strict=strict,
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
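The qwen2 and qwen3 tests additionally thread a strict flag through to torch.export (the unchanged context above gates it on torch 2.7.0, citing pytorch/pytorch#150994). For reference, strict=True traces the module through TorchDynamo and rejects unsupported Python constructs, while strict=False uses non-strict tracing via the plain Python interpreter. A minimal, self-contained illustration of the flag; the Add module is a hypothetical toy, not from the tests:

import torch

# Hypothetical toy module used only to demonstrate the `strict` flag.
class Add(torch.nn.Module):
    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return x + y

# strict=True traces via TorchDynamo; strict=False is non-strict tracing.
ep = torch.export.export(Add(), (torch.ones(2), torch.ones(2)), strict=True)
print(ep)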
4 changes: 2 additions & 2 deletions tests/models/qwen3/test_modeling_qwen3.py
@@ -294,8 +294,8 @@ def test_export_static_cache(self):

         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
             strict=strict,
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(