diff --git a/src/transformers/integrations/executorch.py b/src/transformers/integrations/executorch.py
index a56a48ab844d..abdf21d97fd5 100644
--- a/src/transformers/integrations/executorch.py
+++ b/src/transformers/integrations/executorch.py
@@ -325,14 +325,14 @@ def export(
                 "input_ids": input_ids,
                 "cache_position": cache_position
                 if cache_position is not None
-                else torch.arange(input_ids.shape[-1], dtype=torch.long, model=model_device),
+                else torch.arange(input_ids.shape[-1], dtype=torch.long, device=model_device),
             }
         else:  # inputs_embeds
             input_kwargs = {
                 "inputs_embeds": inputs_embeds,
                 "cache_position": cache_position
                 if cache_position is not None
-                else torch.arange(inputs_embeds.shape[1], dtype=torch.long, model=model_device),
+                else torch.arange(inputs_embeds.shape[1], dtype=torch.long, device=model_device),
             }
 
         exported_program = torch.export.export(
diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py
index 8a1e2ea9eb7f..be44d7b97944 100644
--- a/tests/models/gemma/test_modeling_gemma.py
+++ b/tests/models/gemma/test_modeling_gemma.py
@@ -461,8 +461,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
diff --git a/tests/models/gemma2/test_modeling_gemma2.py b/tests/models/gemma2/test_modeling_gemma2.py
index 5d778d8cb2ec..72568ba62e49 100644
--- a/tests/models/gemma2/test_modeling_gemma2.py
+++ b/tests/models/gemma2/test_modeling_gemma2.py
@@ -367,8 +367,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index 26be82b9da82..fc04efac2be8 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -351,8 +351,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py
index ea23f4e96fda..d1cf7d2defac 100644
--- a/tests/models/olmo/test_modeling_olmo.py
+++ b/tests/models/olmo/test_modeling_olmo.py
@@ -385,8 +385,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py
index f80015eeeb56..d4b0bb0d7337 100644
--- a/tests/models/phi3/test_modeling_phi3.py
+++ b/tests/models/phi3/test_modeling_phi3.py
@@ -412,8 +412,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
             exported_program=exported_program, prompt_token_ids=prompt_token_ids, max_new_tokens=max_new_tokens
diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py
index 51bd943cf916..50b220c8cb67 100644
--- a/tests/models/qwen2/test_modeling_qwen2.py
+++ b/tests/models/qwen2/test_modeling_qwen2.py
@@ -304,8 +304,8 @@ def test_export_static_cache(self):
             "2.7.0"
         )  # Due to https://github.com/pytorch/pytorch/issues/150994
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
             strict=strict,
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
diff --git a/tests/models/qwen3/test_modeling_qwen3.py b/tests/models/qwen3/test_modeling_qwen3.py
index 205228073e19..bf17e5133a9c 100644
--- a/tests/models/qwen3/test_modeling_qwen3.py
+++ b/tests/models/qwen3/test_modeling_qwen3.py
@@ -294,8 +294,8 @@ def test_export_static_cache(self):
 
         exportable_module = TorchExportableModuleForDecoderOnlyLM(model)
         exported_program = exportable_module.export(
-            input_ids=prompt_token_ids,
-            cache_position=torch.arange(prompt_token_ids.shape[-1], dtype=torch.long, device=model.device),
+            input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
+            cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
             strict=strict,
         )
         ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
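All of the test updates above follow the same pattern: export() now traces a single decode step (one placeholder token at cache position 0) instead of the full prompt, and the real prompt is only consumed later by TorchExportableModuleWithStaticCache.generate. Below is a minimal sketch of that flow, not taken from this patch: it assumes `model` is a decoder-only LM already prepared for static-cache export (as the surrounding tests set up), and the prompt ids and max_new_tokens value are hypothetical placeholders.

import torch

from transformers.integrations.executorch import (
    TorchExportableModuleForDecoderOnlyLM,
    TorchExportableModuleWithStaticCache,
)

# `model` is assumed to be a decoder-only LM configured for static-cache export,
# as in the tests patched above; the prompt ids below are hypothetical.
prompt_token_ids = torch.tensor([[2, 651, 9456]], dtype=torch.long, device=model.device)

exportable_module = TorchExportableModuleForDecoderOnlyLM(model)

# Export traces one decode step: a single placeholder token at cache position 0.
# If cache_position were omitted, export() would default it via
# torch.arange(..., dtype=torch.long, device=model_device); that is the call
# fixed above to pass device= instead of the invalid model= keyword.
exported_program = exportable_module.export(
    input_ids=torch.tensor([[1]], dtype=torch.long, device=model.device),
    cache_position=torch.tensor([0], dtype=torch.long, device=model.device),
)

# The full prompt is fed in only at generation time, against the exported
# program's static cache.
ep_generated_ids = TorchExportableModuleWithStaticCache.generate(
    exported_program=exported_program,
    prompt_token_ids=prompt_token_ids,
    max_new_tokens=10,  # illustrative value
)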