Commit 2fa876d

[tests] make cuda-only tests device-agnostic (#35607)
* initial commit
* remove unrelated files
* further remove
* Update test_trainer.py
* fix style
1 parent e6f9b03 commit 2fa876d

File tree

18 files changed: +57 additions, -47 deletions

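The common thread across these diffs is that CUDA-only markers and hard-coded "cuda" strings are replaced with the backend-neutral helpers from transformers.testing_utils: the require_torch_accelerator decorator and the torch_device string. Below is a minimal sketch of the resulting pattern; the class and test names are made up for illustration, while the decorators, torch_device, and the tiny checkpoint are the ones that appear in the changed files.

# Illustrative sketch (not part of the commit) of the device-agnostic test pattern.
import unittest

import torch

from transformers import AutoModelForCausalLM
from transformers.testing_utils import require_torch_accelerator, slow, torch_device


@require_torch_accelerator  # skips unless some torch accelerator backend is available, not only CUDA
class DeviceAgnosticExampleTest(unittest.TestCase):  # hypothetical test class
    @slow
    def test_generate_on_any_accelerator(self):
        model = AutoModelForCausalLM.from_pretrained(
            "hf-internal-testing/tiny-random-MistralForCausalLM"
        ).to(torch_device)  # torch_device instead of a hard-coded "cuda"
        input_ids = torch.tensor([[1, 2, 3]], device=torch_device)
        out = model.generate(input_ids, max_new_tokens=5)
        # generation adds at most max_new_tokens tokens to the prompt
        self.assertLessEqual(out.shape[-1], input_ids.shape[-1] + 5)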

tests/fsdp/test_fsdp.py

Lines changed: 1 addition & 2 deletions
@@ -32,7 +32,6 @@
     require_accelerate,
     require_fsdp,
     require_torch_accelerator,
-    require_torch_gpu,
     require_torch_multi_accelerator,
     slow,
     torch_device,
@@ -288,7 +287,7 @@ def test_training_and_can_resume_normally(self, state_dict_type):

     @require_torch_multi_accelerator
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_fsdp
     def test_fsdp_cpu_offloading(self):
         try:

tests/generation/test_utils.py

Lines changed: 6 additions & 3 deletions
@@ -33,6 +33,7 @@
     require_flash_attn,
     require_optimum_quanto,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_multi_accelerator,
     require_torch_multi_gpu,
@@ -2043,7 +2044,7 @@ def test_generate_with_quant_cache(self):
         model.generate(**generation_kwargs, **inputs_dict)

     @pytest.mark.generate
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_generate_compile_model_forward(self):
         """
@@ -3791,10 +3792,12 @@ def test_assisted_decoding_in_different_gpu(self):
         self.assertTrue(input_length <= out.shape[-1] <= input_length + 20)

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_assisted_decoding_model_in_gpu_assistant_in_cpu(self):
         # PT-only test: TF doesn't support assisted decoding yet.
-        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to("cuda")
+        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
+            torch_device
+        )
         assistant = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM").to(
             "cpu"
         )
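The `.to(torch_device)` change above is what lets the assisted-decoding test run on whichever backend the suite was started on. As a rough illustration of the idea only, not the actual transformers.testing_utils logic (the real torch_device selection is more involved and can be overridden externally), a test-wide device string can be resolved once and reused everywhere:

# Simplified sketch of resolving a single test-wide device string.
# resolve_test_device is a hypothetical helper, not a transformers API;
# transformers exposes the already-resolved string as torch_device.
import torch


def resolve_test_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"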

tests/models/blip_2/test_modeling_blip_2.py

Lines changed: 6 additions & 5 deletions
@@ -27,6 +27,7 @@
 from transformers import CONFIG_MAPPING, Blip2Config, Blip2QFormerConfig, Blip2VisionConfig
 from transformers.testing_utils import (
     require_torch,
+    require_torch_accelerator,
     require_torch_fp16,
     require_torch_gpu,
     require_torch_multi_accelerator,
@@ -1565,7 +1566,7 @@ def test_forward_signature(self):
         self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_model_from_pretrained(self):
         model_name = "Salesforce/blip2-itm-vit-g"
         model = Blip2TextModelWithProjection.from_pretrained(model_name)
@@ -2191,7 +2192,7 @@ def test_expansion_in_processing(self):

         self.assertTrue(generated_text_expanded == generated_text)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_inference_itm(self):
         model_name = "Salesforce/blip2-itm-vit-g"
         processor = Blip2Processor.from_pretrained(model_name)
@@ -2210,7 +2211,7 @@ def test_inference_itm(self):
         self.assertTrue(torch.allclose(torch.nn.Softmax()(out_itm[0].cpu()), expected_scores, rtol=1e-3, atol=1e-3))
         self.assertTrue(torch.allclose(out[0].cpu(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
     @require_torch_fp16
     def test_inference_itm_fp16(self):
         model_name = "Salesforce/blip2-itm-vit-g"
@@ -2232,7 +2233,7 @@ def test_inference_itm_fp16(self):
         )
         self.assertTrue(torch.allclose(out[0].cpu().float(), torch.Tensor([[0.4406]]), rtol=1e-3, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
     @require_torch_fp16
     def test_inference_vision_with_projection_fp16(self):
         model_name = "Salesforce/blip2-itm-vit-g"
@@ -2256,7 +2257,7 @@ def test_inference_vision_with_projection_fp16(self):
         ]
         self.assertTrue(np.allclose(out.image_embeds[0][0][:6].tolist(), expected_image_embeds, atol=1e-3))

-    @require_torch_gpu
+    @require_torch_accelerator
     @require_torch_fp16
     def test_inference_text_with_projection_fp16(self):
         model_name = "Salesforce/blip2-itm-vit-g"

tests/models/diffllama/test_modeling_diffllama.py

Lines changed: 2 additions & 2 deletions
@@ -676,7 +676,7 @@ def test_eager_matches_sdpa_generate(self):
         )


-@require_torch_gpu
+@require_torch_accelerator
 class DiffLlamaIntegrationTest(unittest.TestCase):
     # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
     # Depending on the hardware we get different logits / generations
@@ -689,7 +689,7 @@ def setUpClass(cls):
         cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
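Note that the setUpClass context line above still calls torch.cuda.get_device_capability(), which is CUDA-specific, even though the class decorator is now require_torch_accelerator; on a non-CUDA accelerator that call would need a guard. A possible guard, stated purely as an assumption and not something this commit adds, could look like:

# Hypothetical guard (not part of this commit): only query the CUDA compute
# capability when the resolved test device is actually a CUDA GPU.
import torch

from transformers.testing_utils import torch_device

cuda_compute_capability_major_version = None
if torch_device == "cuda":
    cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]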

tests/models/falcon_mamba/test_modeling_falcon_mamba.py

Lines changed: 2 additions & 2 deletions
@@ -23,7 +23,7 @@
 from transformers.testing_utils import (
     require_bitsandbytes,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
     require_torch_multi_gpu,
     slow,
     torch_device,
@@ -426,7 +426,7 @@ def recursive_check(tuple_object, dict_object):


 @require_torch
-@require_torch_gpu
+@require_torch_accelerator
 @slow
 class FalconMambaIntegrationTests(unittest.TestCase):
     def setUp(self):

tests/models/fuyu/test_modeling_fuyu.py

Lines changed: 2 additions & 2 deletions
@@ -22,7 +22,7 @@
 from parameterized import parameterized

 from transformers import FuyuConfig, is_torch_available, is_vision_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_accelerator, slow, torch_device
 from transformers.utils import cached_property

 from ...generation.test_utils import GenerationTesterMixin
@@ -327,7 +327,7 @@ def test_model_parallelism(self):


 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class FuyuModelIntegrationTest(unittest.TestCase):
     @cached_property
     def default_processor(self):

tests/models/llama/test_modeling_llama.py

Lines changed: 2 additions & 3 deletions
@@ -26,7 +26,6 @@
     require_read_token,
     require_torch,
     require_torch_accelerator,
-    require_torch_gpu,
     slow,
     torch_device,
 )
@@ -541,7 +540,7 @@ def _reinitialize_config(base_config, new_kwargs):
         config = _reinitialize_config(base_config, {"rope_scaling": {"rope_type": "linear"}})  # missing "factor"


-@require_torch_gpu
+@require_torch_accelerator
 class LlamaIntegrationTest(unittest.TestCase):
     # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
     # Depending on the hardware we get different logits / generations
@@ -695,7 +694,7 @@ def test_model_7b_dola_generation(self):
         self.assertEqual(EXPECTED_TEXT_COMPLETION, text)

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2

tests/models/mistral/test_modeling_mistral.py

Lines changed: 1 addition & 1 deletion
@@ -424,7 +424,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         self.skipTest(reason="Mistral flash attention does not support right padding")


-@require_torch_gpu
+@require_torch_accelerator
 class MistralIntegrationTest(unittest.TestCase):
     # This variable is used to determine which CUDA device are we using for our runners (A10 or T4)
     # Depending on the hardware we get different logits / generations

tests/models/mixtral/test_modeling_mixtral.py

Lines changed: 3 additions & 2 deletions
@@ -22,6 +22,7 @@
 from transformers.testing_utils import (
     require_flash_attn,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     slow,
     torch_device,
@@ -471,7 +472,7 @@ def setUpClass(cls):
         cls.cuda_compute_capability_major_version = torch.cuda.get_device_capability()[0]

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_small_model_logits(self):
         model_id = "hf-internal-testing/Mixtral-tiny"
         dummy_input = torch.LongTensor([[0, 1, 0], [0, 1, 0]]).to(torch_device)
@@ -507,7 +508,7 @@ def test_small_model_logits(self):
         )

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_small_model_logits_batched(self):
         model_id = "hf-internal-testing/Mixtral-tiny"
         dummy_input = torch.LongTensor([[0, 0, 0, 0, 0, 0, 1, 2, 3], [1, 1, 2, 3, 4, 5, 6, 7, 8]]).to(torch_device)

tests/models/nemotron/test_modeling_nemotron.py

Lines changed: 2 additions & 1 deletion
@@ -26,6 +26,7 @@
     require_flash_attn,
     require_read_token,
     require_torch,
+    require_torch_accelerator,
     require_torch_gpu,
     require_torch_sdpa,
     slow,
@@ -103,7 +104,7 @@ def test_model_outputs_equivalence(self, **kwargs):
         pass

     @require_torch_sdpa
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_sdpa_equivalence(self):
         for model_class in self.all_model_classes:
