@@ -29,7 +29,7 @@
 import requests_mock
 import torch
 import torch.nn as nn
-from accelerate.utils.modeling import _get_proper_dtype, dtype_byte_size
+from accelerate.utils.modeling import _get_proper_dtype, compute_module_sizes, dtype_byte_size
 from huggingface_hub import ModelCard, delete_repo, snapshot_download
 from huggingface_hub.utils import is_jinja_available
 from parameterized import parameterized
@@ -57,8 +57,8 @@
     get_python_version,
     is_torch_compile,
     require_torch_2,
+    require_torch_accelerator,
     require_torch_accelerator_with_training,
-    require_torch_gpu,
     require_torch_multi_gpu,
     run_test_in_subprocess,
     torch_all_close,
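The import hunk swaps accelerate's `compute_module_sizes` in for the previous sizing helper and pulls in the accelerator-agnostic `require_torch_accelerator` skip decorator. `compute_module_sizes` returns a dict keyed by dotted submodule names, in bytes, with the empty-string key `""` covering the whole model; that total is what the offload tests below split into per-device caps. A minimal sketch, assuming only that torch and accelerate are installed (the toy model is illustrative, not from the test suite):

```python
import torch.nn as nn
from accelerate.utils.modeling import compute_module_sizes

# A tiny stand-in model; compute_module_sizes walks its parameters and buffers.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 8))

sizes = compute_module_sizes(model)
print(sizes[""])   # total model size in bytes
print(sizes["0"])  # size of the first Linear submodule
```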
@@ -543,7 +543,7 @@ def test_set_xformers_attn_processor_for_determinism(self):
         assert torch.allclose(output, output_3, atol=self.base_precision)
         assert torch.allclose(output_2, output_3, atol=self.base_precision)
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attn_processor_for_determinism(self):
         if self.uses_custom_attn_processor:
             return
@@ -1068,7 +1068,7 @@ def test_wrong_adapter_name_raises_error(self):
 
         self.assertTrue(f"Adapter name {wrong_name} not found in the model." in str(err_context.exception))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_cpu_offload(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
@@ -1080,7 +1080,7 @@ def test_cpu_offload(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)
 
-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         # We test several splits of sizes to make sure it works.
         max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents[1:]]
         with tempfile.TemporaryDirectory() as tmp_dir:
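`test_cpu_offload` turns that byte total into GPU memory caps (`max_gpu_sizes`) and reloads the model with `device_map="auto"` so accelerate places what fits on GPU 0 and offloads the rest. A CPU-only sketch of the underlying mechanism using accelerate's public `infer_auto_device_map`; the 0.6 split and the toy model are illustrative assumptions:

```python
import torch.nn as nn
from accelerate import infer_auto_device_map
from accelerate.utils.modeling import compute_module_sizes

model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 64))
model_size = compute_module_sizes(model)[""]

# Cap "device 0" below the full model size so something must spill to CPU.
max_memory = {0: int(0.6 * model_size), "cpu": model_size * 2}
device_map = infer_auto_device_map(model, max_memory=max_memory)
print(device_map)  # e.g. some submodules on 0, the rest on "cpu"
```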
@@ -1098,7 +1098,7 @@ def test_cpu_offload(self):
 
             self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_disk_offload_without_safetensors(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
@@ -1110,7 +1110,7 @@ def test_disk_offload_without_safetensors(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)
 
-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir, safe_serialization=False)
 
@@ -1132,7 +1132,7 @@ def test_disk_offload_without_safetensors(self):
 
             self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_disk_offload_with_safetensors(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
@@ -1144,7 +1144,7 @@ def test_disk_offload_with_safetensors(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)
 
-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir)
 
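The two disk-offload tests drive the same machinery one step further: weights that fit neither under the GPU cap nor the CPU cap are memory-mapped from a folder on disk. A hedged sketch using accelerate's general-purpose `disk_offload` (an assumption about the mechanism; it is not the exact code path `from_pretrained` takes in these tests):

```python
import tempfile

import torch
import torch.nn as nn
from accelerate import disk_offload

model = nn.Sequential(nn.Linear(32, 32), nn.ReLU(), nn.Linear(32, 32))

with tempfile.TemporaryDirectory() as offload_dir:
    # All weights are saved to offload_dir and paged back in per forward pass.
    model = disk_offload(model, offload_dir, execution_device=torch.device("cpu"))
    out = model(torch.randn(1, 32))
```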
@@ -1172,7 +1172,7 @@ def test_model_parallelism(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)
 
-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         # We test several splits of sizes to make sure it works.
         max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents[1:]]
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -1183,6 +1183,7 @@ def test_model_parallelism(self):
             new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
             # Making sure part of the model will actually end up offloaded
             self.assertSetEqual(set(new_model.hf_device_map.values()), {0, 1})
+            print(f" new_model.hf_device_map:{new_model.hf_device_map}")
 
             self.check_device_map_is_respected(new_model, new_model.hf_device_map)
 
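`test_model_parallelism` asserts that the map actually spans both GPUs (`{0, 1}`), and the newly added `print` surfaces the concrete `hf_device_map` in test logs when that assertion is investigated. The check that follows, sketched as an assumption about what `check_device_map_is_respected` verifies (the real helper lives in this test suite; disk-offloaded weights on the `meta` device would need extra handling this sketch omits):

```python
import torch

def assert_device_map_respected(model, device_map):
    for name, device in device_map.items():
        # "" maps to the root module; torch.device(0) normalizes to cuda:0.
        module = model.get_submodule(name) if name else model
        expected = torch.device(device)
        for param in module.parameters():
            assert param.device == expected, f"{name}: {param.device} != {expected}"
```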
@@ -1191,7 +1192,7 @@ def test_model_parallelism(self):
 
             self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_sharded_checkpoints(self):
         torch.manual_seed(0)
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
@@ -1223,7 +1224,7 @@ def test_sharded_checkpoints(self):
 
             self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_sharded_checkpoints_with_variant(self):
         torch.manual_seed(0)
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
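The three sharded-checkpoint tests save with a small shard size, confirm the reload matches the single-file baseline, and (in the `device_map` variant) that sharded loading composes with placement. A hedged round-trip sketch; the tiny UNet config and the 1MB shard size are illustrative assumptions, and `max_shard_size` assumes a diffusers version that exposes it on `save_pretrained`:

```python
import os
import tempfile

from diffusers import UNet2DConditionModel

# A deliberately tiny config so sharding kicks in at a 1MB limit.
model = UNet2DConditionModel(
    sample_size=32,
    in_channels=4,
    out_channels=4,
    layers_per_block=1,
    block_out_channels=(32, 64),
    down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
    up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
    cross_attention_dim=32,
)

with tempfile.TemporaryDirectory() as tmp_dir:
    model.save_pretrained(tmp_dir, max_shard_size="1MB")
    print(sorted(os.listdir(tmp_dir)))  # several shards plus an index json
    reloaded = UNet2DConditionModel.from_pretrained(tmp_dir)
```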
@@ -1261,7 +1262,7 @@ def test_sharded_checkpoints_with_variant(self):
 
             self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_sharded_checkpoints_device_map(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
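Finally, the motivation for the blanket decorator swap: `require_torch_gpu` skips everything on non-CUDA machines, while an accelerator-agnostic guard runs the test on any non-CPU torch device (CUDA, XPU, MPS, ...). An illustrative re-implementation of the idea, not diffusers' actual `testing_utils` code:

```python
import unittest

import torch

def _torch_device() -> str:
    # Assumption: probe a couple of common backends, oldest API first.
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    return "cpu"

def require_torch_accelerator(test_case):
    return unittest.skipUnless(_torch_device() != "cpu", "test requires an accelerator")(test_case)
```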