@@ -29,7 +29,7 @@
 import requests_mock
 import torch
 import torch.nn as nn
-from accelerate.utils.modeling import _get_proper_dtype, dtype_byte_size
+from accelerate.utils.modeling import _get_proper_dtype, dtype_byte_size, compute_module_sizes
 from huggingface_hub import ModelCard, delete_repo, snapshot_download
 from huggingface_hub.utils import is_jinja_available
 from parameterized import parameterized
@@ -1080,7 +1080,7 @@ def test_cpu_offload(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)

-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         # We test several splits of sizes to make sure it works.
         max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents[1:]]
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -1110,7 +1110,7 @@ def test_disk_offload_without_safetensors(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)

-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir, safe_serialization=False)

@@ -1144,7 +1144,7 @@ def test_disk_offload_with_safetensors(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)

-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         with tempfile.TemporaryDirectory() as tmp_dir:
             model.cpu().save_pretrained(tmp_dir)

@@ -1172,7 +1172,7 @@ def test_model_parallelism(self):
         torch.manual_seed(0)
         base_output = model(**inputs_dict)

-        model_size = compute_module_persistent_sizes(model)[""]
+        model_size = compute_module_sizes(model)[""]
         # We test several splits of sizes to make sure it works.
         max_gpu_sizes = [int(p * model_size) for p in self.model_split_percents[1:]]
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -1183,6 +1183,7 @@ def test_model_parallelism(self):
                 new_model = self.model_class.from_pretrained(tmp_dir, device_map="auto", max_memory=max_memory)
                 # Making sure part of the model will actually end up offloaded
                 self.assertSetEqual(set(new_model.hf_device_map.values()), {0, 1})
+                print(f"new_model.hf_device_map: {new_model.hf_device_map}")

                 self.check_device_map_is_respected(new_model, new_model.hf_device_map)
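
For context, each hunk swaps compute_module_persistent_sizes for accelerate's compute_module_sizes, which returns a dict mapping submodule names to their sizes in bytes, with the empty-string key "" holding the total for the whole model. A minimal sketch of how the tests use that total (the toy model and the model_split_percents values below are illustrative stand-ins, not taken from the test suite):

import torch.nn as nn
from accelerate.utils.modeling import compute_module_sizes

# Toy model standing in for the model class under test.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 4))

sizes = compute_module_sizes(model)
model_size = sizes[""]  # total size of all parameters and buffers, in bytes

# As in the tests: turn split percentages into per-device memory caps,
# which are then passed to from_pretrained(..., max_memory=...) so that
# part of the model is forced onto another device or offloaded.
model_split_percents = [0.5, 0.3, 0.4]  # illustrative values only
max_gpu_sizes = [int(p * model_size) for p in model_split_percents[1:]]
print(model_size, max_gpu_sizes)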