@@ -983,14 +983,11 @@ def remove_all_hooks(self):
983983 r"""
984984 Removes all hooks that were added when using `enable_sequential_cpu_offload` or `enable_model_cpu_offload`.
985985 """
986- print ("Within remove_all_hooks()." )
987986 for _ , model in self .components .items ():
988987 if isinstance (model , torch .nn .Module ) and hasattr (model , "_hf_hook" ):
989- print (f"{ model .__class__ .__name__ = } " )
990988 accelerate .hooks .remove_hook_from_module (model , recurse = True )
991- print ( "Done removing from the current model." )
989+
992990 self ._all_hooks = []
993- print ("Done in remove." )
994991
995992 def enable_model_cpu_offload (self , gpu_id : Optional [int ] = None , device : Union [torch .device , str ] = "cuda" ):
996993 r"""
@@ -1039,21 +1036,16 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
10391036 device_type = torch_device .type
10401037 device = torch .device (f"{ device_type } :{ self ._offload_gpu_id } " )
10411038 self ._offload_device = device
1042- print ("Initial assignments done." )
10431039
10441040 self .to ("cpu" , silence_dtype_warnings = True )
1045- print ("placed on CPU." )
10461041 device_mod = getattr (torch , device .type , None )
1047- print (f"{ device = } " )
10481042 if hasattr (device_mod , "empty_cache" ) and device_mod .is_available ():
10491043 device_mod .empty_cache () # otherwise we don't see the memory savings (but they probably exist)
1050- print ("Empty cache called." )
10511044 all_model_components = {k : v for k , v in self .components .items () if isinstance (v , torch .nn .Module )}
10521045
10531046 self ._all_hooks = []
10541047 hook = None
10551048 for model_str in self .model_cpu_offload_seq .split ("->" ):
1056- print (f"Entering with { model_str } " )
10571049 model = all_model_components .pop (model_str , None )
10581050
10591051 if not isinstance (model , torch .nn .Module ):
@@ -1069,7 +1061,6 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
10691061
10701062 _ , hook = cpu_offload_with_hook (model , device , prev_module_hook = hook )
10711063 self ._all_hooks .append (hook )
1072- print ("Initial hooks appended." )
10731064
10741065 # CPU offload models that are not in the seq chain unless they are explicitly excluded
10751066 # these models will stay on CPU until maybe_free_model_hooks is called
@@ -1083,7 +1074,7 @@ def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[t
10831074 else :
10841075 _ , hook = cpu_offload_with_hook (model , device )
10851076 self ._all_hooks .append (hook )
1086-
1077+
10871078 def maybe_free_model_hooks (self ):
10881079 r"""
10891080 Function that offloads all components, removes all model hooks that were added when using
0 commit comments