@@ -54,6 +54,7 @@ def benchmark_quantizer(
5454 results = {}
5555 try :
5656 self ._clear_memory ()
57+ print (f"GPU memory before { name } : { torch .cuda .memory_allocated () / 1024 ** 2 :.1f} MB" )
5758
5859 # Configure quantizer for memory efficiency
5960 mem_efficient_args = dict (quantizer_args )
@@ -66,23 +67,26 @@ def benchmark_quantizer(
6667 "percdamp" : 0.01 ,
6768 "block_size" : 128 ,
6869 })
70+ print (f"Creating model copy for { name } ..." )
71+ # Create a fresh model instance with same config
72+ config_dict = self .model .config .to_dict ()
73+ config_dict .pop ('_name_or_path' , None ) # Remove path to ensure clean config
6974
70- print (f"Creating copy of model for { name } ..." )
71- # Create a fresh model instance from pretrained
72- model_clone = AutoModelForCausalLM .from_pretrained (
73- self .model .config ._name_or_path ,
74- low_cpu_mem_usage = True ,
75- torch_dtype = torch .float32 ,
76- device_map = None # Important: disable device map for copying
77- )
75+ model_clone = type (self .model )(self .model .config )
7876
7977 print (f"Copying parameters for { name } ..." )
80- # Manually copy parameters to ensure proper copying
78+ # Copy parameters with proper CPU offloading
8179 with torch .no_grad ():
82- for name , param in self .model .named_parameters ():
83- if name in model_clone .state_dict ():
84- # Ensure parameter is on CPU for copying
85- model_clone .state_dict ()[name ].copy_ (param .cpu ())
80+ state_dict = {}
81+ for param_name , param in self .model .state_dict ().items ():
82+ # Handle device placement during copy
83+ param_data = param .detach ()
84+ if param_data .device .type != 'cpu' :
85+ param_data = param_data .cpu ()
86+ state_dict [param_name ] = param_data
87+
88+ # Load state dict all at once
89+ model_clone .load_state_dict (state_dict )
8690
8791 # Initialize quantizer with model copy
8892 quantizer = quantizer_class (model = model_clone , ** mem_efficient_args )
0 commit comments