@@ -147,7 +147,7 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
         """Apply various optimizations to the model"""
         try:
             # Only apply attention slicing if explicitly enabled and not empty
-            if ENABLE_ATTENTION_SLICING and str(ENABLE_ATTENTION_SLICING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, 'enable_attention_slicing'):
                     model.enable_attention_slicing(1)
                     logger.info("Attention slicing enabled")
@@ -156,15 +156,15 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                     "Attention slicing not available for this model")
 
             # Only apply CPU offloading if explicitly enabled and not empty
-            if ENABLE_CPU_OFFLOADING and str(ENABLE_CPU_OFFLOADING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_CPU_OFFLOADING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, "enable_cpu_offload"):
                     model.enable_cpu_offload()
                     logger.info("CPU offloading enabled")
                 else:
                     logger.info("CPU offloading not available for this model")
 
             # Only apply BetterTransformer if explicitly enabled and not empty
-            if ENABLE_BETTERTRANSFORMER and str(ENABLE_BETTERTRANSFORMER).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_BETTERTRANSFORMER', '').lower() not in ('false', '0', 'none', ''):
                 try:
                     from optimum.bettertransformer import BetterTransformer
                     model = BetterTransformer.transform(model)
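
All three replaced checks share one pattern: the flag is read directly from the environment, and an unset variable (the '' default) counts the same as an explicit 'false', '0', or 'none', so each optimization stays off unless explicitly opted into. A minimal sketch of that semantics, using a hypothetical _flag_enabled helper that is not part of this diff:

    import os

    def _flag_enabled(name: str) -> bool:
        # Unset ('') and the usual "off" spellings disable the feature;
        # any other value ('true', '1', 'yes', ...) enables it.
        return os.environ.get(name, '').lower() not in ('false', '0', 'none', '')

    # Assuming the variable starts unset: off by default, on once exported.
    assert not _flag_enabled('LOCALLAB_ENABLE_ATTENTION_SLICING')
    os.environ['LOCALLAB_ENABLE_ATTENTION_SLICING'] = 'true'
    assert _flag_enabled('LOCALLAB_ENABLE_ATTENTION_SLICING')
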
@@ -219,10 +219,17 @@ async def load_model(self, model_id: str) -> bool:
                 **config
             )
 
+            # Move model to the appropriate device
             if not ENABLE_QUANTIZATION or str(ENABLE_QUANTIZATION).lower() in ('false', '0', 'none', ''):
                 device = "cuda" if torch.cuda.is_available() else "cpu"
                 self.model = self.model.to(device)
 
+            # Capture model parameters after loading
+            model_architecture = self.model.config.architectures[0] if hasattr(self.model.config, 'architectures') else 'Unknown'
+            memory_used = torch.cuda.memory_allocated() if torch.cuda.is_available() else 'N/A'
+            logger.info(f"Model architecture: {model_architecture}")
+            logger.info(f"Memory used: {memory_used}")
+
             self.model = self._apply_optimizations(self.model)
 
             self.current_model = model_id
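
Note that torch.cuda.memory_allocated() returns a raw byte count, so the "Memory used" log line prints bytes. If a human-readable figure is wanted, a small variant could scale it; the _format_memory helper below is an illustration, not part of this diff:

    import torch

    def _format_memory(num_bytes: int) -> str:
        # torch.cuda.memory_allocated() reports bytes; scale to MB for readability.
        return f"{num_bytes / (1024 ** 2):.1f} MB"

    if torch.cuda.is_available():
        print(_format_memory(torch.cuda.memory_allocated()))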