@@ -147,7 +147,7 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
         """Apply various optimizations to the model"""
         try:
             # Only apply attention slicing if explicitly enabled and not empty
-            if ENABLE_ATTENTION_SLICING and str(ENABLE_ATTENTION_SLICING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, 'enable_attention_slicing'):
                     model.enable_attention_slicing(1)
                     logger.info("Attention slicing enabled")
@@ -156,15 +156,15 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                     "Attention slicing not available for this model")
 
             # Only apply CPU offloading if explicitly enabled and not empty
-            if ENABLE_CPU_OFFLOADING and str(ENABLE_CPU_OFFLOADING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_CPU_OFFLOADING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, "enable_cpu_offload"):
                     model.enable_cpu_offload()
                     logger.info("CPU offloading enabled")
                 else:
                     logger.info("CPU offloading not available for this model")
 
             # Only apply BetterTransformer if explicitly enabled and not empty
-            if ENABLE_BETTERTRANSFORMER and str(ENABLE_BETTERTRANSFORMER).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_BETTERTRANSFORMER', '').lower() not in ('false', '0', 'none', ''):
                 try:
                     from optimum.bettertransformer import BetterTransformer
                     model = BetterTransformer.transform(model)
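
All three replaced checks share one pattern: the flag is read directly from the environment, and an unset variable (the '' default) counts the same as an explicit 'false', '0', or 'none', so each optimization stays off unless explicitly opted into. A minimal sketch of that semantics, using a hypothetical _flag_enabled helper that is not part of this diff:

    import os

    def _flag_enabled(name: str) -> bool:
        # Unset ('') and the usual "off" spellings disable the feature;
        # any other value ('true', '1', 'yes', ...) enables it.
        return os.environ.get(name, '').lower() not in ('false', '0', 'none', '')

    # Assuming the variable starts unset: off by default, on once exported.
    assert not _flag_enabled('LOCALLAB_ENABLE_ATTENTION_SLICING')
    os.environ['LOCALLAB_ENABLE_ATTENTION_SLICING'] = 'true'
    assert _flag_enabled('LOCALLAB_ENABLE_ATTENTION_SLICING')
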
@@ -219,10 +219,17 @@ async def load_model(self, model_id: str) -> bool:
                 **config
             )
 
+            # Move model to the appropriate device
             if not ENABLE_QUANTIZATION or str(ENABLE_QUANTIZATION).lower() in ('false', '0', 'none', ''):
                 device = "cuda" if torch.cuda.is_available() else "cpu"
                 self.model = self.model.to(device)
 
+            # Capture model parameters after loading
+            model_architecture = self.model.config.architectures[0] if hasattr(self.model.config, 'architectures') else 'Unknown'
+            memory_used = torch.cuda.memory_allocated() if torch.cuda.is_available() else 'N/A'
+            logger.info(f"Model architecture: {model_architecture}")
+            logger.info(f"Memory used: {memory_used}")
+
             self.model = self._apply_optimizations(self.model)
 
             self.current_model = model_id
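
Note that torch.cuda.memory_allocated() returns a raw byte count, so the "Memory used" log line prints bytes. If a human-readable figure is wanted, a small variant could scale it; the _format_memory helper below is an illustration, not part of this diff:

    import torch

    def _format_memory(num_bytes: int) -> str:
        # torch.cuda.memory_allocated() reports bytes; scale to MB for readability.
        return f"{num_bytes / (1024 ** 2):.1f} MB"

    if torch.cuda.is_available():
        print(_format_memory(torch.cuda.memory_allocated()))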