
Commit 7e1a0c4

Fixed LocalLab Model Download issue

1 parent 5d5cfea

8 files changed: +444 −90 lines

locallab/core/app.py

Lines changed: 41 additions & 1 deletion

@@ -84,6 +84,9 @@ def init(backend, **kwargs):
 # Startup event triggered flag
 startup_event_triggered = False

+# Model loading status flag
+model_loading_in_progress = False
+
 # Application startup event to ensure banners are displayed
 @app.on_event("startup")
 async def startup_event():

@@ -155,7 +158,11 @@ async def startup_event():
     if model_to_load:
         try:
             # This will run asynchronously without blocking server startup
+            # But we'll set a flag to indicate model loading is in progress
             asyncio.create_task(load_model_in_background(model_to_load))
+            # Set a global flag to indicate model is loading
+            global model_loading_in_progress
+            model_loading_in_progress = True
         except Exception as e:
             logger.error(f"Error starting model loading task: {str(e)}")
     else:

@@ -288,6 +295,7 @@ async def add_process_time_header(request: Request, call_next):

 async def load_model_in_background(model_id: str):
     """Load the model asynchronously in the background"""
+    global model_loading_in_progress
     logger.info(f"Loading model {model_id} in background...")
     start_time = time.time()

@@ -309,8 +317,40 @@ async def load_model_in_background(model_id: str):

         # We don't need to call log_model_loaded here since it's already done in the model_manager
         logger.info(f"{Fore.GREEN}Model {model_id} loaded successfully in {load_time:.2f} seconds!{Style.RESET_ALL}")
+
+        # Now that model is loaded, set server status to running
+        from ..logger.logger import set_server_status
+        set_server_status("running")
+        logger.info("Server status changed to: running")
+
+        # Mark model loading as complete
+        model_loading_in_progress = False
+
+        # Display the running banner now that model is loaded
+        try:
+            from ..ui.banners import print_running_banner
+            from .. import __version__
+            print_running_banner(__version__)
+        except Exception as banner_e:
+            logger.warning(f"Could not display running banner: {banner_e}")
+
     except Exception as e:
         logger.error(f"Failed to load model {model_id}: {str(e)}")
         if "401 Client Error: Unauthorized" in str(e):
             logger.error("This appears to be an authentication error. Please ensure your HuggingFace token is set correctly.")
-            logger.info("You can set your token using: locallab config")
+            logger.info("You can set your token using: locallab config")
+
+        # Even if model loading fails, mark it as complete and set server to running
+        # so the server can still be used for other operations
+        model_loading_in_progress = False
+        from ..logger.logger import set_server_status
+        set_server_status("running")
+        logger.info("Server status changed to: running (model loading failed)")
+
+        # Display the running banner even if model loading failed
+        try:
+            from ..ui.banners import print_running_banner
+            from .. import __version__
+            print_running_banner(__version__)
+        except Exception as banner_e:
+            logger.warning(f"Could not display running banner: {banner_e}")
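
For reference, the pattern this diff applies (start the load with asyncio.create_task so startup returns immediately, track progress with a module-level flag, and flip the status to "running" whether the load succeeds or fails) can be reduced to a minimal, runnable sketch. This is an illustration of the technique, not LocalLab's actual code; the sleep stands in for the real download:

import asyncio
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("sketch")

model_loading_in_progress = False
server_status = "loading"

async def load_model_in_background(model_id: str) -> None:
    """Stand-in for the real load: sleep instead of downloading weights."""
    global model_loading_in_progress, server_status
    try:
        await asyncio.sleep(0.5)  # placeholder for the actual download/load work
        logger.info("Model %s loaded", model_id)
    except Exception as e:
        logger.error("Failed to load model %s: %s", model_id, e)
    finally:
        # Success or failure, unblock the server so other routes keep working
        model_loading_in_progress = False
        server_status = "running"

async def main() -> None:
    global model_loading_in_progress
    model_loading_in_progress = True
    asyncio.create_task(load_model_in_background("demo/model"))  # non-blocking
    while model_loading_in_progress:  # the real server serves requests here
        await asyncio.sleep(0.1)
    logger.info("Server status: %s", server_status)

asyncio.run(main())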

locallab/model_manager.py

Lines changed: 316 additions & 53 deletions (large diff not rendered by default)

locallab/routes/models.py

Lines changed: 10 additions & 24 deletions

@@ -49,18 +49,11 @@ class LoadModelRequest(BaseModel):
 async def load_model(request: LoadModelRequest) -> Dict[str, str]:
     """Load a specific model"""
     try:
-        # Check if model exists in registry
-        if request.model_id not in MODEL_REGISTRY:
-            raise HTTPException(
-                status_code=404,
-                detail=f"Model {request.model_id} not found. Available models: {list(MODEL_REGISTRY.keys())}"
-            )
-
         # Check if model is already loaded
         if model_manager.current_model == request.model_id and model_manager.is_model_loaded(request.model_id):
             return {"status": "success", "message": f"Model {request.model_id} is already loaded"}
-
-        # Load the model
+
+        # Load the model (this will handle both registry and custom models)
         await model_manager.load_model(request.model_id)
         return {"status": "success", "message": f"Model {request.model_id} loaded successfully"}
     except Exception as e:

@@ -108,17 +101,14 @@ async def get_current_model() -> ModelResponse:
     )

 @router.post("/load/{model_id}", response_model=Dict[str, str])
-async def load_model(model_id: str, background_tasks: BackgroundTasks) -> Dict[str, str]:
+async def load_model_by_path(model_id: str, background_tasks: BackgroundTasks) -> Dict[str, str]:
     """Load a specific model"""
-    if model_id not in MODEL_REGISTRY:
-        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
-
     # Check if the model is already loaded
     if model_manager.current_model == model_id and model_manager.is_model_loaded(model_id):
         return {"status": "success", "message": f"Model {model_id} is already loaded"}
-
+
     try:
-        # Load model in background
+        # Load model in background (this will handle both registry and custom models)
         background_tasks.add_task(model_manager.load_model, model_id)
         return {"status": "loading", "message": f"Model {model_id} loading started in background"}
     except Exception as e:

@@ -129,15 +119,13 @@ async def load_model(model_id: str, background_tasks: BackgroundTasks) -> Dict[str, str]:
 async def load_model_from_body(request: LoadModelRequest, background_tasks: BackgroundTasks) -> Dict[str, str]:
     """Load a specific model using model_id from request body"""
     model_id = request.model_id
-    if model_id not in MODEL_REGISTRY:
-        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
-
+
     # Check if the model is already loaded
     if model_manager.current_model == model_id and model_manager.is_model_loaded(model_id):
         return {"status": "success", "message": f"Model {model_id} is already loaded"}
-
+
     try:
-        # Load model in background
+        # Load model in background (this will handle both registry and custom models)
         background_tasks.add_task(model_manager.load_model, model_id)
         return {"status": "loading", "message": f"Model {model_id} loading started in background"}
     except Exception as e:

@@ -161,12 +149,10 @@ async def unload_model() -> Dict[str, str]:
 @router.get("/status/{model_id}", response_model=ModelResponse)
 async def get_model_status(model_id: str) -> ModelResponse:
     """Get the loading status of a specific model"""
-    if model_id not in MODEL_REGISTRY:
-        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
-
+    # Check if model is in registry first, otherwise treat as custom model
     model_info = MODEL_REGISTRY.get(model_id, {})
     is_loaded = model_manager.is_model_loaded(model_id)
-
+
     # If this is the current model and it's loaded or loading
     if model_manager.current_model == model_id:
         return ModelResponse(
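
With the MODEL_REGISTRY checks removed, these endpoints should accept arbitrary Hugging Face model ids rather than only registry entries. A hypothetical client call; the host, port, and route prefix are assumptions about a local deployment, and the requests library is used purely for illustration:

import requests

# Assumed base URL and route prefix; adjust for your LocalLab instance.
resp = requests.post(
    "http://localhost:8000/models/load",
    json={"model_id": "microsoft/phi-2"},  # any HF id, registry or not
    timeout=30,
)
print(resp.json())  # expected shape: {"status": "loading", "message": "..."}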

locallab/server.py

Lines changed: 52 additions & 4 deletions

@@ -811,8 +811,34 @@ def on_startup():
     try:
         logger.info("Server startup callback triggered")

-        # Set server status to running
-        set_server_status("running")
+        # Check if a model is configured to load on startup
+        try:
+            from .cli.config import get_config_value
+            from .config import DEFAULT_MODEL
+            import os
+
+            # Get the model that should be loaded
+            model_to_load = (
+                os.environ.get("HUGGINGFACE_MODEL") or
+                get_config_value("model") or
+                DEFAULT_MODEL
+            )
+
+            if model_to_load:
+                # Set server status to loading while model loads
+                set_server_status("loading")
+                logger.info("Server status changed to: loading (waiting for model)")
+                # Don't display running banner yet - wait for model to load
+                return
+            else:
+                # No model to load, set to running immediately
+                set_server_status("running")
+                logger.info("Server status changed to: running")
+        except Exception as e:
+            # Fallback if anything fails
+            logger.warning(f"Could not determine model loading status: {e}")
+            set_server_status("running")
+            logger.info("Server status changed to: running")

         # Display the RUNNING banner
         print_running_banner(__version__)

@@ -862,8 +888,30 @@ def on_startup():
             logger.debug(f"Startup display error details: {traceback.format_exc()}")
         # Still mark startup as complete to avoid repeated attempts
         startup_complete[0] = True
-        # Ensure server status is set to running even if display fails
-        set_server_status("running")
+        # Check if a model is configured to load before setting to running
+        try:
+            from .cli.config import get_config_value
+            from .config import DEFAULT_MODEL
+            import os
+
+            # Get the model that should be loaded
+            model_to_load = (
+                os.environ.get("HUGGINGFACE_MODEL") or
+                get_config_value("model") or
+                DEFAULT_MODEL
+            )
+
+            if model_to_load:
+                set_server_status("loading")
+                logger.info("Server status changed to: loading (waiting for model)")
+            else:
+                set_server_status("running")
+                logger.info("Server status changed to: running")
+        except Exception as e:
+            # Fallback if anything fails
+            logger.warning(f"Could not determine model loading status: {e}")
+            set_server_status("running")
+            logger.info("Server status changed to: running")

     # Define async callback that uvicorn can call
     async def on_startup_async():
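
The model-resolution chain added in both hunks (environment variable first, then the saved CLI config, then the package default) works because Python's or operator short-circuits on the first truthy value. A standalone sketch with the config lookup stubbed out; DEFAULT_MODEL and the stub are placeholders, not LocalLab's real values:

import os

DEFAULT_MODEL = "some-org/default-model"  # placeholder, not LocalLab's actual default

def get_config_value(key: str):
    """Stub for the CLI config lookup; returns None when nothing is saved."""
    return None

model_to_load = (
    os.environ.get("HUGGINGFACE_MODEL")  # 1. explicit environment override
    or get_config_value("model")         # 2. value saved via the CLI config
    or DEFAULT_MODEL                     # 3. fall back to the package default
)
print(model_to_load)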

locallab/utils/early_config.py

Lines changed: 15 additions & 3 deletions

@@ -9,7 +9,14 @@
 import warnings

 # Configure environment variables for Hugging Face
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # Enable HF Transfer for better downloads
+# Only enable HF Transfer if the package is available
+try:
+    import hf_transfer
+    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # Enable HF Transfer for better downloads
+except ImportError:
+    # hf_transfer not available, disable it to avoid errors
+    os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
+
 os.environ["TOKENIZERS_PARALLELISM"] = "true"  # Enable parallelism for tokenizers
 os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"  # Disable advisory warnings
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"  # Disable telemetry

@@ -106,9 +113,14 @@ def enable_hf_progress_bars():
         # Method 3: Set environment variable (works for all versions)
         os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "0"

-        # Also enable HF Transfer for better download experience
+        # Also enable HF Transfer for better download experience (only if available)
         if hasattr(huggingface_hub, "constants"):
-            huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
+            try:
+                import hf_transfer
+                huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = True
+            except ImportError:
+                # hf_transfer not available, don't enable it
+                huggingface_hub.constants.HF_HUB_ENABLE_HF_TRANSFER = False
     except ImportError:
         pass
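
All three HF Transfer fixes in this commit follow the same optional-dependency probe: only turn the feature on when hf_transfer is importable, since huggingface_hub raises at download time if HF_HUB_ENABLE_HF_TRANSFER=1 is set but the package is missing. A reusable sketch of the probe; the helper name is made up:

import importlib.util
import os

def enable_if_installed(package: str, env_var: str) -> bool:
    """Set env_var to "1" only when `package` is importable, else "0"."""
    available = importlib.util.find_spec(package) is not None
    os.environ[env_var] = "1" if available else "0"
    return available

if enable_if_installed("hf_transfer", "HF_HUB_ENABLE_HF_TRANSFER"):
    print("hf_transfer enabled for faster downloads")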

locallab/utils/progress.py

Lines changed: 8 additions & 3 deletions

@@ -164,9 +164,14 @@ def configure_hf_hub_progress():
         from huggingface_hub.utils import logging as hf_logging
         hf_logging.enable_progress_bars()

-        # 2. Enable HF Transfer for better download experience
-        from huggingface_hub import constants
-        constants.HF_HUB_ENABLE_HF_TRANSFER = True
+        # 2. Enable HF Transfer for better download experience (only if available)
+        try:
+            import hf_transfer
+            from huggingface_hub import constants
+            constants.HF_HUB_ENABLE_HF_TRANSFER = True
+        except ImportError:
+            # hf_transfer not available, skip enabling it
+            pass

         # 3. Make sure we're NOT overriding HuggingFace's progress callback
         # This is critical - we want to use their native implementation

requirements.txt

Lines changed: 1 addition & 1 deletion

@@ -1,7 +1,7 @@
 fastapi>=0.68.0,<1.0.0
 uvicorn>=0.15.0,<1.0.0
 python-multipart>=0.0.5
-transformers>=4.0.0
+transformers>=4.49.0
 accelerate>=0.12.0
 pyngrok>=5.1.0
 nest-asyncio>=1.5.1

setup.py

Lines changed: 1 addition & 1 deletion

@@ -13,7 +13,7 @@
     "python-multipart>=0.0.5",
     "dataclasses-json>=0.5.7,<1.0.0",
     "torch>=2.0.0,<3.0.0",
-    "transformers>=4.28.1,<5.0.0",
+    "transformers>=4.49.0,<5.0.0",
     "accelerate>=0.18.0,<1.0.0",
     "click>=8.1.3,<9.0.0",
     "rich>=13.3.4,<14.0.0",
