
Commit 51d8d75

Updated LocalLab v0.2.1
1 parent d2b2e5e commit 51d8d75

File tree: 5 files changed (+53 −66 lines)


CHANGELOG.md

Lines changed: 3 additions & 28 deletions
@@ -28,6 +28,8 @@ All notable changes for version updates.
 
 - Fixed circular import issues between core/app.py and routes modules
 - Fixed ngrok authentication flow to properly use auth token from environment variables
+- Fixed error with missing torch import in the server.py file
+- Added graceful handling of missing torch module to prevent startup failures
 - Improved error messages when server fails to start
 - Better exception handling throughout the codebase
 
@@ -108,31 +110,4 @@ All notable changes for version updates.
 - Refactored `run_server_proc` in the spawned process to initialize a dedicated logger ("locallab.spawn") to avoid inheriting SemLock objects from a fork context.
 - Ensured that the log queue is created using the multiprocessing spawn context, preventing runtime errors in Google Colab.
 - Updated Mermaid diagrams in `README.md` and `docs/colab/README.md` to enclose node labels in double quotes, resolving parse errors in GitHub rendering.
-- Removed duplicate architecture diagrams from the root `README.md` to streamline documentation.
-- Minor improvements to logging and error handling.
-
-## [0.1.2] - 2025-02-25
-
-### Changed
-
-- Updated GitHub Actions workflow to install the Locallab package along with its runtime dependencies in CI.
-
-### Fixed
-
-- Fixed RuntimeError related to SemLock sharing in multiprocessing by clearing logger handlers in `run_server_proc`.
-- Updated Mermaid diagrams to wrap node labels in double quotes, improving compatibility with GitHub rendering.
-- Improved build status badge aesthetics in the README.
-
-## [0.1.1] - 2025-02-25
-
-### Fixed
-
-- Fixed RuntimeError related to SemLock sharing in multiprocessing by clearing logger handlers in `run_server_proc`.
-- Updated Mermaid diagrams to wrap node labels in double quotes, improving compatibility with GitHub rendering.
-- Improved build status badge aesthetics in the README.
-
-## [0.1.0] - 2025-02-24
-
-### Added
-
-- Initial release as a Python package with full Google Colab integration, dynamic model loading, robust logging (with ASCII art banners), API endpoints for text generation and system monitoring, Ngrok tunnel management, and comprehensive documentation.
+- Removed duplicate architecture diagrams from the root `
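
The two new entries above ("Fixed error with missing torch import" and "graceful handling of missing torch module") refer to the guarded-import pattern applied in server.py and utils/system.py below. A minimal sketch of that pattern, consolidated from this commit's diffs; the device selection at the end mirrors the updated get_device():

# Guarded import: loading the module no longer fails when PyTorch is absent.
try:
    import torch
    TORCH_AVAILABLE = True
except ImportError:
    TORCH_AVAILABLE = False

# Every torch call site is then gated on the flag, e.g.:
if TORCH_AVAILABLE and torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"  # safe default when torch (or a GPU) is unavailable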

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 LocalLab - A lightweight AI inference server
 """
 
-__version__ = "0.2.0"
+__version__ = "0.2.1"
 
 from typing import Dict, Any, Optional
 

locallab/server.py

Lines changed: 23 additions & 18 deletions
@@ -22,6 +22,13 @@
 from .logger.logger import set_server_status, log_request
 from .utils.system import get_gpu_memory
 
+# Import torch - handle import error gracefully
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
+
 # Get the logger instance
 logger = get_logger("locallab.server")
 
@@ -61,16 +68,21 @@ def check_environment() -> List[Tuple[str, str, bool]]:
             ))
 
         # Check Colab runtime type for GPU
-        if not torch.cuda.is_available():
+        if TORCH_AVAILABLE and not torch.cuda.is_available():
             issues.append((
                 "Running in Colab without GPU acceleration",
                 "Change runtime type to GPU: Runtime > Change runtime type > Hardware accelerator > GPU",
                 True
             ))
+        elif not TORCH_AVAILABLE:
+            issues.append((
+                "PyTorch is not installed",
+                "Install PyTorch with: pip install torch",
+                True
+            ))
 
     # Check for CUDA and GPU availability
-    try:
-        import torch
+    if TORCH_AVAILABLE:
         if not torch.cuda.is_available():
             issues.append((
                 "CUDA is not available - using CPU for inference",
@@ -86,40 +98,33 @@ def check_environment() -> List[Tuple[str, str, bool]]:
                 if free_mem < 2000:  # Less than 2GB free
                     issues.append((
                         f"Low GPU memory: Only {free_mem}MB available",
-                        "Consider using a smaller model or enabling quantization with LOCALLAB_ENABLE_QUANTIZATION=true",
+                        "Models may require 2-6GB of GPU memory. Consider closing other applications or using a smaller model",
                         True if free_mem < 1000 else False
                     ))
         except Exception as e:
-            issues.append((
-                f"Failed to check GPU memory: {str(e)}",
-                "This may indicate driver issues. Consider updating your GPU drivers",
-                False
-            ))
-    except ImportError:
+            logger.warning(f"Failed to check GPU memory: {str(e)}")
+    else:
         issues.append((
             "PyTorch is not installed",
             "Install PyTorch with: pip install torch",
             True
         ))
 
-    # Check available system memory
+    # Check system memory
     try:
         import psutil
         memory = psutil.virtual_memory()
+        total_gb = memory.total / (1024 * 1024 * 1024)
         available_gb = memory.available / (1024 * 1024 * 1024)
 
         if available_gb < 2.0:  # Less than 2GB available
             issues.append((
                 f"Low system memory: Only {available_gb:.1f}GB available",
-                "Consider closing other applications or using a system with more RAM",
-                True if available_gb < 1.0 else False
+                "Models may require 2-8GB of system memory. Consider closing other applications",
+                True
             ))
     except Exception as e:
-        issues.append((
-            f"Failed to check system memory: {str(e)}",
-            "This may affect model loading and performance",
-            False
-        ))
+        pass  # Skip if psutil isn't available
 
     # Check for required dependencies
     try:
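
check_environment() collects problems as (message, suggestion, is_critical) tuples instead of raising, so a missing PyTorch now surfaces as a reported issue rather than an import-time crash. A hedged sketch of how a caller might consume that list; this loop is illustrative only, not the consumer used by the server, and it assumes the final bool marks an issue as critical:

from locallab.server import check_environment

issues = check_environment()
for message, suggestion, critical in issues:
    level = "ERROR" if critical else "WARNING"
    print(f"[{level}] {message} -- {suggestion}")

if any(critical for _, _, critical in issues):
    print("Critical environment issues detected; the server may not start correctly.")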

locallab/utils/system.py

Lines changed: 25 additions & 18 deletions
@@ -4,7 +4,11 @@
 
 import os
 import psutil
-import torch
+try:
+    import torch
+    TORCH_AVAILABLE = True
+except ImportError:
+    TORCH_AVAILABLE = False
 from typing import Optional, Tuple, Dict, Any
 
 from ..logger import get_logger
@@ -24,7 +28,7 @@ def get_system_memory() -> Tuple[int, int]:
 
 def get_gpu_memory() -> Optional[Tuple[int, int]]:
     """Get GPU memory information in MB if available"""
-    if not torch.cuda.is_available():
+    if not TORCH_AVAILABLE or not torch.cuda.is_available():
         return None
 
     try:
@@ -53,7 +57,7 @@ def check_resource_availability(required_memory: int) -> bool:
         return False
 
     # If GPU is available, check GPU memory
-    if torch.cuda.is_available():
+    if TORCH_AVAILABLE and torch.cuda.is_available():
         gpu_memory = get_gpu_memory()
         if gpu_memory:
             total_gpu, free_gpu = gpu_memory
@@ -64,11 +68,11 @@
     return True
 
 
-def get_device() -> torch.device:
+def get_device() -> str:
     """Get the best available device for computation"""
-    if torch.cuda.is_available():
-        return torch.device("cuda")
-    return torch.device("cpu")
+    if TORCH_AVAILABLE and torch.cuda.is_available():
+        return "cuda"
+    return "cpu"
 
 
 def format_model_size(size_in_bytes: int) -> str:
@@ -88,19 +92,22 @@ def get_system_resources() -> Dict[str, Any]:
         'ram_total': psutil.virtual_memory().total / (1024 * 1024),
         'ram_available': psutil.virtual_memory().available / (1024 * 1024),
         'memory_usage': psutil.virtual_memory().percent,
-        'gpu_available': torch.cuda.is_available(),
+        'gpu_available': False,
         'gpu_info': []
     }
 
-    if resources['gpu_available']:
-        gpu_count = torch.cuda.device_count()
-        for i in range(gpu_count):
-            gpu_mem = get_gpu_memory()
-            if gpu_mem:
-                total_mem, _ = gpu_mem
-                resources['gpu_info'].append({
-                    'name': torch.cuda.get_device_name(i),
-                    'total_memory': total_mem
-                })
+    # Update GPU availability only if torch is available
+    if TORCH_AVAILABLE:
+        resources['gpu_available'] = torch.cuda.is_available()
+        if resources['gpu_available']:
+            gpu_count = torch.cuda.device_count()
+            for i in range(gpu_count):
+                gpu_mem = get_gpu_memory()
+                if gpu_mem:
+                    total_mem, _ = gpu_mem
+                    resources['gpu_info'].append({
+                        'name': torch.cuda.get_device_name(i),
+                        'total_memory': total_mem
+                    })
 
     return resources
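
Two behaviour changes stand out in the diff above: get_device() now returns a plain "cuda"/"cpu" string instead of a torch.device, and get_system_resources() only probes GPUs when torch imported successfully. A hedged usage sketch under those assumptions; the caller below is illustrative and not part of this commit, and torch's .to() accepts device strings, so existing call sites keep working when torch is installed:

from locallab.utils.system import get_device, get_system_resources

device = get_device()          # "cuda" or "cpu", even when torch is missing
resources = get_system_resources()

if resources["gpu_available"]:
    for gpu in resources["gpu_info"]:
        print(f"GPU: {gpu['name']} ({gpu['total_memory']} MB)")
else:
    print(f"No GPU detected; running on {device} "
          f"with {resources['ram_available']:.0f} MB RAM available")

# model.to(device)  # valid when torch is installed, since .to() accepts strings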

setup.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 setup(
     name="locallab",
-    version="0.2.0",
+    version="0.2.1",
     packages=find_packages(include=["locallab", "locallab.*"]),
     install_requires=[
         "fastapi>=0.68.0,<1.0.0",
