Commit 0b04562

Updated LocalLab v0.2.4
1 parent 84566bf commit 0b04562

8 files changed: +386 additions, −90 deletions

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
@@ -2,6 +2,28 @@
 
 All notable changes for version updates.
 
+## [0.2.4] - 2025-03-04
+
+### Fixed
+
+- Fixed API endpoint errors for `/models/available` and other model endpoints
+- Resolved parameter error in `get_model_generation_params()` function
+- Improved error handling for model optimization settings through environment variables
+- Fixed circular import issues between routes and core modules
+- Enhanced Flash Attention warning message to be more informative
+
+### Added
+
+- Added new `get_gpu_info()` function for detailed GPU monitoring
+- Added improved system resource endpoint with detailed GPU metrics
+- Added robust environment variable handling for optimization settings
+
+### Changed
+
+- Made optimization flags more robust by checking for empty string values
+- Improved fallback handling for missing torch packages
+- Enhanced server startup logs with better optimization information
+
 ## [0.2.3] - 2025-03-03
 
 ### Fixed
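The "empty string values" fix refers to optimization flags read from the environment. A minimal sketch of that parsing pattern, assuming a hypothetical flag name and helper (neither appears verbatim in this commit):

```python
# Hedged sketch: treat an empty environment variable as "unset" rather than
# truthy. The flag name and helper below are illustrative, not from this diff.
import os

def flag_enabled(name: str, default: bool = False) -> bool:
    value = os.environ.get(name, "").strip().lower()
    if value == "":  # unset or empty string falls back to the default
        return default
    return value in ("1", "true", "yes", "on")

print(flag_enabled("LOCALLAB_ENABLE_FLASH_ATTENTION"))  # hypothetical flag name
```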

locallab/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,8 @@
 """
-LocalLab - A lightweight AI inference server
+LocalLab: Run LLMs locally with a friendly API similar to OpenAI
 """
 
-__version__ = "0.2.3"
+__version__ = "0.2.4"
 
 from typing import Dict, Any, Optional

locallab/config.py

Lines changed: 27 additions & 2 deletions
@@ -347,12 +347,37 @@ def estimate_model_requirements(model_id: str) -> Optional[Dict[str, Any]]:
 Keep responses short unless specifically asked for detailed information.
 Respond directly to greetings with simple, friendly responses."""
 
-def get_model_generation_params() -> dict:
-    return {
+def get_model_generation_params(model_id: Optional[str] = None) -> dict:
+    """Get model generation parameters, optionally specific to a model.
+
+    Args:
+        model_id: Optional model ID to get specific parameters for
+
+    Returns:
+        Dictionary of generation parameters
+    """
+    # Base parameters (defaults)
+    params = {
         "max_length": get_env_var("LOCALLAB_MODEL_MAX_LENGTH", default=DEFAULT_MAX_LENGTH, var_type=int),
         "temperature": get_env_var("LOCALLAB_MODEL_TEMPERATURE", default=DEFAULT_TEMPERATURE, var_type=float),
         "top_p": get_env_var("LOCALLAB_MODEL_TOP_P", default=DEFAULT_TOP_P, var_type=float),
+        "top_k": get_env_var("LOCALLAB_TOP_K", default=DEFAULT_TOP_K, var_type=int),
+        "repetition_penalty": get_env_var("LOCALLAB_REPETITION_PENALTY", default=DEFAULT_REPETITION_PENALTY, var_type=float),
     }
+
+    # If model_id is provided and exists in MODEL_REGISTRY, use model-specific parameters
+    if model_id and model_id in MODEL_REGISTRY:
+        model_config = MODEL_REGISTRY[model_id]
+        # Override with model-specific parameters if available
+        if "max_length" in model_config:
+            params["max_length"] = model_config["max_length"]
+
+        # Add any other model-specific parameters from the registry
+        for param in ["temperature", "top_p", "top_k", "repetition_penalty"]:
+            if param in model_config:
+                params[param] = model_config[param]
+
+    return params
 
 class SystemInstructions:
     def __init__(self):
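For reference, a minimal usage sketch of the new signature; the model ID below is illustrative and assumes such an entry exists in `MODEL_REGISTRY`:

```python
# Hedged sketch: call the updated function with and without a model_id.
from locallab.config import get_model_generation_params

defaults = get_model_generation_params()                  # env-var driven defaults
params = get_model_generation_params("microsoft/phi-2")   # hypothetical registry entry
print(params["max_length"], params["temperature"], params["top_k"])
```

With no argument the behavior matches the old version plus the new `top_k` and `repetition_penalty` defaults; with a registered model ID, registry values override the environment defaults.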

locallab/model_manager.py

Lines changed: 138 additions & 50 deletions
Large diffs are not rendered by default.

locallab/routes/models.py

Lines changed: 7 additions & 0 deletions
@@ -52,6 +52,13 @@ async def list_models() -> ModelsListResponse:
     )
 
 
+@router.get("/available", response_model=ModelsListResponse)
+async def available_models() -> ModelsListResponse:
+    """List all available models (alternative endpoint)"""
+    # This endpoint exists to provide compatibility with different API patterns
+    return await list_models()
+
+
 @router.get("/current", response_model=ModelResponse)
 async def get_current_model() -> ModelResponse:
     """Get information about the currently loaded model"""

locallab/routes/system.py

Lines changed: 85 additions & 31 deletions
@@ -8,12 +8,16 @@
 import time
 import psutil
 import torch
+import platform
+from datetime import datetime
 
 from ..logger import get_logger
 from ..logger.logger import get_request_count, get_uptime_seconds
 from ..core.app import model_manager, start_time
 from ..ui.banners import print_system_resources
 from ..config import system_instructions
+from ..utils.system import get_gpu_info as utils_get_gpu_info
+from ..utils.networking import get_public_ip, get_network_interfaces
 
 # Get logger
 logger = get_logger("locallab.routes.system")
@@ -38,6 +42,17 @@ class SystemInstructionsRequest(BaseModel):
     model_id: Optional[str] = None
 
 
+class SystemResourcesResponse(BaseModel):
+    """Response model for system resources"""
+    cpu: Dict[str, Any]
+    memory: Dict[str, Any]
+    gpu: Optional[List[Dict[str, Any]]] = None
+    disk: Dict[str, Any]
+    platform: str
+    server_uptime: float
+    api_requests: int
+
+
 def get_gpu_memory() -> Optional[Tuple[int, int]]:
     """Get GPU memory info in MB"""
     try:
@@ -51,24 +66,6 @@ def get_gpu_memory() -> Optional[Tuple[int, int]]:
         return None
 
 
-def get_gpu_info() -> Optional[Dict[str, Any]]:
-    """Get detailed GPU information including memory and device name"""
-    try:
-        gpu_mem = get_gpu_memory()
-        if gpu_mem:
-            total_gpu, free_gpu = gpu_mem
-            return {
-                "total_memory": total_gpu,
-                "free_memory": free_gpu,
-                "used_memory": total_gpu - free_gpu,
-                "device": torch.cuda.get_device_name(0)
-            }
-        return None
-    except Exception as e:
-        logger.debug(f"Failed to get GPU info: {str(e)}")
-        return None
-
-
 @router.post("/system/instructions")
 async def update_system_instructions(request: SystemInstructionsRequest) -> Dict[str, str]:
     """Update system instructions"""
@@ -112,7 +109,7 @@ async def get_system_info():
     memory_percent = memory.percent
 
     # Get GPU info if available
-    gpu_info = get_gpu_info() if torch.cuda.is_available() else None
+    gpu_info = utils_get_gpu_info() if torch.cuda.is_available() else None
 
     # Get server stats
     uptime = time.time() - start_time
@@ -170,24 +167,81 @@ async def root() -> Dict[str, Any]:
     }
 
 
+@router.get("/resources", response_model=SystemResourcesResponse)
+async def get_system_resources() -> SystemResourcesResponse:
+    """Get system resource information"""
+    disk = psutil.disk_usage('/')
+    uptime = time.time() - start_time
+
+    # Get detailed GPU information
+    gpu_info = utils_get_gpu_info()
+
+    return SystemResourcesResponse(
+        cpu={
+            "cores": psutil.cpu_count(logical=False),
+            "threads": psutil.cpu_count(logical=True),
+            "usage": psutil.cpu_percent(interval=0.1),
+            "frequency": psutil.cpu_freq().current if psutil.cpu_freq() else None
+        },
+        memory={
+            "total": psutil.virtual_memory().total,
+            "available": psutil.virtual_memory().available,
+            "used": psutil.virtual_memory().used,
+            "percent": psutil.virtual_memory().percent
+        },
+        gpu=gpu_info,
+        disk={
+            "total": disk.total,
+            "free": disk.free,
+            "used": disk.used,
+            "percent": disk.percent
+        },
+        platform=platform.platform(),
+        server_uptime=uptime,
+        api_requests=get_request_count()
+    )
+
+
+@router.get("/network", response_model=Dict[str, Any])
+async def get_network_info() -> Dict[str, Any]:
+    """Get network information"""
+    try:
+        public_ip = await get_public_ip()
+    except:
+        public_ip = "Unknown"
+
+    return {
+        "public_ip": public_ip,
+        "hostname": platform.node(),
+        "interfaces": get_network_interfaces()
+    }
+
+
 def get_system_resources() -> Dict[str, Any]:
     """Get system resource information"""
+    try:
+        import torch
+        torch_available = True
+    except ImportError:
+        torch_available = False
+
+    # Get memory information
+    virtual_memory = psutil.virtual_memory()
+    ram_gb = virtual_memory.total / 1024 / 1024 / 1024
+    ram_available_gb = virtual_memory.available / 1024 / 1024 / 1024
+
     resources = {
-        "ram_gb": psutil.virtual_memory().total / 1024 / 1024 / 1024,
+        "ram_gb": ram_gb,
+        "ram_available_gb": ram_available_gb,
+        "ram_used_percent": virtual_memory.percent,
         "cpu_count": psutil.cpu_count(),
-        "gpu_available": torch.cuda.is_available(),
+        "cpu_usage": psutil.cpu_percent(interval=0.1),
+        "gpu_available": torch_available and torch.cuda.is_available() if torch_available else False,
         "gpu_info": []
     }
 
+    # Use the new gpu_info function from utils.system for more detailed GPU info
    if resources['gpu_available']:
-        gpu_count = torch.cuda.device_count()
-        for i in range(gpu_count):
-            gpu_mem = get_gpu_memory()
-            if gpu_mem:
-                total_mem, _ = gpu_mem
-                resources['gpu_info'].append({
-                    'name': torch.cuda.get_device_name(i),
-                    'total_memory': total_mem
-                })
+        resources['gpu_info'] = utils_get_gpu_info()
 
-    return resources
+    return resources
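A hedged probe of the two new endpoints; the base URL, port, and any router prefix are assumptions (the mount point is not visible in this diff):

```python
# Hedged sketch: exercise GET /resources and GET /network. Field names come
# from SystemResourcesResponse and get_network_info above; the address is assumed.
import requests

base = "http://localhost:8000"

resources = requests.get(f"{base}/resources", timeout=10).json()
print(resources["cpu"]["cores"], resources["memory"]["percent"], resources["platform"])

network = requests.get(f"{base}/network", timeout=10).json()
print(network["public_ip"], network["hostname"])
```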

locallab/utils/system.py

Lines changed: 104 additions & 4 deletions
@@ -4,12 +4,15 @@
 
 import os
 import psutil
+import shutil
+import socket
+import platform
 try:
     import torch
     TORCH_AVAILABLE = True
 except ImportError:
     TORCH_AVAILABLE = False
-from typing import Optional, Tuple, Dict, Any
+from typing import Optional, Tuple, Dict, Any, List
 
 from ..logger import get_logger
 from ..config import MIN_FREE_MEMORY
@@ -69,10 +72,11 @@ def check_resource_availability(required_memory: int) -> bool:
 
 
 def get_device() -> str:
-    """Get the best available device for computation"""
+    """Get the device to use for computations."""
     if TORCH_AVAILABLE and torch.cuda.is_available():
         return "cuda"
-    return "cpu"
+    else:
+        return "cpu"
 
 
 def format_model_size(size_in_bytes: int) -> str:
@@ -110,4 +114,100 @@ def get_system_resources() -> Dict[str, Any]:
                 'total_memory': total_mem
             })
 
-    return resources
+    return resources
+
+
+def get_cpu_info() -> Dict[str, Any]:
+    """Get information about the CPU."""
+    return {
+        "cores": psutil.cpu_count(logical=False),
+        "threads": psutil.cpu_count(logical=True),
+        "usage": psutil.cpu_percent(interval=0.1)
+    }
+
+
+def get_gpu_info() -> List[Dict[str, Any]]:
+    """Get detailed information about all available GPUs.
+
+    Returns:
+        List of dictionaries with GPU information including name, memory,
+        utilization, and temperature if available
+    """
+    gpu_info = []
+
+    if not TORCH_AVAILABLE or not torch.cuda.is_available():
+        return gpu_info
+
+    try:
+        # Get basic CUDA information
+        device_count = torch.cuda.device_count()
+
+        for i in range(device_count):
+            gpu_data = {
+                "index": i,
+                "name": torch.cuda.get_device_name(i),
+                "total_memory_mb": round(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024))
+            }
+
+            # Try to get more detailed info with pynvml
+            try:
+                import pynvml
+                pynvml.nvmlInit()
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+
+                # Memory info
+                mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+                gpu_data.update({
+                    "memory_free_mb": round(mem_info.free / (1024 * 1024)),
+                    "memory_used_mb": round(mem_info.used / (1024 * 1024)),
+                    "memory_percent": round((mem_info.used / mem_info.total) * 100, 1)
+                })
+
+                # Utilization info
+                try:
+                    util = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                    gpu_data.update({
+                        "gpu_utilization": util.gpu,
+                        "memory_utilization": util.memory
+                    })
+                except:
+                    pass
+
+                # Temperature
+                try:
+                    temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
+                    gpu_data["temperature"] = temp
+                except:
+                    pass
+
+                # Power usage
+                try:
+                    power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # convert from mW to W
+                    gpu_data["power_usage_watts"] = round(power, 2)
+                except:
+                    pass
+
+            except (ImportError, Exception) as e:
+                # If pynvml fails, we still have basic torch.cuda info
+                gpu_data["available_memory_mb"] = round(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024) -
+                                                        torch.cuda.memory_allocated(i) / (1024 * 1024))
+                gpu_data["used_memory_mb"] = round(torch.cuda.memory_allocated(i) / (1024 * 1024))
+
+            gpu_info.append(gpu_data)
+
+    except Exception as e:
+        import logging
+        logging.warning(f"Error getting GPU info: {str(e)}")
+
+    return gpu_info
+
+
+def get_memory_info() -> Dict[str, Any]:
+    """Get information about the system memory."""
+    mem = psutil.virtual_memory()
+    return {
+        "total": mem.total,
+        "available": mem.available,
+        "used": mem.used,
+        "percent": mem.percent
+    }
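A minimal sketch of calling the new helper directly; which optional fields appear depends on whether `pynvml` is installed, as the function above shows:

```python
# Hedged sketch: iterate over the new get_gpu_info() output. Only "index",
# "name" and "total_memory_mb" are guaranteed; the other fields may be absent.
from locallab.utils.system import get_gpu_info

for gpu in get_gpu_info():  # returns [] when torch or CUDA is unavailable
    used = gpu.get("memory_used_mb", gpu.get("used_memory_mb"))
    temp = gpu.get("temperature")
    print(f"GPU {gpu['index']}: {gpu['name']}, "
          f"{used}/{gpu['total_memory_mb']} MB used, temp={temp}")
```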
