
Commit a67b4b1

Updated LocalLab v0.3.2 and Updated Docs
1 parent c99feee commit a67b4b1

7 files changed: 36 additions, 16 deletions


CHANGELOG.md

Lines changed: 15 additions & 4 deletions
@@ -2,7 +2,7 @@

 All notable changes to LocalLab will be documented in this file.

-## [0.3.1] - 2025-03-06
+## [0.3.1] - 2025-03-03

 ### Fixed

@@ -18,7 +18,7 @@ All notable changes to LocalLab will be documented in this file.
 - Improved error messages for configuration issues
 - Updated environment variable documentation with more details

-## [0.3.0] - 2025-03-05
+## [0.3.0] - 2025-03-03

 ### Added

@@ -45,7 +45,7 @@ All notable changes to LocalLab will be documented in this file.
 - Added detailed memory usage logging before and after model operations
 - Optimized memory usage by cleaning GPU cache more aggressively

-## [0.2.9] - 2025-03-04
+## [0.2.9] - 2025-03-02

 ### Added

@@ -58,7 +58,7 @@ All notable changes to LocalLab will be documented in this file.
 - Redesigned modern ASCII art banners for a more aesthetic interface
 - Improved UI with cleaner banner separations and better readability

-## [0.2.8] - 2025-03-03
+## [0.2.8] - 2025-03-02

 ### Fixed

@@ -125,3 +125,14 @@ All notable changes to LocalLab will be documented in this file.
 ### Added

 - Added new `
+
+## [0.3.2] - 2025-03-07
+
+### Added
+
+- Integrated MIN_FREE_MEMORY environment variable for better memory management.
+- Updated documentation to include MIN_FREE_MEMORY in environment variable settings.
+
+### Improved
+
+- Updated memory checks in the server to utilize MIN_FREE_MEMORY for consistency.

docs/guides/environment_variables.md

Lines changed: 2 additions & 0 deletions
@@ -166,3 +166,5 @@ For boolean environment variables, the following values are recognized:
 - [Performance Guide](../features/performance.md)
 - [Memory Monitoring](../features/memory.md)
 - [Google Colab Guide](../colab/README.md)
+
+Make sure to set this variable to ensure optimal performance for LocalLab.
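The sentence added to the guide refers to configuring LocalLab through environment variables. As a minimal sketch (not an excerpt from the guide), variables that locallab/config.py reads at import time, such as DEFAULT_TEMPERATURE and LOCALLAB_ENABLE_QUANTIZATION in the config.py hunks further down, can be set before importing the server:

```python
import os

# Set configuration before importing locallab, since locallab/config.py reads
# these variables at module import time (see the config.py hunks below).
# "true" is assumed to be among the boolean strings the guide recognizes.
os.environ["DEFAULT_TEMPERATURE"] = "0.5"
os.environ["LOCALLAB_ENABLE_QUANTIZATION"] = "true"

from locallab import start_server

start_server()
```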

docs/guides/getting-started.md

Lines changed: 8 additions & 0 deletions
@@ -7,11 +7,13 @@ This guide will help you start using LocalLab, whether you're running it locally
 ### Local Setup

 1. **Install LocalLab**
+
 ```bash
 pip install locallab
 ```

 2. **Start the Server**
+
 ```python
 from locallab import start_server
 start_server()
@@ -26,17 +28,20 @@ This guide will help you start using LocalLab, whether you're running it locally
 ### Google Colab Setup

 1. **Install LocalLab**
+
 ```python
 !pip install locallab
 ```

 2. **Set Up Ngrok**
+
 ```python
 import os
 os.environ["NGROK_AUTH_TOKEN"] = "your_token_here"
 ```

 3. **Start Server**
+
 ```python
 from locallab import start_server
 start_server(ngrok=True) # Will show public URL in logs
@@ -51,6 +56,7 @@ This guide will help you start using LocalLab, whether you're running it locally
 ## First Steps

 ### 1. Generate Text
+
 ```python
 # Simple text generation
 response = await client.generate(
@@ -61,6 +67,7 @@ print(response)
 ```

 ### 2. Chat with AI
+
 ```python
 # Chat completion
 response = await client.chat([
@@ -71,6 +78,7 @@ print(response.choices[0].message.content)
 ```

 ### 3. Process Multiple Prompts
+
 ```python
 # Batch processing
 responses = await client.batch_generate([
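The "First Steps" excerpts await methods on a client object that these hunks do not construct. A rough sketch of running one of those calls end to end, with the client import and constructor treated as assumptions (check the client documentation for the real names):

```python
import asyncio

# Hypothetical import and constructor: the diff only shows the call sites
# (client.generate, client.chat, client.batch_generate), not client setup.
from locallab_client import LocalLabClient  # assumed package/class name

async def main() -> None:
    client = LocalLabClient("http://localhost:8000")  # assumed local server URL
    response = await client.generate(prompt="Hello, LocalLab!")  # parameter name assumed
    print(response)

asyncio.run(main())
```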

locallab/config.py

Lines changed: 8 additions & 0 deletions
@@ -7,6 +7,10 @@
 from huggingface_hub import model_info, HfApi
 from pathlib import Path
 from dataclasses import dataclass
+from ..logger import get_logger
+
+# Get the logger instance
+logger = get_logger("locallab.config")

 def get_env_var(key: str, *, default: Any = None, var_type: Type = str) -> Any:
     """Get environment variable with type conversion and validation.
@@ -45,6 +49,8 @@ def get_env_var(key: str, *, default: Any = None, var_type: Type = str) -> Any:
 DEFAULT_MAX_LENGTH = get_env_var("DEFAULT_MAX_LENGTH", default=2048, var_type=int)
 DEFAULT_TEMPERATURE = get_env_var("DEFAULT_TEMPERATURE", default=0.7, var_type=float)
 DEFAULT_TOP_P = get_env_var("DEFAULT_TOP_P", default=0.9, var_type=float)
+DEFAULT_TOP_K = 50 # Default value for top_k parameter
+DEFAULT_REPETITION_PENALTY = 1.0 # Default value for repetition penalty

 # Optimization settings
 ENABLE_QUANTIZATION = get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default=True, var_type=bool)
@@ -350,3 +356,5 @@ def get_model_info(model_id: str, fallback: Optional[str] = None) -> Optional[Di
         return MODEL_REGISTRY[fallback]

     return None
+
+MIN_FREE_MEMORY = 2000 # Minimum free memory in MB
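The defaults in the second hunk are produced by the get_env_var helper whose signature appears at the top of config.py. Only the signature is visible in this diff; a sketch of how such a helper typically behaves, with the body assumed:

```python
import os
from typing import Any, Type

def get_env_var(key: str, *, default: Any = None, var_type: Type = str) -> Any:
    """Read an environment variable and coerce it to var_type (body assumed;
    only the signature is shown in the locallab/config.py diff)."""
    raw = os.environ.get(key)
    if raw is None:
        return default
    if var_type is bool:
        # Accept a few common truthy strings; the exact set recognized by
        # LocalLab is documented in docs/guides/environment_variables.md.
        return raw.strip().lower() in ("1", "true", "yes", "on")
    return var_type(raw)

# Mirrors the module-level settings touched by this commit:
DEFAULT_TOP_P = get_env_var("DEFAULT_TOP_P", default=0.9, var_type=float)
ENABLE_QUANTIZATION = get_env_var("LOCALLAB_ENABLE_QUANTIZATION", default=True, var_type=bool)
```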

locallab/core/app.py

Lines changed: 0 additions & 2 deletions
@@ -13,8 +13,6 @@
 from fastapi.middleware.gzip import GZipMiddleware
 from contextlib import contextmanager
 from colorama import Fore, Style
-import threading
-import psutil
 from ..logger import get_logger
 from ..logger.logger import log_request, log_model_loaded, log_model_unloaded, get_request_count, SERVER_START_TIME
 from ..model_manager import ModelManager

locallab/routes/system.py

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 import torch
 import platform
 from datetime import datetime
+import gc

 from ..logger import get_logger
 from ..logger.logger import get_request_count, get_uptime_seconds
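The new import gc is not exercised in the visible hunk. A common pattern it would support in a system route, consistent with the changelog note about cleaning the GPU cache more aggressively, though not an excerpt from system.py:

```python
import gc

import torch

def free_memory() -> None:
    """Release Python garbage and, if a GPU is present, cached CUDA memory.

    Illustrative only: the actual usage inside locallab/routes/system.py is
    not part of this diff.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```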

locallab/server.py

Lines changed: 2 additions & 10 deletions
@@ -29,15 +29,7 @@
 from .logger.logger import set_server_status, log_request
 from .utils.system import get_gpu_memory
 from .config import (
-    DEFAULT_MODEL,
-    system_instructions,
-    ENABLE_QUANTIZATION,
-    QUANTIZATION_TYPE,
-    ENABLE_ATTENTION_SLICING,
-    ENABLE_BETTERTRANSFORMER,
-    ENABLE_FLASH_ATTENTION,
-    HOST,
-    PORT
+    MIN_FREE_MEMORY
 )

 # Import torch - handle import error gracefully
@@ -135,7 +127,7 @@ def check_environment() -> List[Tuple[str, str, bool]]:
         total_gb = memory.total / (1024 * 1024 * 1024)
         available_gb = memory.available / (1024 * 1024 * 1024)

-        if available_gb < 2.0: # Less than 2GB available
+        if available_gb < MIN_FREE_MEMORY / 1024: # Convert MB to GB
             issues.append((
                 f"Low system memory: Only {available_gb:.1f}GB available",
                 "Models may require 2-8GB of system memory. Consider closing other applications",
