Skip to content

Commit 414a57f

Browse files
Commit 414a57f — "Updated LocalLab v0.3.3" (1 parent: e5f2f39)

File tree

5 files changed

+75
-107
lines changed

5 files changed

+75
-107
lines changed

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
LocalLab: Run LLMs locally with a friendly API similar to OpenAI
33
"""
44

5-
__version__ = "0.3.2"
5+
__version__ = "0.3.3"
66

77
from typing import Dict, Any, Optional
88
import logging

locallab/core/app.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def init(backend, **kwargs):
105105
@app.on_event("startup")
106106
async def startup_event():
107107
"""Initialization tasks when the server starts"""
108-
logger.info("Starting LocalLab server...")
108+
logger.debug("Initializing LocalLab server...")
109109

110110
# Initialize cache if available
111111
if FASTAPI_CACHE_AVAILABLE:
@@ -118,12 +118,12 @@ async def startup_event():
118118
model_to_load = os.environ.get("HUGGINGFACE_MODEL", DEFAULT_MODEL)
119119

120120
# Log model configuration
121-
logger.info(f"Model configuration:")
122-
logger.info(f" - Model to load: {model_to_load}")
123-
logger.info(f" - Quantization: {'Enabled - ' + os.environ.get('LOCALLAB_QUANTIZATION_TYPE', QUANTIZATION_TYPE) if os.environ.get('LOCALLAB_ENABLE_QUANTIZATION', '').lower() == 'true' else 'Disabled'}")
124-
logger.info(f" - Attention slicing: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() == 'true' else 'Disabled'}")
125-
logger.info(f" - Flash attention: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_FLASH_ATTENTION', '').lower() == 'true' else 'Disabled'}")
126-
logger.info(f" - Better transformer: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_BETTERTRANSFORMER', '').lower() == 'true' else 'Disabled'}")
121+
logger.debug("Model configuration:")
122+
logger.debug(" - Model to load: %s", model_to_load)
123+
logger.debug(f" - Quantization: {'Enabled - ' + os.environ.get('LOCALLAB_QUANTIZATION_TYPE', QUANTIZATION_TYPE) if os.environ.get('LOCALLAB_ENABLE_QUANTIZATION', '').lower() == 'true' else 'Disabled'}")
124+
logger.debug(f" - Attention slicing: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() == 'true' else 'Disabled'}")
125+
logger.debug(f" - Flash attention: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_FLASH_ATTENTION', '').lower() == 'true' else 'Disabled'}")
126+
logger.debug(f" - Better transformer: {'Enabled' if os.environ.get('LOCALLAB_ENABLE_BETTERTRANSFORMER', '').lower() == 'true' else 'Disabled'}")
127127

128128
# Start loading the model in background if specified
129129
if model_to_load:

locallab/logger/logger.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
from typing import Optional, Dict, Any
99
from . import get_logger
1010

11+
# Set up logging configuration
12+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
13+
1114
# Server start time for uptime calculation
1215
SERVER_START_TIME = time.time()
1316

locallab/server.py

Lines changed: 63 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,12 @@
2929
from .logger.logger import set_server_status, log_request
3030
from .utils.system import get_gpu_memory
3131
from .config import (
32-
MIN_FREE_MEMORY
32+
DEFAULT_MODEL,
33+
ENABLE_QUANTIZATION,
34+
QUANTIZATION_TYPE,
35+
ENABLE_ATTENTION_SLICING,
36+
ENABLE_BETTERTRANSFORMER,
37+
ENABLE_FLASH_ATTENTION
3338
)
3439

3540
# Import torch - handle import error gracefully
@@ -42,7 +47,6 @@
4247
# Get the logger instance
4348
logger = get_logger("locallab.server")
4449

45-
4650
def check_environment() -> List[Tuple[str, str, bool]]:
4751
"""
4852
Check the environment for potential issues
@@ -73,7 +77,7 @@ def check_environment() -> List[Tuple[str, str, bool]]:
7377
if not os.environ.get("NGROK_AUTH_TOKEN"):
7478
issues.append((
7579
"Running in Google Colab without NGROK_AUTH_TOKEN set",
76-
"Set os.environ['NGROK_AUTH_TOKEN'] = 'your_token' for public URL access. Get your token from https://dashboard.ngrok.com/get-started/your-authtoken",
80+
"Set os.environ['NGROK_AUTH_TOKEN'] = 'your_token' for public URL access.",
7781
True
7882
))
7983

@@ -127,7 +131,7 @@ def check_environment() -> List[Tuple[str, str, bool]]:
127131
total_gb = memory.total / (1024 * 1024 * 1024)
128132
available_gb = memory.available / (1024 * 1024 * 1024)
129133

130-
if available_gb < MIN_FREE_MEMORY / 1024: # Convert MB to GB
134+
if available_gb < 2.0: # Less than 2GB available
131135
issues.append((
132136
f"Low system memory: Only {available_gb:.1f}GB available",
133137
"Models may require 2-8GB of system memory. Consider closing other applications",
@@ -163,7 +167,6 @@ def check_environment() -> List[Tuple[str, str, bool]]:
163167

164168
return issues
165169

166-
167170
def signal_handler(signum, frame):
168171
"""Handle system signals for graceful shutdown"""
169172
print(f"\n{Fore.YELLOW}Received signal {signum}, shutting down server...{Style.RESET_ALL}")
@@ -173,7 +176,6 @@ def signal_handler(signum, frame):
173176

174177
# Attempt to run shutdown tasks
175178
try:
176-
# Import here to avoid circular imports
177179
from .core.app import shutdown_event
178180

179181
loop = asyncio.get_event_loop()
@@ -189,106 +191,69 @@ def delayed_exit():
189191

190192
threading.Thread(target=delayed_exit, daemon=True).start()
191193

192-
193-
def start_server(
194-
host: str = None,
195-
port: int = None,
196-
use_ngrok: bool = False,
197-
ngrok_auth_token: str = None,
198-
**kwargs
199-
):
200-
"""Start the FastAPI server with optional ngrok tunnel"""
201-
from .config import HOST, PORT
194+
def start_server(use_ngrok: bool = False, port=8000, ngrok_auth_token: Optional[str] = None):
195+
"""Start the LocalLab server directly in the main process"""
202196

203-
host = host or HOST
204-
port = port or PORT
197+
# Set initial server status
198+
set_server_status("initializing")
205199

206-
public_url = None
207-
208-
# Set up ngrok if requested
209-
if use_ngrok:
210-
try:
211-
# Import pyngrok
212-
from pyngrok import ngrok, conf
213-
import nest_asyncio
214-
215-
# Apply nest_asyncio for Colab environment
216-
nest_asyncio.apply()
217-
218-
# Set ngrok auth token if provided
219-
if ngrok_auth_token:
220-
ngrok.set_auth_token(ngrok_auth_token)
221-
222-
# Start ngrok tunnel
223-
public_url = ngrok.connect(port).public_url
224-
print(f"Ngrok tunnel established! Public URL: {public_url}")
225-
226-
# Store the public URL in environment variables for access across modules
227-
os.environ["LOCALLAB_PUBLIC_URL"] = public_url
228-
229-
except ImportError:
230-
print("Error: pyngrok not installed. Install with 'pip install pyngrok'.")
231-
return
232-
except Exception as e:
233-
print(f"Error setting up ngrok: {str(e)}")
234-
return
200+
# Display startup banner with INITIALIZING status
201+
print_initializing_banner(__version__)
235202

236-
# Set up uvicorn config with public_url for on_startup function to access
237-
from .core.app import app
203+
# Check environment for issues
204+
issues = check_environment()
205+
if issues:
206+
print(f"\n{Fore.YELLOW}⚠️ Environment Check Results:{Style.RESET_ALL}")
207+
for issue, suggestion, is_critical in issues:
208+
prefix = f"{Fore.RED}CRITICAL:" if is_critical else f"{Fore.YELLOW}WARNING:"
209+
print(f"{prefix} {issue}{Style.RESET_ALL}")
210+
print(f" {Fore.CYAN}Suggestion: {suggestion}{Style.RESET_ALL}\n")
238211

239-
# Inject public_url into app state for use in on_startup
240-
app.state.public_url = public_url
241-
242-
# Start uvicorn server
243-
import uvicorn
244-
uvicorn.run(
245-
"locallab.core.app:app",
246-
host=host,
247-
port=port,
248-
reload=False,
249-
log_level="info",
250-
**kwargs
251-
)
252-
253-
async def on_startup():
254-
"""Initialize server and display startup banner"""
255-
from .core.app import app
256-
from .logger import update_server_status, get_logger
257-
258-
# Get logger
259-
logger = get_logger("locallab.server")
260-
261-
# Update server status
262-
update_server_status("running")
263-
264-
# Determine server URL - check app.state first for ngrok url
265-
server_url = getattr(app.state, "public_url", None)
266-
267-
if not server_url:
268-
# Check environment variable
269-
server_url = os.environ.get("LOCALLAB_PUBLIC_URL")
270-
271-
# If still no URL, use the local URL
272-
if not server_url:
273-
from .config import HOST, PORT
274-
server_url = f"http://{HOST}:{PORT}"
212+
# Check if port is already in use
213+
if is_port_in_use(port):
214+
logger.warning(f"Port {port} is already in use. Trying to find another port...")
215+
for p in range(port+1, port+100):
216+
if not is_port_in_use(p):
217+
port = p
218+
logger.info(f"Using alternative port: {port}")
219+
break
220+
else:
221+
raise RuntimeError(f"Could not find an available port in range {port}-{port+100}")
275222

276-
# Log server URL
277-
logger.info(f"Server running at: {server_url}")
223+
# Set up ngrok before starting server if requested
224+
public_url = None
225+
if use_ngrok:
226+
logger.info(f"{Fore.CYAN}Setting up ngrok tunnel to port {port}...{Style.RESET_ALL}")
227+
public_url = setup_ngrok(port=port, auth_token=ngrok_auth_token)
228+
if public_url:
229+
ngrok_section = f"\n{Fore.CYAN}┌────────────────────────── Ngrok Tunnel Details ─────────────────────────────┐{Style.RESET_ALL}\n\n│ 🚀 Ngrok Public URL: {Fore.GREEN}{public_url}{Style.RESET_ALL}\n\n{Fore.CYAN}└──────────────────────────────────────────────────────────────────────────────┘{Style.RESET_ALL}\n"
230+
logger.info(ngrok_section)
231+
print(ngrok_section)
232+
else:
233+
logger.warning(f"{Fore.YELLOW}Failed to set up ngrok tunnel. Server will run locally on port {port}.{Style.RESET_ALL}")
278234

279-
# Print system instructions and banners
280-
from .ui.banners import print_startup_banner, print_system_instructions, print_model_info, print_api_docs
235+
# Server info section
236+
server_section = f"\n{Fore.CYAN}┌────────────────────────── Server Details ─────────────────────────────┐{Style.RESET_ALL}\n\n│ 🖥️ Local URL: {Fore.GREEN}http://localhost:{port}{Style.RESET_ALL}\n│ ⚙️ Status: {Fore.GREEN}Starting{Style.RESET_ALL}\n\n{Fore.CYAN}└──────────────────────────────────────────────────────────────────────────────┘{Style.RESET_ALL}\n"
237+
print(server_section, flush=True)
281238

282-
print_startup_banner()
283-
print_system_instructions(server_url)
239+
# Set up signal handlers for graceful shutdown
240+
signal.signal(signal.SIGINT, signal_handler)
241+
signal.signal(signal.SIGTERM, signal_handler)
284242

285-
# Print model info if a model is already loaded
286-
from .core.app import model_manager
287-
if model_manager.current_model:
288-
print_model_info(model_manager.current_model)
243+
# Import app here to avoid circular imports
244+
try:
245+
from .core.app import app
246+
except ImportError as e:
247+
logger.error(f"{Fore.RED}Failed to import app: {str(e)}{Style.RESET_ALL}")
248+
raise
289249

290-
# Print API docs with the correct server URL
291-
print_api_docs(server_url)
250+
# Start the server
251+
try:
252+
logger.info(f"Starting server on port {port}...")
253+
uvicorn.run(app, host="0.0.0.0", port=port)
254+
except Exception as e:
255+
logger.error(f"Server startup failed: {str(e)}")
256+
raise
292257

293258
def cli():
294259
"""Command line interface entry point for the package"""
@@ -305,4 +270,4 @@ def run(use_ngrok, port, ngrok_auth_token):
305270
run()
306271

307272
if __name__ == "__main__":
308-
cli()
273+
cli()

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="locallab",
8-
version="0.3.2",
8+
version="0.3.3",
99
packages=find_packages(include=["locallab", "locallab.*"]),
1010
install_requires=[
1111
"fastapi>=0.95.0,<1.0.0",

0 commit comments

Comments
 (0)