Commit 0b04562

Updated LocalLab v0.2.4
1 parent 84566bf commit 0b04562

8 files changed: +386 additions, −90 deletions

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
@@ -2,6 +2,28 @@
 
 All notable changes for version updates.
 
+## [0.2.4] - 2025-03-04
+
+### Fixed
+
+- Fixed API endpoint errors for `/models/available` and other model endpoints
+- Resolved parameter error in `get_model_generation_params()` function
+- Improved error handling for model optimization settings through environment variables
+- Fixed circular import issues between routes and core modules
+- Enhanced Flash Attention warning message to be more informative
+
+### Added
+
+- Added new `get_gpu_info()` function for detailed GPU monitoring
+- Added improved system resource endpoint with detailed GPU metrics
+- Added robust environment variable handling for optimization settings
+
+### Changed
+
+- Made optimization flags more robust by checking for empty string values
+- Improved fallback handling for missing torch packages
+- Enhanced server startup logs with better optimization information
+
 ## [0.2.3] - 2025-03-03
 
 ### Fixed
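The "empty string values" fix refers to optimization flags read from the environment. A minimal sketch of that parsing pattern, assuming a hypothetical flag name and helper (neither appears verbatim in this commit):

```python
# Hedged sketch: treat an empty environment variable as "unset" rather than
# truthy. The flag name and helper below are illustrative, not from this diff.
import os

def flag_enabled(name: str, default: bool = False) -> bool:
    value = os.environ.get(name, "").strip().lower()
    if value == "":  # unset or empty string falls back to the default
        return default
    return value in ("1", "true", "yes", "on")

print(flag_enabled("LOCALLAB_ENABLE_FLASH_ATTENTION"))  # hypothetical flag name
```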

locallab/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,8 +1,8 @@
 """
-LocalLab - A lightweight AI inference server
+LocalLab: Run LLMs locally with a friendly API similar to OpenAI
 """
 
-__version__ = "0.2.3"
+__version__ = "0.2.4"
 
 from typing import Dict, Any, Optional

locallab/config.py

Lines changed: 27 additions & 2 deletions
@@ -347,12 +347,37 @@ def estimate_model_requirements(model_id: str) -> Optional[Dict[str, Any]]:
 Keep responses short unless specifically asked for detailed information.
 Respond directly to greetings with simple, friendly responses."""
 
-def get_model_generation_params() -> dict:
-    return {
+def get_model_generation_params(model_id: Optional[str] = None) -> dict:
+    """Get model generation parameters, optionally specific to a model.
+
+    Args:
+        model_id: Optional model ID to get specific parameters for
+
+    Returns:
+        Dictionary of generation parameters
+    """
+    # Base parameters (defaults)
+    params = {
         "max_length": get_env_var("LOCALLAB_MODEL_MAX_LENGTH", default=DEFAULT_MAX_LENGTH, var_type=int),
         "temperature": get_env_var("LOCALLAB_MODEL_TEMPERATURE", default=DEFAULT_TEMPERATURE, var_type=float),
         "top_p": get_env_var("LOCALLAB_MODEL_TOP_P", default=DEFAULT_TOP_P, var_type=float),
+        "top_k": get_env_var("LOCALLAB_TOP_K", default=DEFAULT_TOP_K, var_type=int),
+        "repetition_penalty": get_env_var("LOCALLAB_REPETITION_PENALTY", default=DEFAULT_REPETITION_PENALTY, var_type=float),
     }
+
+    # If model_id is provided and exists in MODEL_REGISTRY, use model-specific parameters
+    if model_id and model_id in MODEL_REGISTRY:
+        model_config = MODEL_REGISTRY[model_id]
+        # Override with model-specific parameters if available
+        if "max_length" in model_config:
+            params["max_length"] = model_config["max_length"]
+
+        # Add any other model-specific parameters from the registry
+        for param in ["temperature", "top_p", "top_k", "repetition_penalty"]:
+            if param in model_config:
+                params[param] = model_config[param]
+
+    return params
 
 class SystemInstructions:
     def __init__(self):
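For reference, a minimal usage sketch of the new signature; the model ID below is illustrative and assumes such an entry exists in `MODEL_REGISTRY`:

```python
# Hedged sketch: call the updated function with and without a model_id.
from locallab.config import get_model_generation_params

defaults = get_model_generation_params()                  # env-var driven defaults
params = get_model_generation_params("microsoft/phi-2")   # hypothetical registry entry
print(params["max_length"], params["temperature"], params["top_k"])
```

With no argument the behavior matches the old version plus the new `top_k` and `repetition_penalty` defaults; with a registered model ID, registry values override the environment defaults.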

locallab/model_manager.py

Lines changed: 138 additions & 50 deletions
Large diffs are not rendered by default.

locallab/routes/models.py

Lines changed: 7 additions & 0 deletions
@@ -52,6 +52,13 @@ async def list_models() -> ModelsListResponse:
     )
 
 
+@router.get("/available", response_model=ModelsListResponse)
+async def available_models() -> ModelsListResponse:
+    """List all available models (alternative endpoint)"""
+    # This endpoint exists to provide compatibility with different API patterns
+    return await list_models()
+
+
 @router.get("/current", response_model=ModelResponse)
 async def get_current_model() -> ModelResponse:
     """Get information about the currently loaded model"""

locallab/routes/system.py

Lines changed: 85 additions & 31 deletions
@@ -8,12 +8,16 @@
 import time
 import psutil
 import torch
+import platform
+from datetime import datetime
 
 from ..logger import get_logger
 from ..logger.logger import get_request_count, get_uptime_seconds
 from ..core.app import model_manager, start_time
 from ..ui.banners import print_system_resources
 from ..config import system_instructions
+from ..utils.system import get_gpu_info as utils_get_gpu_info
+from ..utils.networking import get_public_ip, get_network_interfaces
 
 # Get logger
 logger = get_logger("locallab.routes.system")
@@ -38,6 +42,17 @@ class SystemInstructionsRequest(BaseModel):
     model_id: Optional[str] = None
 
 
+class SystemResourcesResponse(BaseModel):
+    """Response model for system resources"""
+    cpu: Dict[str, Any]
+    memory: Dict[str, Any]
+    gpu: Optional[List[Dict[str, Any]]] = None
+    disk: Dict[str, Any]
+    platform: str
+    server_uptime: float
+    api_requests: int
+
+
 def get_gpu_memory() -> Optional[Tuple[int, int]]:
     """Get GPU memory info in MB"""
     try:
@@ -51,24 +66,6 @@ def get_gpu_memory() -> Optional[Tuple[int, int]]:
         return None
 
 
-def get_gpu_info() -> Optional[Dict[str, Any]]:
-    """Get detailed GPU information including memory and device name"""
-    try:
-        gpu_mem = get_gpu_memory()
-        if gpu_mem:
-            total_gpu, free_gpu = gpu_mem
-            return {
-                "total_memory": total_gpu,
-                "free_memory": free_gpu,
-                "used_memory": total_gpu - free_gpu,
-                "device": torch.cuda.get_device_name(0)
-            }
-        return None
-    except Exception as e:
-        logger.debug(f"Failed to get GPU info: {str(e)}")
-        return None
-
-
 @router.post("/system/instructions")
 async def update_system_instructions(request: SystemInstructionsRequest) -> Dict[str, str]:
     """Update system instructions"""
@@ -112,7 +109,7 @@ async def get_system_info():
     memory_percent = memory.percent
 
     # Get GPU info if available
-    gpu_info = get_gpu_info() if torch.cuda.is_available() else None
+    gpu_info = utils_get_gpu_info() if torch.cuda.is_available() else None
 
     # Get server stats
     uptime = time.time() - start_time
@@ -170,24 +167,81 @@ async def root() -> Dict[str, Any]:
     }
 
 
+@router.get("/resources", response_model=SystemResourcesResponse)
+async def get_system_resources() -> SystemResourcesResponse:
+    """Get system resource information"""
+    disk = psutil.disk_usage('/')
+    uptime = time.time() - start_time
+
+    # Get detailed GPU information
+    gpu_info = utils_get_gpu_info()
+
+    return SystemResourcesResponse(
+        cpu={
+            "cores": psutil.cpu_count(logical=False),
+            "threads": psutil.cpu_count(logical=True),
+            "usage": psutil.cpu_percent(interval=0.1),
+            "frequency": psutil.cpu_freq().current if psutil.cpu_freq() else None
+        },
+        memory={
+            "total": psutil.virtual_memory().total,
+            "available": psutil.virtual_memory().available,
+            "used": psutil.virtual_memory().used,
+            "percent": psutil.virtual_memory().percent
+        },
+        gpu=gpu_info,
+        disk={
+            "total": disk.total,
+            "free": disk.free,
+            "used": disk.used,
+            "percent": disk.percent
+        },
+        platform=platform.platform(),
+        server_uptime=uptime,
+        api_requests=get_request_count()
+    )
+
+
+@router.get("/network", response_model=Dict[str, Any])
+async def get_network_info() -> Dict[str, Any]:
+    """Get network information"""
+    try:
+        public_ip = await get_public_ip()
+    except:
+        public_ip = "Unknown"
+
+    return {
+        "public_ip": public_ip,
+        "hostname": platform.node(),
+        "interfaces": get_network_interfaces()
+    }
+
+
 def get_system_resources() -> Dict[str, Any]:
     """Get system resource information"""
+    try:
+        import torch
+        torch_available = True
+    except ImportError:
+        torch_available = False
+
+    # Get memory information
+    virtual_memory = psutil.virtual_memory()
+    ram_gb = virtual_memory.total / 1024 / 1024 / 1024
+    ram_available_gb = virtual_memory.available / 1024 / 1024 / 1024
+
     resources = {
-        "ram_gb": psutil.virtual_memory().total / 1024 / 1024 / 1024,
+        "ram_gb": ram_gb,
+        "ram_available_gb": ram_available_gb,
+        "ram_used_percent": virtual_memory.percent,
         "cpu_count": psutil.cpu_count(),
-        "gpu_available": torch.cuda.is_available(),
+        "cpu_usage": psutil.cpu_percent(interval=0.1),
+        "gpu_available": torch_available and torch.cuda.is_available() if torch_available else False,
         "gpu_info": []
     }
 
+    # Use the new gpu_info function from utils.system for more detailed GPU info
    if resources['gpu_available']:
-        gpu_count = torch.cuda.device_count()
-        for i in range(gpu_count):
-            gpu_mem = get_gpu_memory()
-            if gpu_mem:
-                total_mem, _ = gpu_mem
-                resources['gpu_info'].append({
-                    'name': torch.cuda.get_device_name(i),
-                    'total_memory': total_mem
-                })
+        resources['gpu_info'] = utils_get_gpu_info()
 
-    return resources
+    return resources
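A hedged probe of the two new endpoints; the base URL, port, and any router prefix are assumptions (the mount point is not visible in this diff):

```python
# Hedged sketch: exercise GET /resources and GET /network. Field names come
# from SystemResourcesResponse and get_network_info above; the address is assumed.
import requests

base = "http://localhost:8000"

resources = requests.get(f"{base}/resources", timeout=10).json()
print(resources["cpu"]["cores"], resources["memory"]["percent"], resources["platform"])

network = requests.get(f"{base}/network", timeout=10).json()
print(network["public_ip"], network["hostname"])
```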

locallab/utils/system.py

Lines changed: 104 additions & 4 deletions
@@ -4,12 +4,15 @@
 
 import os
 import psutil
+import shutil
+import socket
+import platform
 try:
     import torch
     TORCH_AVAILABLE = True
 except ImportError:
     TORCH_AVAILABLE = False
-from typing import Optional, Tuple, Dict, Any
+from typing import Optional, Tuple, Dict, Any, List
 
 from ..logger import get_logger
 from ..config import MIN_FREE_MEMORY
@@ -69,10 +72,11 @@ def check_resource_availability(required_memory: int) -> bool:
 
 
 def get_device() -> str:
-    """Get the best available device for computation"""
+    """Get the device to use for computations."""
     if TORCH_AVAILABLE and torch.cuda.is_available():
         return "cuda"
-    return "cpu"
+    else:
+        return "cpu"
 
 
 def format_model_size(size_in_bytes: int) -> str:
@@ -110,4 +114,100 @@ def get_system_resources() -> Dict[str, Any]:
                 'total_memory': total_mem
             })
 
-    return resources
+    return resources
+
+
+def get_cpu_info() -> Dict[str, Any]:
+    """Get information about the CPU."""
+    return {
+        "cores": psutil.cpu_count(logical=False),
+        "threads": psutil.cpu_count(logical=True),
+        "usage": psutil.cpu_percent(interval=0.1)
+    }
+
+
+def get_gpu_info() -> List[Dict[str, Any]]:
+    """Get detailed information about all available GPUs.
+
+    Returns:
+        List of dictionaries with GPU information including name, memory,
+        utilization, and temperature if available
+    """
+    gpu_info = []
+
+    if not TORCH_AVAILABLE or not torch.cuda.is_available():
+        return gpu_info
+
+    try:
+        # Get basic CUDA information
+        device_count = torch.cuda.device_count()
+
+        for i in range(device_count):
+            gpu_data = {
+                "index": i,
+                "name": torch.cuda.get_device_name(i),
+                "total_memory_mb": round(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024))
+            }
+
+            # Try to get more detailed info with pynvml
+            try:
+                import pynvml
+                pynvml.nvmlInit()
+                handle = pynvml.nvmlDeviceGetHandleByIndex(i)
+
+                # Memory info
+                mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
+                gpu_data.update({
+                    "memory_free_mb": round(mem_info.free / (1024 * 1024)),
+                    "memory_used_mb": round(mem_info.used / (1024 * 1024)),
+                    "memory_percent": round((mem_info.used / mem_info.total) * 100, 1)
+                })
+
+                # Utilization info
+                try:
+                    util = pynvml.nvmlDeviceGetUtilizationRates(handle)
+                    gpu_data.update({
+                        "gpu_utilization": util.gpu,
+                        "memory_utilization": util.memory
+                    })
+                except:
+                    pass
+
+                # Temperature
+                try:
+                    temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
+                    gpu_data["temperature"] = temp
+                except:
+                    pass
+
+                # Power usage
+                try:
+                    power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # convert from mW to W
+                    gpu_data["power_usage_watts"] = round(power, 2)
+                except:
+                    pass
+
+            except (ImportError, Exception) as e:
+                # If pynvml fails, we still have basic torch.cuda info
+                gpu_data["available_memory_mb"] = round(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024) -
+                                                        torch.cuda.memory_allocated(i) / (1024 * 1024))
+                gpu_data["used_memory_mb"] = round(torch.cuda.memory_allocated(i) / (1024 * 1024))
+
+            gpu_info.append(gpu_data)
+
+    except Exception as e:
+        import logging
+        logging.warning(f"Error getting GPU info: {str(e)}")
+
+    return gpu_info
+
+
+def get_memory_info() -> Dict[str, Any]:
+    """Get information about the system memory."""
+    mem = psutil.virtual_memory()
+    return {
+        "total": mem.total,
+        "available": mem.available,
+        "used": mem.used,
+        "percent": mem.percent
+    }
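A minimal sketch of calling the new helper directly; which optional fields appear depends on whether `pynvml` is installed, as the function above shows:

```python
# Hedged sketch: iterate over the new get_gpu_info() output. Only "index",
# "name" and "total_memory_mb" are guaranteed; the other fields may be absent.
from locallab.utils.system import get_gpu_info

for gpu in get_gpu_info():  # returns [] when torch or CUDA is unavailable
    used = gpu.get("memory_used_mb", gpu.get("used_memory_mb"))
    temp = gpu.get("temperature")
    print(f"GPU {gpu['index']}: {gpu['name']}, "
          f"{used}/{gpu['total_memory_mb']} MB used, temp={temp}")
```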
