
Commit 0583607

Updated LocalLab v0.2.6
1 parent ef085ce commit 0583607

8 files changed: +214 -84 lines changed

CHANGELOG.md

Lines changed: 23 additions & 7 deletions
@@ -2,19 +2,35 @@
 
 All notable changes to LocalLab will be documented in this file.
 
-## [0.2.5] - 2025-03-02
+## [0.2.6] - 2023-05-05
 
 ### Added
 
-- Added `get_network_interfaces` function to retrieve information about available network interfaces
-- Added `get_public_ip` async function to retrieve the public IP address of the machine
-- Added adapter methods in ModelManager (`generate_text` and `generate_stream`) to maintain API compatibility with route handlers
+- New model loading endpoint that accepts model_id in the request body at `/models/load`
+- `format_chat_messages` function to properly format chat messages for the model
+- CLI function to support command-line usage with click interface
 
 ### Fixed
 
-- Fixed import error for `get_public_ip` and `get_network_interfaces` functions
-- Fixed naming mismatch between route handlers and ModelManager methods
-- Added new dependencies in setup.py: `netifaces` and `httpx`
+- Properly awaiting async `generate_text` in chat completion endpoint
+- Fixed async generator handling in `generate_stream` function
+- Fixed streaming in the `stream_chat` function to correctly send server-sent events
+- Properly escaped newline characters in the streaming response
+- Added missing dependencies in `setup.py`: colorama, python-multipart, websockets, psutil, and nest-asyncio
+
+## [0.2.5] - 2023-05-02
+
+### Added
+
+- `get_network_interfaces` function to retrieve information about available network interfaces
+- `get_public_ip` async function to retrieve the public IP address of the machine
+- Adapter methods in `ModelManager` (`generate_text` and `generate_stream`) to maintain API compatibility with route handlers
+
+### Fixed
+
+- Import error for `get_public_ip` and `get_network_interfaces` functions
+- Naming mismatch between route handlers and `ModelManager` methods
+- New dependencies in `setup.py`: `netifaces` and `httpx`
 
 ## [0.2.4] - 2025-03-04
 
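The changelog above also mentions a click-based CLI, but the cli module itself is not among the files shown in this commit. Purely to illustrate the click pattern the changelog refers to, a hypothetical entry point could look like the sketch below; the command name and options are assumptions, not LocalLab's actual interface:

```python
import click


@click.command()
@click.option("--model", "model_id", default=None,
              help="Model ID to load on startup (hypothetical option, not from this commit).")
@click.option("--port", default=8000, show_default=True,
              help="Port for the local server (hypothetical option, not from this commit).")
def main(model_id: str, port: int) -> None:
    """Hypothetical `locallab` entry point showing a click-based CLI."""
    click.echo(f"Starting LocalLab on port {port}"
               + (f" with model {model_id}" if model_id else ""))
    # In the real package this would hand off to the server startup code.


if __name__ == "__main__":
    main()
```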

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 LocalLab: Run LLMs locally with a friendly API similar to OpenAI
 """
 
-__version__ = "0.2.4"
+__version__ = "0.2.6"
 
 from typing import Dict, Any, Optional
 
locallab/model_manager.py

Lines changed: 43 additions & 7 deletions
@@ -562,14 +562,50 @@ async def generate_text(self, prompt: str, system_prompt: Optional[str] = None,
         Adapter method that calls the generate method.
         This is used to maintain compatibility with routes that call generate_text.
         """
-        return await self.generate(prompt=prompt, system_prompt=system_prompt, **kwargs)
+        # Make sure we're not streaming when generating text
+        kwargs["stream"] = False
+        # Directly await the generate method to return the string result
+        return await self.generate(prompt=prompt, system_instructions=system_prompt, **kwargs)
 
     async def generate_stream(self, prompt: str, system_prompt: Optional[str] = None, **kwargs) -> AsyncGenerator[str, None]:
-        """
-        Adapter method that calls the async_stream_generate method.
-        This is used to maintain compatibility with routes that call generate_stream.
-        """
+        """Adapter method for streaming text generation.
+        Calls the async_stream_generate method with proper parameters."""
         # Ensure streaming is enabled
         kwargs["stream"] = True
-        async for token in self.async_stream_generate(prompt=prompt, system_prompt=system_prompt, **kwargs):
-            yield token
+        return self.async_stream_generate(prompt=prompt, system_prompt=system_prompt, **kwargs)
+
+    def is_model_loaded(self, model_id: str) -> bool:
+        """Check if a specific model is loaded.
+
+        Args:
+            model_id: The ID of the model to check
+
+        Returns:
+            True if the model is loaded, False otherwise
+        """
+        return (self.model is not None) and (self.current_model == model_id)
+
+    def unload_model(self) -> None:
+        """Unload the current model to free memory resources.
+
+        This method removes the current model from memory and clears
+        the tokenizer and model references.
+        """
+        if self.model is not None:
+            # Log which model is being unloaded
+            model_id = self.current_model
+
+            # Clear model and tokenizer
+            self.model = None
+            self.tokenizer = None
+            self.current_model = None
+
+            # Clean up memory
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+            gc.collect()
+
+            # Log model unloading
+            log_model_unloaded(model_id)
+
+            logger.info(f"Model {model_id} unloaded successfully")
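The new `unload_model` frees resources by dropping the model and tokenizer references, clearing the CUDA allocator cache when a GPU is present, and running the garbage collector. A minimal standalone sketch of that cleanup pattern, assuming only PyTorch (the demo class below is illustrative, not part of the commit):

```python
import gc

import torch


class DemoModelHolder:
    """Tiny stand-in for a model manager, for illustration only."""

    def __init__(self, model=None, tokenizer=None):
        self.model = model
        self.tokenizer = tokenizer

    def unload(self) -> None:
        # Drop the references held by this object; CUDA memory is only
        # actually returned once no other references to the model remain.
        self.model = None
        self.tokenizer = None
        # Reclaim cached GPU memory and run the garbage collector,
        # mirroring the cleanup order used in unload_model above.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
```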

locallab/routes/generate.py

Lines changed: 79 additions & 37 deletions
@@ -6,6 +6,7 @@
 from fastapi.responses import JSONResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from typing import Dict, List, Any, Optional, Generator, Tuple
+import json
 
 from ..logger import get_logger
 from ..logger.logger import get_request_count
@@ -75,6 +76,36 @@ class BatchGenerationResponse(BaseModel):
     responses: List[str]
 
 
+def format_chat_messages(messages: List[ChatMessage]) -> str:
+    """
+    Format a list of chat messages into a prompt string that the model can understand
+
+    Args:
+        messages: List of ChatMessage objects with role and content
+
+    Returns:
+        Formatted prompt string
+    """
+    formatted_messages = []
+
+    for msg in messages:
+        role = msg.role.strip().lower()
+
+        if role == "system":
+            # System messages get special formatting
+            formatted_messages.append(f"# System Instruction\n{msg.content}\n")
+        elif role == "user":
+            formatted_messages.append(f"User: {msg.content}")
+        elif role == "assistant":
+            formatted_messages.append(f"Assistant: {msg.content}")
+        else:
+            # Default formatting for other roles
+            formatted_messages.append(f"{role.capitalize()}: {msg.content}")
+
+    # Join all messages with newlines
+    return "\n\n".join(formatted_messages)
+
+
 @router.post("/generate", response_model=GenerationResponse)
 async def generate_text(request: GenerationRequest) -> GenerationResponse:
     """
@@ -105,8 +136,8 @@ async def generate_text(request: GenerationRequest) -> GenerationResponse:
         # Merge model-specific params with request params
         generation_params.update(model_params)
 
-        # Generate text
-        generated_text = model_manager.generate_text(
+        # Generate text - properly await the async call
+        generated_text = await model_manager.generate_text(
             prompt=request.prompt,
             system_prompt=request.system_prompt,
             **generation_params
@@ -123,46 +154,51 @@ async def generate_text(request: GenerationRequest) -> GenerationResponse:
 
 @router.post("/chat", response_model=ChatResponse)
 async def chat_completion(request: ChatRequest) -> ChatResponse:
-    """Chat completion endpoint similar to OpenAI's API"""
+    """
+    Chat completion API that formats messages into a prompt and returns the response
+    """
     if not model_manager.current_model:
         raise HTTPException(status_code=400, detail="No model is currently loaded")
 
+    # Format messages into a prompt
+    formatted_prompt = format_chat_messages(request.messages)
+
+    # If streaming is requested, return a streaming response
+    if request.stream:
+        return StreamingResponse(
+            stream_chat(formatted_prompt, request.max_tokens, request.temperature, request.top_p),
+            media_type="text/event-stream"
+        )
+
     try:
-        # Format messages into a prompt
-        formatted_prompt = "\n".join([f"{msg.role}: {msg.content}" for msg in request.messages])
-
-        if request.stream:
-            # Return a streaming response
-            return StreamingResponse(
-                stream_chat(formatted_prompt, request.max_tokens, request.temperature, request.top_p),
-                media_type="text/event-stream"
-            )
-
         # Get model-specific generation parameters
         model_params = get_model_generation_params(model_manager.current_model)
 
-        # Update with request parameters
+        # Prepare generation parameters
         generation_params = {
             "max_new_tokens": request.max_tokens,
            "temperature": request.temperature,
-            "top_p": request.top_p,
+            "top_p": request.top_p
         }
 
         # Merge model-specific params with request params
         generation_params.update(model_params)
 
-        # Generate text
-        response = model_manager.generate_text(
+        # Generate completion
+        generated_text = await model_manager.generate_text(
             prompt=formatted_prompt,
             **generation_params
         )
 
+        # Format response
         return ChatResponse(
             choices=[{
                 "message": {
                     "role": "assistant",
-                    "content": response
-                }
+                    "content": generated_text
+                },
+                "index": 0,
+                "finish_reason": "stop"
             }]
         )
     except Exception as e:
@@ -177,7 +213,9 @@ async def generate_stream(
     top_p: float,
     system_prompt: Optional[str]
 ) -> Generator[str, None, None]:
-    """Generate text in a streaming fashion"""
+    """
+    Generate text in a streaming fashion
+    """
     try:
         # Get model-specific generation parameters
         model_params = get_model_generation_params(model_manager.current_model)
@@ -187,26 +225,26 @@
             "max_new_tokens": max_tokens,
             "temperature": temperature,
             "top_p": top_p,
-            "stream": True
         }
 
         # Merge model-specific params with request params
         generation_params.update(model_params)
 
-        for token in model_manager.generate_stream(
+        # Stream tokens
+        async for token in model_manager.generate_stream(
             prompt=prompt,
             system_prompt=system_prompt,
             **generation_params
         ):
-            # Format as a server-sent event
-            yield f"data: {token}\n\n"
-
-        # End of stream marker
+            # Format as server-sent event
+            data = token.replace("\n", "\\n")
+            yield f"data: {data}\n\n"
+
+        # End of stream
         yield "data: [DONE]\n\n"
     except Exception as e:
         logger.error(f"Streaming generation failed: {str(e)}")
-        yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
-        yield "data: [DONE]\n\n"
+        yield f"data: [ERROR] {str(e)}\n\n"
 
 
 async def stream_chat(
@@ -215,7 +253,9 @@
     temperature: float,
     top_p: float
 ) -> Generator[str, None, None]:
-    """Stream chat completion tokens"""
+    """
+    Stream chat completion
+    """
     try:
         # Get model-specific generation parameters
         model_params = get_model_generation_params(model_manager.current_model)
@@ -224,25 +264,27 @@
         generation_params = {
             "max_new_tokens": max_tokens,
             "temperature": temperature,
-            "top_p": top_p,
-            "stream": True
+            "top_p": top_p
         }
 
         # Merge model-specific params with request params
         generation_params.update(model_params)
 
-        for token in model_manager.generate_stream(
+        # Generate streaming tokens
+        async for token in model_manager.generate_stream(
             prompt=formatted_prompt,
             **generation_params
        ):
-            # Format as a server-sent event with proper JSON structure
-            yield f'data: {{"choices": [{{"delta": {{"content": "{token}"}}}}]}}\n\n'
-
+            # Format as a server-sent event with the structure expected by chat clients
+            data = json.dumps({"role": "assistant", "content": token})
+            yield f"data: {data}\n\n"
+
         # End of stream marker
         yield "data: [DONE]\n\n"
     except Exception as e:
         logger.error(f"Chat streaming failed: {str(e)}")
-        yield f"data: {{\"error\": \"{str(e)}\"}}\n\n"
+        error_data = json.dumps({"error": str(e)})
+        yield f"data: {error_data}\n\n"
         yield "data: [DONE]\n\n"
 
 
@@ -270,7 +312,7 @@ async def batch_generate(request: BatchGenerationRequest) -> BatchGenerationResp
 
     responses = []
     for prompt in request.prompts:
-        generated_text = model_manager.generate_text(
+        generated_text = await model_manager.generate_text(
             prompt=prompt,
             system_prompt=request.system_prompt,
             **generation_params
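With these changes the streaming endpoints emit one server-sent event per token and finish with a `data: [DONE]` marker; the `/chat` stream carries a JSON payload per event. A hedged client sketch for consuming the `/chat` stream with httpx (the base URL, message content, and request fields beyond those shown in the diff are assumptions):

```python
import asyncio
import json

import httpx

BASE_URL = "http://localhost:8000"  # assumption: default host/port for a local server


async def stream_chat_demo() -> None:
    payload = {
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,
    }
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", f"{BASE_URL}/chat", json=payload) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip the blank separator lines between events
                data = line[len("data: "):]
                if data == "[DONE]":
                    break  # end-of-stream marker sent by the server
                event = json.loads(data)  # e.g. {"role": "assistant", "content": "<token>"}
                print(event.get("content", ""), end="", flush=True)


if __name__ == "__main__":
    asyncio.run(stream_chat_demo())
```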

locallab/routes/models.py

Lines changed: 25 additions & 0 deletions
@@ -95,6 +95,31 @@ async def load_model(model_id: str, background_tasks: BackgroundTasks) -> Dict[s
         raise HTTPException(status_code=500, detail=str(e))
 
 
+class LoadModelRequest(BaseModel):
+    """Request model for loading a model with JSON body"""
+    model_id: str
+
+
+@router.post("/load", response_model=Dict[str, str])
+async def load_model_from_body(request: LoadModelRequest, background_tasks: BackgroundTasks) -> Dict[str, str]:
+    """Load a specific model using model_id from request body"""
+    model_id = request.model_id
+    if model_id not in MODEL_REGISTRY:
+        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")
+
+    # Check if the model is already loaded
+    if model_manager.current_model == model_id and model_manager.is_model_loaded(model_id):
+        return {"status": "success", "message": f"Model {model_id} is already loaded"}
+
+    try:
+        # Load model in background
+        background_tasks.add_task(model_manager.load_model, model_id)
+        return {"status": "loading", "message": f"Model {model_id} loading started in background"}
+    except Exception as e:
+        logger.error(f"Failed to load model {model_id}: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
 @router.post("/unload", response_model=Dict[str, str])
 async def unload_model() -> Dict[str, str]:
     """Unload the current model to free up resources"""

locallab/routes/system.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ class SystemInfoResponse(BaseModel):
     """Response model for system information"""
     cpu_usage: float
     memory_usage: float
-    gpu_info: Optional[Dict[str, Any]] = None
+    gpu_info: Optional[List[Dict[str, Any]]] = None
     active_model: Optional[str] = None
     uptime: float
     request_count: int
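With this change, `gpu_info` is a list with one entry per detected GPU rather than a single dict. A hedged sketch of what the updated model now validates (the per-GPU keys and all values below are illustrative assumptions, not a documented schema):

```python
from typing import Any, Dict, List, Optional

from pydantic import BaseModel


class SystemInfoResponse(BaseModel):
    """Local copy of the response model, for illustration only."""
    cpu_usage: float
    memory_usage: float
    gpu_info: Optional[List[Dict[str, Any]]] = None
    active_model: Optional[str] = None
    uptime: float
    request_count: int


# One dict per GPU; the key names here are assumptions made for the example.
example = SystemInfoResponse(
    cpu_usage=12.5,
    memory_usage=48.0,
    gpu_info=[{"index": 0, "name": "GPU 0", "memory_used_mb": 3072}],
    uptime=3600.0,
    request_count=42,
)
print(example.gpu_info[0]["name"])
```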
