Skip to content

Commit a397af9

Browse files
committed
Updated package to v0.4.0
1 parent 9b140f0 commit a397af9

File tree

4 files changed

+26
-10
lines changed

4 files changed

+26
-10
lines changed

locallab/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
LocalLab: Run LLMs locally with a friendly API similar to OpenAI
33
"""
44

5-
__version__ = "0.3.9"
5+
__version__ = "0.4.0"
66

77
from typing import Dict, Any, Optional
88

locallab/model_manager.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,19 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausa
176176
logger.warning(
177177
f"BetterTransformer optimization failed: {str(e)}")
178178

179+
# Only apply Flash Attention if explicitly enabled and not empty
180+
if os.environ.get('LOCALLAB_ENABLE_FLASH_ATTENTION', '').lower() not in ('false', '0', 'none', ''):
181+
try:
182+
from flash_attn import FlashAttention
183+
model = FlashAttention(model)
184+
logger.info("Flash Attention optimization applied")
185+
except ImportError:
186+
logger.warning(
187+
"Flash Attention not available - install 'flash-attn' for this feature")
188+
except Exception as e:
189+
logger.warning(
190+
f"Flash Attention optimization failed: {str(e)}")
191+
179192
return model
180193
except Exception as e:
181194
logger.warning(f"Some optimizations could not be applied: {str(e)}")
@@ -219,10 +232,13 @@ async def load_model(self, model_id: str) -> bool:
219232
**config
220233
)
221234

222-
# Move model to the appropriate device
235+
# Move model only if quantization is disabled
223236
if not ENABLE_QUANTIZATION or str(ENABLE_QUANTIZATION).lower() in ('false', '0', 'none', ''):
224237
device = "cuda" if torch.cuda.is_available() else "cpu"
238+
logger.info(f"Moving model to {device}")
225239
self.model = self.model.to(device)
240+
else:
241+
logger.info("Skipping device move for quantized model - using device_map='auto'")
226242

227243
# Capture model parameters after loading
228244
model_architecture = self.model.config.architectures[0] if hasattr(self.model.config, 'architectures') else 'Unknown'

locallab/ui/banners.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def print_api_docs():
187187
}}
188188
189189
• Example:
190-
curl -X POST "http://localhost:8000/generate" \\
190+
curl -X POST "<server-ngrok-public-url>/generate" \\
191191
-H "Content-Type: application/json" \\
192192
-d '{{"prompt": "Write a story about a dragon", "max_tokens": 100}}'
193193
@@ -204,35 +204,35 @@ def print_api_docs():
204204
}}
205205
206206
• Example:
207-
curl -X POST "http://localhost:8000/chat" \\
207+
curl -X POST "<server-ngrok-public-url>/chat" \\
208208
-H "Content-Type: application/json" \\
209209
-d '{{"messages": [{{"role": "user", "content": "Hello, who are you?"}}]}}'
210210
211211
📦 Model Management Endpoints:
212212
213213
1️⃣ /models - List available models
214214
• GET
215-
• Example: curl "http://localhost:8000/models"
215+
• Example: curl "<server-ngrok-public-url>/models"
216216
217217
2️⃣ /models/load - Load a specific model
218218
• POST with JSON body: {{ "model_id": "microsoft/phi-2" }}
219219
• Example:
220-
curl -X POST "http://localhost:8000/models/load" \\
220+
curl -X POST "<server-ngrok-public-url>/models/load" \\
221221
-H "Content-Type: application/json" \\
222222
-d '{{"model_id": "microsoft/phi-2"}}'
223223
224224
ℹ️ System Endpoints:
225225
226226
1️⃣ /system/info - Get system information
227227
• GET
228-
• Example: curl "http://localhost:8000/system/info"
228+
• Example: curl "<server-ngrok-public-url>/system/info"
229229
230230
2️⃣ /system/resources - Get detailed system resources
231231
• GET
232-
• Example: curl "http://localhost:8000/system/resources"
232+
• Example: curl "<server-ngrok-public-url>/system/resources"
233233
234234
3️⃣ /docs - Interactive API documentation (Swagger UI)
235-
• Open in browser: http://localhost:8000/docs
235+
• Open in browser: <server-ngrok-public-url>/docs
236236
237237
{Fore.CYAN}════════════════════════════════════════════════════════════════════════{Style.RESET_ALL}
238238
"""

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setup(
77
name="locallab",
8-
version="0.3.9",
8+
version="0.4.0",
99
packages=find_packages(include=["locallab", "locallab.*"]),
1010
install_requires=[
1111
"fastapi>=0.95.0,<1.0.0",

0 commit comments

Comments
 (0)