
Commit 9b140f0

Updated package to v0.3.9

1 parent b119de2 · commit 9b140f0

3 files changed: 12 additions (+), 5 deletions (−)


locallab/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 LocalLab: Run LLMs locally with a friendly API similar to OpenAI
 """
 
-__version__ = "0.3.8"
+__version__ = "0.3.9"
 
 from typing import Dict, Any, Optional
 
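After upgrading, the bump can be verified at runtime, since the version string is exposed at the package top level as the diff above shows. A minimal check, assuming the package is installed from this commit:

    import locallab

    # __version__ comes from locallab/__init__.py, bumped in this commit
    assert locallab.__version__ == "0.3.9"
    print(locallab.__version__)  # -> 0.3.9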

locallab/model_manager.py

Lines changed: 10 additions & 3 deletions
@@ -147,7 +147,7 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
         """Apply various optimizations to the model"""
         try:
             # Only apply attention slicing if explicitly enabled and not empty
-            if ENABLE_ATTENTION_SLICING and str(ENABLE_ATTENTION_SLICING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_ATTENTION_SLICING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, 'enable_attention_slicing'):
                     model.enable_attention_slicing(1)
                     logger.info("Attention slicing enabled")
@@ -156,15 +156,15 @@ def _apply_optimizations(self, model: AutoModelForCausalLM) -> AutoModelForCausalLM:
                         "Attention slicing not available for this model")
 
             # Only apply CPU offloading if explicitly enabled and not empty
-            if ENABLE_CPU_OFFLOADING and str(ENABLE_CPU_OFFLOADING).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_CPU_OFFLOADING', '').lower() not in ('false', '0', 'none', ''):
                 if hasattr(model, "enable_cpu_offload"):
                     model.enable_cpu_offload()
                     logger.info("CPU offloading enabled")
                 else:
                     logger.info("CPU offloading not available for this model")
 
             # Only apply BetterTransformer if explicitly enabled and not empty
-            if ENABLE_BETTERTRANSFORMER and str(ENABLE_BETTERTRANSFORMER).lower() not in ('false', '0', 'none', ''):
+            if os.environ.get('LOCALLAB_ENABLE_BETTERTRANSFORMER', '').lower() not in ('false', '0', 'none', ''):
                 try:
                     from optimum.bettertransformer import BetterTransformer
                     model = BetterTransformer.transform(model)
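The two hunks above repeat the same environment-variable truthiness check for each optimization flag. A minimal sketch of collapsing the repeated reads into one helper; `_env_enabled` is a hypothetical name, not something this commit introduces:

    import os

    def _env_enabled(name: str) -> bool:
        # Hypothetical helper: treat unset, 'false', '0', 'none', and '' as
        # disabled, mirroring the checks in the hunks above
        return os.environ.get(name, '').lower() not in ('false', '0', 'none', '')

    # Usage, e.g.:
    # if _env_enabled('LOCALLAB_ENABLE_ATTENTION_SLICING'):
    #     model.enable_attention_slicing(1)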
@@ -219,10 +219,17 @@ async def load_model(self, model_id: str) -> bool:
                 **config
             )
 
+            # Move model to the appropriate device
             if not ENABLE_QUANTIZATION or str(ENABLE_QUANTIZATION).lower() in ('false', '0', 'none', ''):
                 device = "cuda" if torch.cuda.is_available() else "cpu"
                 self.model = self.model.to(device)
 
+            # Capture model parameters after loading
+            model_architecture = self.model.config.architectures[0] if hasattr(self.model.config, 'architectures') else 'Unknown'
+            memory_used = torch.cuda.memory_allocated() if torch.cuda.is_available() else 'N/A'
+            logger.info(f"Model architecture: {model_architecture}")
+            logger.info(f"Memory used: {memory_used}")
+
             self.model = self._apply_optimizations(self.model)
 
             self.current_model = model_id
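Note that torch.cuda.memory_allocated() returns a raw byte count, so the new log line prints a large integer. A sketch of a more readable variant, assuming the same torch import; this is an illustration, not part of the commit:

    import torch

    def _format_memory_used() -> str:
        # Hypothetical formatting helper: report allocated CUDA memory in GiB,
        # falling back to 'N/A' on CPU-only hosts as the commit's code does
        if not torch.cuda.is_available():
            return "N/A"
        return f"{torch.cuda.memory_allocated() / 1024**3:.2f} GiB"

    # logger.info(f"Memory used: {_format_memory_used()}")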

setup.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 setup(
     name="locallab",
-    version="0.3.8",
+    version="0.3.9",
     packages=find_packages(include=["locallab", "locallab.*"]),
     install_requires=[
         "fastapi>=0.95.0,<1.0.0",
