Skip to content

Commit 9a6be0b

Browse files
committed
🔧 fix model size calc.
🔧 update to adjust lambda ephemeral storage based on model size.
1 parent b8e84ae commit 9a6be0b

File tree

3 files changed

+236
-68
lines changed

3 files changed

+236
-68
lines changed

merle/functions.py

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -698,7 +698,7 @@ def _generate_zappa_settings(
698698
logger.info(f"Successfully generated {output_path} for stage '{stage}'")
699699

700700

701-
def prepare_deployment_files( # noqa: PLR0915
701+
def prepare_deployment_files( # noqa: PLR0915, PLR0912
702702
model_name: str,
703703
cache_dir: Path,
704704
project_name: str,
@@ -775,6 +775,7 @@ def prepare_deployment_files( # noqa: PLR0915
775775
# Determine if we need to download and potentially split the model
776776
use_split = False
777777
split_metadata = None
778+
size_details: dict | None = None
778779

779780
if not skip_model_download:
780781
# Import model splitting module (here to avoid circular imports at module load)
@@ -848,16 +849,19 @@ def prepare_deployment_files( # noqa: PLR0915
848849
# Get context window size for the model
849850
context_window_size = get_model_context_window_size(model_name)
850851

851-
# Calculate ephemeral storage needed
852-
# For split models, we need space for reassembly
853-
ephemeral_storage = 5120 # Default 5GB
854-
if use_split and split_metadata:
855-
# Need enough space for the full model in /tmp
856-
total_gb = split_metadata["total_size_gb"]
852+
# Calculate ephemeral storage needed based on model size
853+
# Lambda ephemeral storage (/tmp) ranges from 512 MB to 10,240 MB
854+
if size_details:
855+
model_size_gb = size_details["total_size_gb"]
857856
# Add 20% buffer and round up to nearest 512MB
858-
needed_mb = int((total_gb * 1024 * 1.2 + 511) // 512 * 512)
859-
ephemeral_storage = min(max(needed_mb, 5120), 10240) # Clamp to 5-10GB
860-
logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB for split model")
857+
needed_mb = int((model_size_gb * 1024 * 1.2 + 511) // 512 * 512)
858+
# Clamp to Lambda limits: min 512 MB, max 10,240 MB
859+
ephemeral_storage = min(max(needed_mb, 512), 10240)
860+
logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB for {model_size_gb:.2f} GB model")
861+
else:
862+
# Default when skip_model_download=True (model size unknown)
863+
ephemeral_storage = 5120 # 5GB default
864+
logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB (default, model size unknown)")
861865

862866
# Generate main zappa_settings.json using Zappa Python API
863867
# Uses embedded authorizer (authorizer.lambda_handler function in same Lambda)

merle/managers.py

Lines changed: 26 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -183,8 +183,9 @@ def prepare(
183183
# Get context window size for the model
184184
context_window_size = get_model_context_window_size(self.model_name)
185185

186-
# Calculate ephemeral storage needed
187-
ephemeral_storage = self._calculate_ephemeral_storage(use_split, split_metadata)
186+
# Calculate ephemeral storage needed based on model size
187+
model_size_gb = split_metadata.get("total_size_gb", 0) if split_metadata else 0
188+
ephemeral_storage = self._calculate_ephemeral_storage(model_size_gb)
188189

189190
# Generate zappa_settings.json
190191
self._generate_zappa_settings(
@@ -270,16 +271,29 @@ def _handle_model_download(self, s3_bucket: str) -> tuple[bool, dict | None]:
270271

271272
logger.info("Model fits in Docker image, using standard deployment")
272273
copy_model_to_output(self.model_name, self.model_cache_dir)
273-
return False, None
274-
275-
def _calculate_ephemeral_storage(self, use_split: bool, split_metadata: dict | None) -> int:
276-
"""Calculate ephemeral storage needed for Lambda."""
277-
ephemeral_storage = 5120 # Default 5GB
278-
if use_split and split_metadata:
279-
total_gb = split_metadata["total_size_gb"]
280-
needed_mb = int((total_gb * 1024 * 1.2 + 511) // 512 * 512)
281-
ephemeral_storage = min(max(needed_mb, 5120), 10240)
282-
logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB for split model")
274+
# Return size info for ephemeral storage calculation
275+
return False, {"total_size_gb": size_details["total_size_gb"]}
276+
277+
def _calculate_ephemeral_storage(self, model_size_gb: float) -> int:
    """
    Calculate ephemeral storage needed for Lambda based on model size.

    Lambda ephemeral storage (/tmp) ranges from 512 MB to 10,240 MB.
    We need enough space for the model files at runtime.

    Args:
        model_size_gb: Model size in GB. A non-positive value is treated as
            "size unknown" (the caller passes 0 when no size metadata is
            available) and yields the 5 GB default instead of the 512 MB
            minimum.

    Returns:
        Ephemeral storage in MB (512-10240)
    """
    import math  # local import keeps this block self-contained

    if model_size_gb <= 0:
        # Size unknown: use the same 5 GB default that
        # prepare_deployment_files applies when the model size is unknown,
        # rather than silently collapsing to the 512 MB minimum.
        ephemeral_storage = 5120
        logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB (default, model size unknown)")
        return ephemeral_storage

    # Model size + 20% buffer, rounded up to the next multiple of 512 MB.
    # math.ceil is used because float floor-division of (x + 511) does not
    # round up for fractional MB values (e.g. 512.5 would stay at 512).
    needed_mb = math.ceil(model_size_gb * 1024 * 1.2 / 512) * 512

    # Clamp to Lambda limits: min 512 MB, max 10,240 MB
    ephemeral_storage = min(max(needed_mb, 512), 10240)

    logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB for {model_size_gb:.2f} GB model")
    return ephemeral_storage
284298

285299
def _generate_zappa_settings(

0 commit comments

Comments
 (0)