@@ -183,8 +183,9 @@ def prepare(
183183 # Get context window size for the model
184184 context_window_size = get_model_context_window_size (self .model_name )
185185
186- # Calculate ephemeral storage needed
187- ephemeral_storage = self ._calculate_ephemeral_storage (use_split , split_metadata )
186+ # Calculate ephemeral storage needed based on model size
187+ model_size_gb = split_metadata .get ("total_size_gb" , 0 ) if split_metadata else 0
188+ ephemeral_storage = self ._calculate_ephemeral_storage (model_size_gb )
188189
189190 # Generate zappa_settings.json
190191 self ._generate_zappa_settings (
@@ -270,16 +271,29 @@ def _handle_model_download(self, s3_bucket: str) -> tuple[bool, dict | None]:
270271
271272 logger .info ("Model fits in Docker image, using standard deployment" )
272273 copy_model_to_output (self .model_name , self .model_cache_dir )
273- return False , None
274-
275- def _calculate_ephemeral_storage (self , use_split : bool , split_metadata : dict | None ) -> int :
276- """Calculate ephemeral storage needed for Lambda."""
277- ephemeral_storage = 5120 # Default 5GB
278- if use_split and split_metadata :
279- total_gb = split_metadata ["total_size_gb" ]
280- needed_mb = int ((total_gb * 1024 * 1.2 + 511 ) // 512 * 512 )
281- ephemeral_storage = min (max (needed_mb , 5120 ), 10240 )
282- logger .info (f"Setting ephemeral storage to { ephemeral_storage } MB for split model" )
274+ # Return size info for ephemeral storage calculation
275+ return False , {"total_size_gb" : size_details ["total_size_gb" ]}
276+
def _calculate_ephemeral_storage(self, model_size_gb: float) -> int:
    """
    Calculate ephemeral storage needed for Lambda based on model size.

    The requested size is the model size plus a 20% buffer, rounded up to
    the next multiple of 512 MB and then clamped to the range this method
    treats as Lambda's ephemeral storage (/tmp) limits: 512 MB to 10,240 MB.

    Args:
        model_size_gb: Model size in GB. Zero (or a negative value) yields
            the 512 MB minimum.

    Returns:
        Ephemeral storage in MB (512-10240), always a multiple of 512.
    """
    # Function-scope import keeps this block self-contained.
    import math

    step_mb = 512
    min_storage_mb = 512
    max_storage_mb = 10240

    # Model size in MB plus a 20% buffer.
    padded_mb = model_size_gb * 1024 * 1.2

    # Use a true ceiling: the integer "+ 511 then floor-divide" trick rounds
    # DOWN for fractional inputs (e.g. 6144.4 MB -> 6144 MB), which would
    # allocate less space than the padded model size actually needs.
    needed_mb = math.ceil(padded_mb / step_mb) * step_mb

    # Clamp to Lambda limits: min 512 MB, max 10,240 MB
    ephemeral_storage = min(max(needed_mb, min_storage_mb), max_storage_mb)

    logger.info(f"Setting ephemeral storage to {ephemeral_storage} MB for {model_size_gb:.2f} GB model")
    return ephemeral_storage
284298
285299 def _generate_zappa_settings (
0 commit comments