Skip to content

Commit 2b799ce

Browse files
committed
feat: Refactor _resolve_offload_mode of InferenceManager with structured fallback and reasons
1 parent 5f1ec9a commit 2b799ce

File tree

1 file changed

+96
-51
lines changed

1 file changed

+96
-51
lines changed

src/tabicl/model/inference.py

Lines changed: 96 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,17 @@ class OffloadMode(Enum):
180180
AUTO = auto() # Automatically choose based on available memory
181181

182182

183+
@dataclass
class OffloadReason:
    """Structured reason for offload mode resolution.

    Attributes
    ----------
    key : str
        Short machine-friendly identifier of the decision
        (e.g. ``"user_gpu_fits"`` or ``"auto_cpu_swap"``).

    detail : Optional[str], default=None
        Optional human-readable explanation appended to ``key`` when the
        reason is rendered as a string.
    """

    key: str
    detail: Optional[str] = None

    def __str__(self) -> str:
        # A missing or empty detail renders as the bare key, so there is no
        # dangling ": " suffix.
        return f"{self.key}: {self.detail}" if self.detail else self.key
192+
193+
183194
@dataclass
184195
class OffloadConfig:
185196
"""Configuration for offloading behavior."""
@@ -762,11 +773,11 @@ def get_available_gpu_memory(self) -> float:
762773
Returns
763774
-------
764775
float
765-
Available GPU memory in megabytes, or infinity if CUDA is not
776+
Available GPU memory in megabytes, or 0.0 if CUDA is not
766777
available or execution device is CPU.
767778
"""
768779
if not torch.cuda.is_available() or self.exe_device.type != "cuda":
769-
return float("inf")
780+
return 0.0
770781
torch.cuda.synchronize()
771782
torch.cuda.empty_cache()
772783
return torch.cuda.mem_get_info(self.exe_device)[0] / (1024 * 1024)
@@ -863,62 +874,96 @@ def estimate_safe_batch_size(
863874

864875
def _resolve_offload_mode(
    self, output_mb: float, gpu_free_mb: float, cpu_free_mb: float, disk_free_mb: float
) -> Tuple[OffloadMode, OffloadReason]:
    """Resolve the offload mode actually used, returns (mode, reason).

    For user-requested modes, the requested mode is used if it fits.
    Otherwise, modes fall back: GPU -> CPU -> DISK -> CPU (swap as last resort).

    For AUTO mode, the priority is:
    GPU (if within threshold) -> CPU -> DISK -> CPU (swap as last resort).

    Note: CPU mode can use either pinned or non-pinned memory.
    - Pinned memory: faster for async GPU-CPU transfers, but locks physical memory
    - Non-pinned memory: slower transfers, but can use virtual memory (swap)

    Parameters
    ----------
    output_mb : float
        Size of the output to place, in megabytes.

    gpu_free_mb : float
        Free GPU memory in megabytes. A value ``<= 0`` is treated as
        "no usable GPU".

    cpu_free_mb : float
        Free CPU memory in megabytes; scaled by ``self.cpu_safety_factor``
        before comparison.

    disk_free_mb : float
        Free disk space in megabytes; ``self.disk_min_free_mb`` is reserved
        and the remainder is scaled by ``self.disk_safety_factor``. Ignored
        when ``self.disk_offload_dir`` is None.

    Returns
    -------
    Tuple[OffloadMode, OffloadReason]
        The resolved mode and a structured reason describing the decision.

    Raises
    ------
    ValueError
        If DISK mode is explicitly requested but ``self.disk_offload_dir``
        is not configured.
    """
    has_gpu = gpu_free_mb > 0
    has_disk = self.disk_offload_dir is not None
    effective_disk = max(0, disk_free_mb - self.disk_min_free_mb) if has_disk else 0

    safe_cpu_mb = cpu_free_mb * self.cpu_safety_factor
    safe_disk_mb = effective_disk * self.disk_safety_factor

    gpu_fits = has_gpu and output_mb <= gpu_free_mb
    cpu_fits = output_mb <= safe_cpu_mb
    disk_fits = has_disk and output_mb <= safe_disk_mb

    # Describe a tier that was skipped accurately: "tight" only applies when
    # the tier exists; otherwise it is simply unavailable.
    gpu_state = "tight" if has_gpu else "unavailable"
    disk_state = "tight" if has_disk else "unavailable"

    # User-requested mode with cascading fallback.
    # If the requested mode fails, downgrade one tier at a time:
    # GPU -> CPU -> DISK -> CPU (swap as last resort)
    if self.offload_mode != OffloadMode.AUTO:
        requested = self.offload_mode

        if requested == OffloadMode.GPU:
            if gpu_fits:
                return OffloadMode.GPU, OffloadReason(
                    "user_gpu_fits", f"{output_mb:.0f}MB <= {gpu_free_mb:.0f}MB gpu free"
                )
            elif cpu_fits:
                return OffloadMode.CPU, OffloadReason("user_gpu_fails", f"gpu (requested) {gpu_state} -> cpu")
            elif disk_fits:
                return OffloadMode.DISK, OffloadReason(
                    "user_gpu_fails", f"gpu (requested) {gpu_state}, cpu tight -> disk"
                )
            else:
                return OffloadMode.CPU, OffloadReason(
                    "user_gpu_fails",
                    f"gpu (requested) {gpu_state}, cpu tight, disk {disk_state} -> cpu (swap)",
                )

        if requested == OffloadMode.CPU:
            if cpu_fits:
                return OffloadMode.CPU, OffloadReason(
                    "user_cpu_fits", f"{output_mb:.0f}MB <= {safe_cpu_mb:.0f}MB safe cpu free"
                )
            elif disk_fits:
                return OffloadMode.DISK, OffloadReason("user_cpu_fails", "cpu (requested) tight -> disk")
            else:
                return OffloadMode.CPU, OffloadReason(
                    "user_cpu_fails", f"cpu (requested) tight, disk {disk_state} -> cpu (swap)"
                )

        if requested == OffloadMode.DISK:
            if not has_disk:
                raise ValueError(
                    "Disk offload requested but disk_offload_dir is not configured. "
                    "Please specify disk_offload_dir in the configuration."
                )

            if disk_fits:
                return OffloadMode.DISK, OffloadReason(
                    "user_disk_fits", f"{output_mb:.0f}MB <= {safe_disk_mb:.0f}MB safe disk free"
                )
            elif cpu_fits:
                # CPU has room within its safety margin, so this fallback is
                # not expected to rely on swap.
                return OffloadMode.CPU, OffloadReason("user_disk_fails", "disk (requested) tight -> cpu")
            else:
                return OffloadMode.CPU, OffloadReason("user_disk_fails", "disk (requested) tight -> cpu (swap)")

    # AUTO mode (also reached if the configured mode is none of GPU/CPU/DISK)
    output_pct = output_mb / max(gpu_free_mb, 1e-6) if has_gpu else 1.0
    gpu_within_threshold = has_gpu and output_pct <= self.auto_offload_threshold

    if gpu_within_threshold:
        return OffloadMode.GPU, OffloadReason(
            "auto_gpu_fits",
            f"{output_mb:.0f}MB <= {self.auto_offload_threshold * gpu_free_mb:.0f}MB safe gpu free",
        )
    elif cpu_fits:
        return OffloadMode.CPU, OffloadReason(
            "auto_cpu_fits",
            f"gpu {gpu_state} -> cpu ({output_mb:.0f}MB <= {safe_cpu_mb:.0f}MB safe cpu free)",
        )
    elif disk_fits:
        return OffloadMode.DISK, OffloadReason(
            "auto_disk_fits",
            f"gpu {gpu_state}, cpu tight -> disk ({output_mb:.0f}MB <= {safe_disk_mb:.0f}MB safe disk free)",
        )
    else:
        return OffloadMode.CPU, OffloadReason(
            "auto_cpu_swap", f"gpu {gpu_state}, cpu tight, disk {disk_state} -> cpu (swap)"
        )
922967

923968
def _allocate_output_buffer(
924969
self,

0 commit comments

Comments
 (0)