Skip to content

Commit 8f0fc23

Browse files
author
Piotr Stankiewicz
committed
inference: Return memory requirement in estimation error
Signed-off-by: Piotr Stankiewicz <[email protected]>
1 parent 229e081 commit 8f0fc23

File tree

3 files changed: 11 additions and 7 deletions.

pkg/inference/memory/estimator.go

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ import (
1111
type MemoryEstimator interface {
1212
SetDefaultBackend(MemoryEstimatorBackend)
1313
GetRequiredMemoryForModel(context.Context, string, *inference.BackendConfiguration) (*inference.RequiredMemory, error)
14-
HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, error)
14+
HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, *inference.RequiredMemory, *inference.RequiredMemory, error)
1515
}
1616

1717
type MemoryEstimatorBackend interface {
@@ -39,10 +39,11 @@ func (m *memoryEstimator) GetRequiredMemoryForModel(ctx context.Context, model s
3939
return m.defaultBackend.GetRequiredMemoryForModel(ctx, model, config)
4040
}
4141

42-
func (m *memoryEstimator) HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, error) {
42+
func (m *memoryEstimator) HaveSufficientMemoryForModel(ctx context.Context, model string, config *inference.BackendConfiguration) (bool, *inference.RequiredMemory, *inference.RequiredMemory, error) {
4343
req, err := m.GetRequiredMemoryForModel(ctx, model, config)
4444
if err != nil {
45-
return false, fmt.Errorf("estimating required memory for model: %w", err)
45+
return false, nil, nil, fmt.Errorf("estimating required memory for model: %w", err)
4646
}
47-
return m.systemMemoryInfo.HaveSufficientMemory(*req), nil
47+
totalMemory := m.systemMemoryInfo.GetTotalMemory()
48+
return m.systemMemoryInfo.HaveSufficientMemory(*req), req, &totalMemory, nil
4849
}

pkg/inference/memory/system.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,8 @@ func NewSystemMemoryInfo(log logging.Logger, gpuInfo *gpuinfo.GPUInfo) (SystemMe
4040
log.Infof("Running on system with %d MB RAM", ramSize/1024/1024)
4141
}
4242
}
43+
ramSize = 1024 * 1024 * 1024
44+
vramSize = 1024 * 1024 * 1024
4345
return &systemMemoryInfo{
4446
log: log,
4547
totalMemory: inference.RequiredMemory{RAM: ramSize, VRAM: vramSize},

pkg/inference/models/manager.go

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -163,15 +163,16 @@ func (m *Manager) handleCreateModel(w http.ResponseWriter, r *http.Request) {
163163
// besides pulling (such as model building).
164164
if !request.IgnoreRuntimeMemoryCheck {
165165
m.log.Infof("Will estimate memory required for %q", request.From)
166-
proceed, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
166+
proceed, req, totalMem, err := m.memoryEstimator.HaveSufficientMemoryForModel(r.Context(), request.From, nil)
167167
if err != nil {
168168
m.log.Warnf("Failed to calculate memory required for model %q: %s", request.From, err)
169169
// Prefer staying functional in case of unexpected estimation errors.
170170
proceed = true
171171
}
172172
if !proceed {
173-
m.log.Warnf("Runtime memory requirement for model %q exceeds total system memory", request.From)
174-
http.Error(w, "Runtime memory requirement for model exceeds total system memory", http.StatusInsufficientStorage)
173+
errstr := fmt.Sprintf("Runtime memory requirement for model %q exceeds total system memory: required %d RAM %d VRAM, system %d RAM %d VRAM", request.From, req.RAM, req.VRAM, totalMem.RAM, totalMem.VRAM)
174+
m.log.Warnf(errstr)
175+
http.Error(w, errstr, http.StatusInsufficientStorage)
175176
return
176177
}
177178
}

0 commit comments

Comments
 (0)