Skip to content

Commit b6bf9a1

Browse files
committed
Use modelID as key for runners & runnerConfigs maps
1 parent 7210581 commit b6bf9a1

File tree

2 files changed

+10
-6
lines changed

2 files changed

+10
-6
lines changed

pkg/inference/scheduling/loader.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ var (
4242
type runnerKey struct {
4343
// backend is the backend associated with the runner.
4444
backend string
45-
// model is the model associated with the runner.
45+
// model is the modelID associated with the runner.
4646
model string
4747
// mode is the operation mode associated with the runner.
4848
mode inference.BackendMode
@@ -254,11 +254,12 @@ func (l *loader) Unload(ctx context.Context, unload UnloadRequest) int {
254254
return l.evict(false)
255255
} else {
256256
for _, model := range unload.Models {
257+
modelID := l.modelManager.ResolveModelID(model)
257258
delete(l.runnerConfigs, runnerKey{unload.Backend, model, inference.BackendModeCompletion})
258259
// Evict both, completion and embedding models. We should consider
259260
// accepting a mode parameter in unload requests.
260-
l.evictRunner(unload.Backend, model, inference.BackendModeCompletion)
261-
l.evictRunner(unload.Backend, model, inference.BackendModeEmbedding)
261+
l.evictRunner(unload.Backend, modelID, inference.BackendModeCompletion)
262+
l.evictRunner(unload.Backend, modelID, inference.BackendModeEmbedding)
262263
}
263264
return len(l.runners)
264265
}

pkg/inference/scheduling/scheduler.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -238,8 +238,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
238238
s.tracker.TrackModel(model)
239239
}
240240

241+
modelID := s.modelManager.ResolveModelID(request.Model)
242+
241243
// Request a runner to execute the request and defer its release.
242-
runner, err := s.loader.load(r.Context(), backend.Name(), request.Model, backendMode)
244+
runner, err := s.loader.load(r.Context(), backend.Name(), modelID, backendMode)
243245
if err != nil {
244246
http.Error(w, fmt.Errorf("unable to load runner: %w", err).Error(), http.StatusInternalServerError)
245247
return
@@ -410,8 +412,9 @@ func (s *Scheduler) Configure(w http.ResponseWriter, r *http.Request) {
410412
runnerConfig.ContextSize = configureRequest.ContextSize
411413
runnerConfig.RuntimeFlags = runtimeFlags
412414

413-
if err := s.loader.setRunnerConfig(r.Context(), backend.Name(), configureRequest.Model, inference.BackendModeCompletion, runnerConfig); err != nil {
414-
s.log.Warnf("Failed to configure %s runner for %s: %s", backend.Name(), configureRequest.Model, err)
415+
modelID := s.modelManager.ResolveModelID(configureRequest.Model)
416+
if err := s.loader.setRunnerConfig(r.Context(), backend.Name(), modelID, inference.BackendModeCompletion, runnerConfig); err != nil {
417+
s.log.Warnf("Failed to configure %s runner for %s: %s", backend.Name(), modelID, err)
415418
if errors.Is(err, errRunnerAlreadyActive) {
416419
http.Error(w, err.Error(), http.StatusConflict)
417420
} else {

0 commit comments

Comments
 (0)