@@ -238,8 +238,10 @@ func (s *Scheduler) handleOpenAIInference(w http.ResponseWriter, r *http.Request
238238 s .tracker .TrackModel (model )
239239 }
240240
241+ modelID := s .modelManager .ResolveModelID (request .Model )
242+
241243 // Request a runner to execute the request and defer its release.
242- runner , err := s .loader .load (r .Context (), backend .Name (), request . Model , backendMode )
244+ runner , err := s .loader .load (r .Context (), backend .Name (), modelID , backendMode )
243245 if err != nil {
244246 http .Error (w , fmt .Errorf ("unable to load runner: %w" , err ).Error (), http .StatusInternalServerError )
245247 return
@@ -410,8 +412,9 @@ func (s *Scheduler) Configure(w http.ResponseWriter, r *http.Request) {
410412 runnerConfig .ContextSize = configureRequest .ContextSize
411413 runnerConfig .RuntimeFlags = runtimeFlags
412414
413- if err := s .loader .setRunnerConfig (r .Context (), backend .Name (), configureRequest .Model , inference .BackendModeCompletion , runnerConfig ); err != nil {
414- s .log .Warnf ("Failed to configure %s runner for %s: %s" , backend .Name (), configureRequest .Model , err )
415+ modelID := s .modelManager .ResolveModelID (configureRequest .Model )
416+ if err := s .loader .setRunnerConfig (r .Context (), backend .Name (), modelID , inference .BackendModeCompletion , runnerConfig ); err != nil {
417+ s .log .Warnf ("Failed to configure %s runner for %s: %s" , backend .Name (), modelID , err )
415418 if errors .Is (err , errRunnerAlreadyActive ) {
416419 http .Error (w , err .Error (), http .StatusConflict )
417420 } else {
0 commit comments