Skip to content

Commit 291d850

Browse files
committed
integrate model handler into HTTP handler
1 parent e1fd74c commit 291d850

File tree

4 files changed

+12
-15
lines changed

4 files changed

+12
-15
lines changed

main.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,8 @@ func main() {
152152
mlx.Name: mlxBackend,
153153
},
154154
llamaCppBackend,
155-
modelHandler,
156155
modelManager,
157156
http.DefaultClient,
158-
nil,
159157
metrics.NewTracker(
160158
http.DefaultClient,
161159
log.WithField("component", "metrics"),
@@ -166,7 +164,7 @@ func main() {
166164
)
167165

168166
// Create the HTTP handler for the scheduler
169-
schedulerHTTP := scheduling.NewHTTPHandler(scheduler, nil)
167+
schedulerHTTP := scheduling.NewHTTPHandler(scheduler, modelHandler, nil)
170168

171169
router := routing.NewNormalizedServeMux()
172170

pkg/inference/scheduling/http_handler.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212

1313
"github.com/docker/model-runner/pkg/distribution/distribution"
1414
"github.com/docker/model-runner/pkg/inference"
15+
"github.com/docker/model-runner/pkg/inference/models"
1516
"github.com/docker/model-runner/pkg/metrics"
1617
"github.com/docker/model-runner/pkg/middleware"
1718
)
@@ -23,15 +24,18 @@ type HTTPHandler struct {
2324
scheduler *Scheduler
2425
router *http.ServeMux
2526
httpHandler http.Handler
26-
lock sync.RWMutex
27+
// modelHandler is the shared model handler.
28+
modelHandler *models.HTTPHandler
29+
lock sync.RWMutex
2730
}
2831

2932
// NewHTTPHandler creates a new HTTP handler that wraps the scheduler.
3033
// This is the primary HTTP interface for the scheduling package; models requests are delegated to modelHandler.
31-
func NewHTTPHandler(s *Scheduler, allowedOrigins []string) *HTTPHandler {
34+
func NewHTTPHandler(s *Scheduler, modelHandler *models.HTTPHandler, allowedOrigins []string) *HTTPHandler {
3235
h := &HTTPHandler{
33-
scheduler: s,
34-
router: http.NewServeMux(),
36+
scheduler: s,
37+
modelHandler: modelHandler,
38+
router: http.NewServeMux(),
3539
}
3640

3741
// Register routes
@@ -217,7 +221,7 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
217221
// handleModels handles GET /engines/{backend}/v1/models* requests
218222
// by delegating to the shared model handler
219223
func (h *HTTPHandler) handleModels(w http.ResponseWriter, r *http.Request) {
220-
h.scheduler.modelHandler.ServeHTTP(w, r)
224+
h.modelHandler.ServeHTTP(w, r)
221225
}
222226

223227
// GetBackendStatus returns the status of all backends.

pkg/inference/scheduling/scheduler.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ type Scheduler struct {
3030
backends map[string]inference.Backend
3131
// defaultBackend is the default inference backend. It may be nil.
3232
defaultBackend inference.Backend
33-
// modelHandler is the shared model handler.
34-
modelHandler *models.HTTPHandler
3533
// modelManager is the shared model manager.
3634
modelManager *models.Manager
3735
// installer is the backend installer.
@@ -49,10 +47,8 @@ func NewScheduler(
4947
log logging.Logger,
5048
backends map[string]inference.Backend,
5149
defaultBackend inference.Backend,
52-
handler *models.HTTPHandler,
5350
modelManager *models.Manager,
5451
httpClient *http.Client,
55-
allowedOrigins []string,
5652
tracker *metrics.Tracker,
5753
sysMemInfo memory.SystemMemoryInfo,
5854
) *Scheduler {
@@ -63,7 +59,6 @@ func NewScheduler(
6359
log: log,
6460
backends: backends,
6561
defaultBackend: defaultBackend,
66-
modelHandler: handler,
6762
modelManager: modelManager,
6863
installer: newInstaller(log, backends, httpClient),
6964
loader: newLoader(log, backends, modelManager, openAIRecorder, sysMemInfo),

pkg/inference/scheduling/scheduler_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ func TestCors(t *testing.T) {
4444
discard := logrus.New()
4545
discard.SetOutput(io.Discard)
4646
log := logrus.NewEntry(discard)
47-
s := NewScheduler(log, nil, nil, nil, nil, nil, []string{"*"}, nil, systemMemoryInfo{})
48-
httpHandler := NewHTTPHandler(s, []string{"*"})
47+
s := NewScheduler(log, nil, nil, nil, nil, nil, systemMemoryInfo{})
48+
httpHandler := NewHTTPHandler(s, nil, []string{"*"})
4949
req := httptest.NewRequest(http.MethodOptions, "http://model-runner.docker.internal"+tt.path, http.NoBody)
5050
req.Header.Set("Origin", "docker.com")
5151
w := httptest.NewRecorder()

0 commit comments

Comments
 (0)