Skip to content

Commit e19011d

Browse files
committed
feat(scheduler): add endpoint to retrieve model configurations
Add GET /engines/_configure endpoint that returns active model configurations, similar to Ollama's /api/show endpoint. Accepts optional "model" query parameter to filter configs for a specific model; returns all configs if no model specified. Signed-off-by: Dorin Geman <[email protected]>
1 parent 2c53258 commit e19011d

File tree

4 files changed

+85
-0
lines changed

4 files changed

+85
-0
lines changed

pkg/inference/backend.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ package inference
22

33
import (
44
"context"
5+
"fmt"
56
"net/http"
7+
"strings"
68
)
79

810
// BackendMode encodes the mode in which a backend should operate.
@@ -40,6 +42,22 @@ func (m BackendMode) String() string {
4042
}
4143
}
4244

45+
// MarshalJSON implements json.Marshaler for BackendMode.
46+
func (m BackendMode) MarshalJSON() ([]byte, error) {
47+
return []byte(`"` + m.String() + `"`), nil
48+
}
49+
50+
// UnmarshalJSON implements json.Unmarshaler for BackendMode.
51+
func (m *BackendMode) UnmarshalJSON(data []byte) error {
52+
s := strings.Trim(string(data), "\"")
53+
mode, ok := ParseBackendMode(s)
54+
if !ok {
55+
return fmt.Errorf("unknown backend mode: %q", s)
56+
}
57+
*m = mode
58+
return nil
59+
}
60+
4361
// ParseBackendMode converts a string mode to BackendMode.
4462
// It returns the parsed mode and a boolean indicating if the mode was known.
4563
// For unknown modes, it returns BackendModeCompletion and false.

pkg/inference/scheduling/http_handler.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc {
8484
m["POST "+inference.InferencePrefix+"/unload"] = h.Unload
8585
m["POST "+inference.InferencePrefix+"/{backend}/_configure"] = h.Configure
8686
m["POST "+inference.InferencePrefix+"/_configure"] = h.Configure
87+
m["GET "+inference.InferencePrefix+"/_configure"] = h.GetModelConfigs
8788
m["GET "+inference.InferencePrefix+"/requests"] = h.scheduler.openAIRecorder.GetRecordsHandler()
8889
return m
8990
}
@@ -350,6 +351,31 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
350351
w.WriteHeader(http.StatusAccepted)
351352
}
352353

354+
// GetModelConfigs returns model configurations. If a model is specified in the request body,
355+
// returns only configs for that model; otherwise returns all configs.
356+
func (h *HTTPHandler) GetModelConfigs(w http.ResponseWriter, r *http.Request) {
357+
model := r.URL.Query().Get("model")
358+
359+
configs := h.scheduler.GetAllModelConfigs(r.Context())
360+
361+
if model != "" {
362+
modelID := h.scheduler.modelManager.ResolveID(model)
363+
filtered := configs[:0]
364+
for _, entry := range configs {
365+
if entry.ModelID == modelID {
366+
filtered = append(filtered, entry)
367+
}
368+
}
369+
configs = filtered
370+
}
371+
372+
w.Header().Set("Content-Type", "application/json")
373+
if err := json.NewEncoder(w).Encode(configs); err != nil {
374+
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
375+
return
376+
}
377+
}
378+
353379
// ServeHTTP implements net/http.Handler.ServeHTTP.
354380
func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
355381
h.lock.RLock()

pkg/inference/scheduling/loader.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,3 +626,39 @@ func (l *loader) setRunnerConfig(ctx context.Context, backendName, modelID strin
626626
l.runnerConfigs[configKey] = runnerConfig
627627
return nil
628628
}
629+
630+
// ModelConfigEntry represents a model configuration entry with its associated metadata.
// The struct declares no JSON tags, so encoding/json uses the Go field names as object keys.
// The per-field notes describe how getAllRunnerConfigs populates an entry.
631+
type ModelConfigEntry struct {
632+
Backend string // backend component of the runner-config key
633+
Model string // first tag of the local model, or "" when the model has no tags
634+
ModelID string // model ID component of the runner-config key
635+
Mode inference.BackendMode // mode component of the runner-config key
636+
Config inference.BackendConfiguration // configuration stored under that key
637+
}
638+
639+
// getAllRunnerConfigs retrieves all runner configurations.
640+
func (l *loader) getAllRunnerConfigs(ctx context.Context) []ModelConfigEntry {
641+
if !l.lock(ctx) {
642+
return nil
643+
}
644+
defer l.unlock()
645+
646+
entries := make([]ModelConfigEntry, 0, len(l.runnerConfigs))
647+
for key, config := range l.runnerConfigs {
648+
model, err := l.modelManager.GetLocal(key.modelID)
649+
if err == nil {
650+
modelName := ""
651+
if len(model.Tags()) > 0 {
652+
modelName = model.Tags()[0]
653+
}
654+
entries = append(entries, ModelConfigEntry{
655+
Backend: key.backend,
656+
Model: modelName,
657+
ModelID: key.modelID,
658+
Mode: key.mode,
659+
Config: config,
660+
})
661+
}
662+
}
663+
return entries
664+
}

pkg/inference/scheduling/scheduler.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,3 +300,8 @@ func (s *Scheduler) ConfigureRunner(ctx context.Context, backend inference.Backe
300300

301301
return backend, nil
302302
}
303+
304+
// GetAllModelConfigs retrieves all active model configurations.
// It delegates to the loader's getAllRunnerConfigs and returns that result unchanged.
305+
func (s *Scheduler) GetAllModelConfigs(ctx context.Context) []ModelConfigEntry {
306+
return s.loader.getAllRunnerConfigs(ctx)
307+
}

0 commit comments

Comments
 (0)