Skip to content

Commit db831fd

Browse files
committed
feat(scheduler): add endpoint to retrieve model configurations
Add GET /engines/_configure endpoint that returns active model configurations, similar to Ollama's /api/show endpoint. Accepts an optional "model" query parameter to filter configs for a specific model; returns all configs if no model is specified. Signed-off-by: Dorin Geman <[email protected]>
1 parent 2c53258 commit db831fd

File tree

4 files changed

+83
-0
lines changed

4 files changed

+83
-0
lines changed

pkg/inference/backend.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package inference
22

33
import (
44
"context"
5+
"encoding/json"
6+
"fmt"
57
"net/http"
68
)
79

@@ -40,6 +42,25 @@ func (m BackendMode) String() string {
4042
}
4143
}
4244

45+
// MarshalJSON implements json.Marshaler for BackendMode.
46+
func (m BackendMode) MarshalJSON() ([]byte, error) {
47+
return []byte(`"` + m.String() + `"`), nil
48+
}
49+
50+
// UnmarshalJSON implements json.Unmarshaler for BackendMode.
51+
func (m *BackendMode) UnmarshalJSON(data []byte) error {
52+
var s string
53+
if err := json.Unmarshal(data, &s); err != nil {
54+
return err
55+
}
56+
mode, ok := ParseBackendMode(s)
57+
if !ok {
58+
return fmt.Errorf("unknown backend mode: %q", s)
59+
}
60+
*m = mode
61+
return nil
62+
}
63+
4364
// ParseBackendMode converts a string mode to BackendMode.
4465
// It returns the parsed mode and a boolean indicating if the mode was known.
4566
// For unknown modes, it returns BackendModeCompletion and false.

pkg/inference/scheduling/api.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,3 +98,12 @@ type ConfigureRequest struct {
9898
RawRuntimeFlags string `json:"raw-runtime-flags,omitempty"`
9999
inference.BackendConfiguration
100100
}
101+
102+
// ModelConfigEntry represents a model configuration entry with its associated metadata.
// The struct declares no json tags, so encoding/json serializes it under the
// Go field names (Backend, Model, ModelID, Mode, Config).
type ModelConfigEntry struct {
	// Backend is the backend component of the runner configuration key.
	Backend string
	// Model is a tag for the model. getAllRunnerConfigs fills it with the
	// model's first tag and leaves it empty when the model has no tags.
	Model string
	// ModelID is the model ID component of the runner configuration key.
	// GetModelConfigs compares it against the resolved ID when filtering.
	ModelID string
	// Mode is the backend mode component of the runner configuration key.
	Mode inference.BackendMode
	// Config is the backend configuration stored for this key.
	Config inference.BackendConfiguration
}

pkg/inference/scheduling/http_handler.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ func (h *HTTPHandler) routeHandlers() map[string]http.HandlerFunc {
8484
m["POST "+inference.InferencePrefix+"/unload"] = h.Unload
8585
m["POST "+inference.InferencePrefix+"/{backend}/_configure"] = h.Configure
8686
m["POST "+inference.InferencePrefix+"/_configure"] = h.Configure
87+
m["GET "+inference.InferencePrefix+"/_configure"] = h.GetModelConfigs
8788
m["GET "+inference.InferencePrefix+"/requests"] = h.scheduler.openAIRecorder.GetRecordsHandler()
8889
return m
8990
}
@@ -350,6 +351,31 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
350351
w.WriteHeader(http.StatusAccepted)
351352
}
352353

354+
// GetModelConfigs returns model configurations. If a model is specified in the query parameter,
355+
// returns only configs for that model; otherwise returns all configs.
356+
func (h *HTTPHandler) GetModelConfigs(w http.ResponseWriter, r *http.Request) {
357+
model := r.URL.Query().Get("model")
358+
359+
configs := h.scheduler.loader.getAllRunnerConfigs(r.Context())
360+
361+
if model != "" {
362+
modelID := h.scheduler.modelManager.ResolveID(model)
363+
filtered := configs[:0]
364+
for _, entry := range configs {
365+
if entry.ModelID == modelID {
366+
filtered = append(filtered, entry)
367+
}
368+
}
369+
configs = filtered
370+
}
371+
372+
w.Header().Set("Content-Type", "application/json")
373+
if err := json.NewEncoder(w).Encode(configs); err != nil {
374+
http.Error(w, fmt.Sprintf("Failed to encode response: %v", err), http.StatusInternalServerError)
375+
return
376+
}
377+
}
378+
353379
// ServeHTTP implements net/http.Handler.ServeHTTP.
354380
func (h *HTTPHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
355381
h.lock.RLock()

pkg/inference/scheduling/loader.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -626,3 +626,30 @@ func (l *loader) setRunnerConfig(ctx context.Context, backendName, modelID strin
626626
l.runnerConfigs[configKey] = runnerConfig
627627
return nil
628628
}
629+
630+
// getAllRunnerConfigs retrieves all runner configurations.
631+
func (l *loader) getAllRunnerConfigs(ctx context.Context) []ModelConfigEntry {
632+
if !l.lock(ctx) {
633+
return nil
634+
}
635+
defer l.unlock()
636+
637+
entries := make([]ModelConfigEntry, 0, len(l.runnerConfigs))
638+
for key, config := range l.runnerConfigs {
639+
model, err := l.modelManager.GetLocal(key.modelID)
640+
if err == nil {
641+
modelName := ""
642+
if len(model.Tags()) > 0 {
643+
modelName = model.Tags()[0]
644+
}
645+
entries = append(entries, ModelConfigEntry{
646+
Backend: key.backend,
647+
Model: modelName,
648+
ModelID: key.modelID,
649+
Mode: key.mode,
650+
Config: config,
651+
})
652+
}
653+
}
654+
return entries
655+
}

0 commit comments

Comments
 (0)