
Commit 1067aae

feat: add v1/models endpoint (#186)

* feat: add v1/models endpoint
* add port-endpoint mapping in api docs

Signed-off-by: JaredforReal <[email protected]>

1 parent 946585f · commit 1067aae

File tree

4 files changed: +177 lines, -0 lines

src/semantic-router/pkg/api/server.go

Lines changed: 57 additions & 0 deletions
````diff
@@ -48,6 +48,21 @@ type SystemInfo struct {
 	GPUAvailable bool `json:"gpu_available"`
 }
 
+// OpenAIModel represents a single model in the OpenAI /v1/models response
+type OpenAIModel struct {
+	ID      string `json:"id"`
+	Object  string `json:"object"`
+	Created int64  `json:"created"`
+	OwnedBy string `json:"owned_by"`
+	// Keeping the structure minimal; additional fields like permissions can be added later
+}
+
+// OpenAIModelList is the container for the models list response
+type OpenAIModelList struct {
+	Object string        `json:"object"`
+	Data   []OpenAIModel `json:"data"`
+}
+
 // BatchClassificationRequest represents a batch classification request
 type BatchClassificationRequest struct {
 	Texts []string `json:"texts"`
@@ -178,6 +193,9 @@ func (s *ClassificationAPIServer) setupRoutes() *http.ServeMux {
 	mux.HandleFunc("GET /info/models", s.handleModelsInfo)
 	mux.HandleFunc("GET /info/classifier", s.handleClassifierInfo)
 
+	// OpenAI-compatible endpoints
+	mux.HandleFunc("GET /v1/models", s.handleOpenAIModels)
+
 	// Metrics endpoints
 	mux.HandleFunc("GET /metrics/classification", s.handleClassificationMetrics)
 
@@ -355,6 +373,45 @@ func (s *ClassificationAPIServer) handleClassifierInfo(w http.ResponseWriter, r
 	})
 }
 
+// handleOpenAIModels handles OpenAI-compatible model listing at /v1/models
+// It returns all models discoverable from the router configuration plus a synthetic "auto" model.
+func (s *ClassificationAPIServer) handleOpenAIModels(w http.ResponseWriter, r *http.Request) {
+	now := time.Now().Unix()
+
+	// Start with the special "auto" model always available from the router
+	models := []OpenAIModel{
+		{
+			ID:      "auto",
+			Object:  "model",
+			Created: now,
+			OwnedBy: "semantic-router",
+		},
+	}
+
+	// Append underlying models from config (if available)
+	if s.config != nil {
+		for _, m := range s.config.GetAllModels() {
+			// Skip if already added as "auto" (or avoid duplicates in general)
+			if m == "auto" {
+				continue
+			}
+			models = append(models, OpenAIModel{
+				ID:      m,
+				Object:  "model",
+				Created: now,
+				OwnedBy: "upstream-endpoint",
+			})
+		}
+	}
+
+	resp := OpenAIModelList{
+		Object: "list",
+		Data:   models,
+	}
+
+	s.writeJSONResponse(w, http.StatusOK, resp)
+}
+
 func (s *ClassificationAPIServer) handleClassificationMetrics(w http.ResponseWriter, r *http.Request) {
 	s.writeErrorResponse(w, http.StatusNotImplemented, "NOT_IMPLEMENTED", "Classification metrics not implemented yet")
 }
````
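For context on how a client would consume the new handler, here is a minimal sketch (not part of the commit) that lists the models over HTTP. It assumes the Classification API is reachable at `http://localhost:8080` and simply mirrors the `OpenAIModel`/`OpenAIModelList` shapes added above.

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// Local mirrors of the response types added in server.go above.
type openAIModel struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

type openAIModelList struct {
	Object string        `json:"object"`
	Data   []openAIModel `json:"data"`
}

func main() {
	// Assumption: the Classification API is listening on localhost:8080.
	resp, err := http.Get("http://localhost:8080/v1/models")
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Fatalf("unexpected status: %s", resp.Status)
	}

	var list openAIModelList
	if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
		log.Fatalf("decode failed: %v", err)
	}

	// Print each model ID and its owner, e.g. "auto (semantic-router)".
	for _, m := range list.Data {
		fmt.Printf("%s (%s)\n", m.ID, m.OwnedBy)
	}
}
```

Because the handler appends the synthetic `auto` entry before the configured models, running this against a configured router should print `auto` first, followed by the models declared under `vllm_endpoints` in the config.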

src/semantic-router/pkg/api/server_test.go

Lines changed: 57 additions & 0 deletions
````diff
@@ -248,3 +248,60 @@ func TestBatchClassificationConfiguration(t *testing.T) {
 		})
 	}
 }
+
+func TestOpenAIModelsEndpoint(t *testing.T) {
+	cfg := &config.RouterConfig{
+		VLLMEndpoints: []config.VLLMEndpoint{
+			{
+				Name:    "primary",
+				Address: "localhost",
+				Port:    8000,
+				Models:  []string{"gpt-4o-mini", "llama-3.1-8b-instruct"},
+				Weight:  1,
+			},
+		},
+	}
+
+	apiServer := &ClassificationAPIServer{
+		classificationSvc: services.NewPlaceholderClassificationService(),
+		config:            cfg,
+	}
+
+	req := httptest.NewRequest("GET", "/v1/models", nil)
+	rr := httptest.NewRecorder()
+
+	apiServer.handleOpenAIModels(rr, req)
+
+	if rr.Code != http.StatusOK {
+		t.Fatalf("expected 200 OK, got %d", rr.Code)
+	}
+
+	var resp OpenAIModelList
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("failed to parse response: %v", err)
+	}
+
+	if resp.Object != "list" {
+		t.Errorf("expected object 'list', got %s", resp.Object)
+	}
+
+	// Build a set for easy lookup
+	got := map[string]bool{}
+	for _, m := range resp.Data {
+		got[m.ID] = true
+		if m.Object != "model" {
+			t.Errorf("expected each item.object to be 'model', got %s", m.Object)
+		}
+		if m.Created == 0 {
+			t.Errorf("expected created timestamp to be non-zero")
+		}
+	}
+
+	// Must contain 'auto' and the configured models
+	if !got["auto"] {
+		t.Errorf("expected list to contain 'auto'")
+	}
+	if !got["gpt-4o-mini"] || !got["llama-3.1-8b-instruct"] {
+		t.Errorf("expected configured models to be present, got=%v", got)
+	}
+}
````

website/docs/api/classification.md

Lines changed: 18 additions & 0 deletions
````diff
@@ -18,6 +18,24 @@ The Classification API server runs alongside the main Semantic Router ExtProc se
 - **ExtProc Server**: `http://localhost:50051` (gRPC for Envoy integration)
 - **Metrics Server**: `http://localhost:9190` (Prometheus metrics)
 
+### Endpoint-to-port mapping (quick reference)
+
+- Port 8080 (this API)
+  - `GET /v1/models` (OpenAI-compatible model list, includes `auto`)
+  - `GET /health`
+  - `GET /info/models`, `GET /info/classifier`
+  - `POST /api/v1/classify/intent|pii|security|batch`
+
+- Port 8801 (Envoy public entry)
+  - Typically proxies `POST /v1/chat/completions` to upstream LLMs while invoking ExtProc (50051).
+  - You can expose `GET /v1/models` at 8801 by adding an Envoy route that forwards to `router:8080`.
+
+- Port 50051 (ExtProc, gRPC)
+  - Used by Envoy for external processing of requests; not an HTTP endpoint.
+
+- Port 9190 (Prometheus)
+  - `GET /metrics`
+
 Start the server with:
 
 ```bash
````
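As a rough way to sanity-check the mapping documented above, the sketch below (illustrative only, not part of the commit) probes the HTTP ports using their documented defaults — 8080 for the Classification API and 9190 for Prometheus metrics; adjust hosts and ports if your deployment differs.

```go
package main

import (
	"fmt"
	"net/http"
	"time"
)

func main() {
	client := &http.Client{Timeout: 3 * time.Second}

	// Endpoint-to-port pairs taken from the quick reference above;
	// the ports are the documented defaults, not guaranteed for every setup.
	checks := []string{
		"http://localhost:8080/health",    // Classification API health
		"http://localhost:8080/v1/models", // OpenAI-compatible model list
		"http://localhost:9190/metrics",   // Prometheus metrics
	}

	for _, url := range checks {
		resp, err := client.Get(url)
		if err != nil {
			fmt.Printf("%-40s unreachable: %v\n", url, err)
			continue
		}
		resp.Body.Close()
		fmt.Printf("%-40s %s\n", url, resp.Status)
	}
}
```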

website/docs/api/router.md

Lines changed: 45 additions & 0 deletions
````diff
@@ -6,6 +6,28 @@ The Semantic Router provides a gRPC-based API that integrates seamlessly with En
 
 The Semantic Router operates as an ExtProc server that processes HTTP requests through Envoy Proxy. It doesn't expose direct REST endpoints but rather processes OpenAI-compatible API requests routed through Envoy.
 
+> Note: In addition to the ExtProc path, this project also starts a lightweight HTTP Classification API on port 8080 for health/info and classification utilities. The OpenAI-compatible `/v1/models` endpoint is provided by this HTTP API (8080) and can be optionally exposed through Envoy (8801) via routing rules.
+
+### Ports and endpoint mapping
+
+- 8801 (HTTP, Envoy public entry)
+  - Typical client entry for OpenAI-compatible requests like `POST /v1/chat/completions`.
+  - Can proxy `GET /v1/models` to Router 8080 if you add an Envoy route; otherwise `/v1/models` at 8801 may return “no healthy upstream”.
+
+- 8080 (HTTP, Classification API)
+  - `GET /v1/models` → OpenAI-compatible model list (includes synthetic `auto`)
+  - `GET /health` → Classification API health
+  - `GET /info/models` → Loaded classifier models + system info
+  - `GET /info/classifier` → Classifier configuration details
+  - `POST /api/v1/classify/intent|pii|security|batch` → Direct classification utilities
+
+- 50051 (gRPC, ExtProc)
+  - Envoy External Processing (ExtProc) for in-path classification/routing of `/v1/chat/completions`.
+  - Not an HTTP port; not directly accessible via curl.
+
+- 9190 (HTTP, Prometheus metrics)
+  - `GET /metrics` → Prometheus scrape endpoint (global process metrics).
+
 ### Request Flow
 
 ```mermaid
@@ -30,6 +52,29 @@ sequenceDiagram
 
 The router processes standard OpenAI API requests:
 
+### Models Endpoint
+
+Lists available models and includes a synthetic "auto" model that uses the router's intent classification to select the best underlying model per request.
+
+- Endpoint: `GET /v1/models`
+- Response:
+
+```json
+{
+  "object": "list",
+  "data": [
+    { "id": "auto", "object": "model", "created": 1726890000, "owned_by": "semantic-router" },
+    { "id": "gpt-4o-mini", "object": "model", "created": 1726890000, "owned_by": "upstream-endpoint" },
+    { "id": "llama-3.1-8b-instruct", "object": "model", "created": 1726890000, "owned_by": "upstream-endpoint" }
+  ]
+}
+```
+
+Notes:
+
+- The concrete model list is sourced from your configured vLLM endpoints in `config.yaml` (see `vllm_endpoints[].models`).
+- The special `auto` model is always present and instructs the router to classify and route to the best backend model automatically.
+
 ### Chat Completions Endpoint
 
 **Endpoint:** `POST /v1/chat/completions`
````
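The `auto` model returned by `/v1/models` is meant to be used in ordinary chat-completion calls, with the router classifying the prompt and picking a backend model. The snippet below is a rough sketch of such a call through the Envoy entry point; it assumes the default `localhost:8801` listener from the docs above, and the request fields beyond `model` and `messages` are standard OpenAI chat-completion fields rather than anything this commit defines.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
)

type chatMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type chatRequest struct {
	Model    string        `json:"model"`
	Messages []chatMessage `json:"messages"`
}

func main() {
	// "auto" asks the router to classify the prompt and choose a backend model.
	reqBody, err := json.Marshal(chatRequest{
		Model: "auto",
		Messages: []chatMessage{
			{Role: "user", Content: "Explain what a semantic router does in one sentence."},
		},
	})
	if err != nil {
		log.Fatalf("marshal failed: %v", err)
	}

	// Assumption: Envoy is listening on localhost:8801 (the documented public entry).
	resp, err := http.Post("http://localhost:8801/v1/chat/completions",
		"application/json", bytes.NewReader(reqBody))
	if err != nil {
		log.Fatalf("request failed: %v", err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Printf("status: %s\nbody: %s\n", resp.Status, body)
}
```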
