Skip to content

Commit 82b9831

Browse files
committed
Add /v1/models endpoint
This endpoint is typically used by OpenAI-compatible clients, such as OpenWebUI, for model discovery and health checks.
1 parent 97167eb commit 82b9831

File tree

3 files changed

+53
-0
lines changed

3 files changed

+53
-0
lines changed

llamafile/server/client.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,8 @@ Client::dispatcher()
699699
return v1_completions();
700700
if (p1 == "v1/chat/completions")
701701
return v1_chat_completions();
702+
if (p1 == "v1/models")
703+
return v1_models();
702704
if (p1 == "slotz")
703705
return slotz();
704706
if (p1 == "flagz")

llamafile/server/client.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ struct Client
117117
bool v1_chat_completions() __wur;
118118
bool get_v1_chat_completions_params(V1ChatCompletionParams*) __wur;
119119

120+
bool v1_models() __wur;
121+
120122
bool slotz() __wur;
121123
bool flagz() __wur;
122124
bool db_chat(int64_t) __wur;

llamafile/server/v1_models.cpp

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
2+
// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi
3+
//
4+
// Copyright 2024 Mozilla Foundation
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
18+
#include "client.h"
19+
#include "llama.cpp/llama.h"
20+
#include "llamafile/json.h"
21+
#include "llamafile/llamafile.h"
22+
#include "llamafile/string.h"
23+
#include <ctime>
24+
25+
using jt::Json;
26+
27+
namespace lf {
28+
namespace server {
29+
30+
// Use it as reported model creation time
31+
static const time_t model_creation_time = time(0);
32+
33+
bool
34+
Client::v1_models()
35+
{
36+
jt::Json json;
37+
json["object"] = "list";
38+
Json& model = json["data"][0];
39+
model["id"] = stripext(basename(FLAG_model));
40+
model["object"] = "model";
41+
model["created"] = model_creation_time;
42+
model["owned_by"] = "llamafile";
43+
char* p = append_http_response_message(obuf_.p, 200);
44+
p = stpcpy(p, "Content-Type: application/json\r\n");
45+
return send_response(obuf_.p, p, json.toString());
46+
}
47+
48+
} // namespace server
49+
} // namespace lf

0 commit comments

Comments
 (0)