Commit 8c8f846

api: support llama.cpp webui
1 parent 546fa4d

File tree

3 files changed: +106 -11 lines changed


docs/binding.md

Lines changed: 16 additions & 2 deletions

@@ -56,9 +56,12 @@ streamlit run chatllm_st.py -- -i -m path/to/model
 
 Note: "STOP" function is not implemented yet.
 
-### OpenAI/Ollama Compatible API
+### OpenAI/Ollama/llama.cpp Compatible API
 
-[Here](../scripts/openai_api.py) is a server providing some OpenAI/Ollama Compatible API. Note that most of
+> [!IMPORTANT]
+> This is going to be re-written in Nim. Only basic functionalities are provided.
+
+[Here](../scripts/openai_api.py) is a server (default port: 11434) providing some OpenAI/Ollama/llama.cpp Compatible API. Note that most of
 the parameters are ignored.
 
 `openai_api.py` supports loading several types of models for chatting, code completion (FIM), text embedding, etc.
@@ -84,6 +87,8 @@ Some base models that can be used for code completion:
 * CodeGemma v1.1: [Base-2B](https://huggingface.co/google/codegemma-1.1-2b), [Base-7B](https://huggingface.co/google/codegemma-1.1-7b)
 * StarCoder2: [Base-3B](https://huggingface.co/bigcode/starcoder2-7b), [Base-7B](https://huggingface.co/bigcode/starcoder2-7b), [Base-15B](https://huggingface.co/bigcode/starcoder2-15b) (not recommended)
 
+#### Ollama
+
 This module provides enough of the Ollama API that it can be used to emulate an Ollama model provider in Visual Studio Code Copilot.
 For example, starting the server with a model:
 
@@ -95,6 +100,15 @@ Select the model from Ollama provider:
 
 ![](vscode_ollama.png)
 
+#### WebUI
+
+`openai_api.py` provides a WebUI. The WebUI of [llama.cpp](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md) is also (partly) supported.
+Use `--ui` to select the llama.cpp WebUI:
+
+```sh
+python openai_api.py --ui /path/to/index.html.gz ---chat :qwen2.5
+```
+
 ## JavaScript/TypeScript
 
 ### Command line
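
Once the server is running, any OpenAI-style client should be able to talk to it. The sketch below is a minimal, hypothetical check using only the standard library; the endpoint path (`/v1/chat/completions`), the model name, and the response shape follow the usual OpenAI convention and are assumptions rather than details confirmed by this commit (recall that most request parameters are ignored anyway).

```python
# Hypothetical client-side check against the OpenAI-compatible server.
# Endpoint path, model name, and response shape are assumed, not taken from this commit.
import json
import urllib.request

req = urllib.request.Request(
    "http://localhost:11434/v1/chat/completions",   # default port used by openai_api.py
    data=json.dumps({
        "model": "chat",                             # illustrative name for the loaded chat model
        "messages": [{"role": "user", "content": "Hello!"}],
    }).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as rsp:
    reply = json.loads(rsp.read())
    print(reply["choices"][0]["message"]["content"])  # assumes OpenAI-style response body
```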

scripts/openai_api.py

Lines changed: 90 additions & 9 deletions

@@ -182,6 +182,7 @@ def make_id(self) -> str:
 http_server: HTTPServer = None
 
 model_info = {}
+ui_file_name = 'chat_ui.html'
 
 def get_streamer(model: str) -> ChatLLMStreamer | None:
     if model.endswith('fim') or model.startswith('fim'):
@@ -375,14 +376,71 @@ def handle_TAGS(self, obj: dict):
375376
self.wfile.flush()
376377

377378
def handle_UI(self, obj: dict):
379+
fn = ui_file_name
380+
if not os.path.isfile(fn):
381+
fn = 'scripts/' + fn
382+
assert os.path.isfile(fn)
383+
378384
self.send_response(200)
385+
if fn.endswith('.gz'):
386+
self.send_header("Content-Encoding", "gzip")
379387
self.send_header('Content-type', 'text/html; charset=utf-8')
380388
self.end_headers()
381-
fn = 'chat_ui.html'
382-
if not os.path.isfile('chat_ui.html'):
383-
fn = 'scripts/' + fn
384-
with open(fn, 'r', encoding='utf-8') as f:
385-
self.wfile.write(f.read().encode())
389+
390+
with open(fn, 'rb') as f:
391+
self.wfile.write(f.read())
392+
self.wfile.flush()
393+
394+
def handle_llama_props(self, obj: dict):
395+
global model_info
396+
capabilities = model_info['chat']['capabilities']
397+
modalities = {
398+
"vision": "Image Input" in capabilities
399+
}
400+
rsp = {
401+
"default_generation_settings": "",
402+
"total_slots": 1,
403+
"model_alias": model_info['chat']['name'],
404+
"model_path": "",
405+
"modalities": modalities,
406+
"endpoint_slots": 0,
407+
"endpoint_props": {},
408+
"endpoint_metrics": 0,
409+
"webui": 0,
410+
"chat_template": "",
411+
"bos_token": [],
412+
"eos_token": [],
413+
"build_info": "Today",
414+
}
415+
self.send_response(200)
416+
self.send_header('Content-type', 'application/json')
417+
self.end_headers()
418+
self.wfile.write(json.dumps(rsp, indent=True).encode('utf-8'))
419+
self.wfile.flush()
420+
421+
def handle_llama_slots(self, obj: dict):
422+
global model_info
423+
rsp = [
424+
{
425+
"id": 0,
426+
"id_task": 1,
427+
"n_ctx": model_info['chat']['context_length'],
428+
"speculative": False,
429+
"is_processing": False,
430+
"params": {
431+
"n_predict": -1,
432+
"seed": 1,
433+
"temperature": 0.8,
434+
"dynatemp_range": 0.0,
435+
"dynatemp_exponent": 1.0,
436+
"top_k": 40,
437+
}
438+
}
439+
]
440+
self.send_response(200)
441+
self.send_header('Content-type', 'application/json')
442+
self.end_headers()
443+
self.wfile.write(json.dumps(rsp, indent=True).encode('utf-8'))
386444
self.wfile.flush()
387445

388446
def do_GET(self):
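
The two new handlers above mirror llama.cpp's `/props` and `/slots` endpoints just closely enough for its WebUI to render. A quick way to eyeball the emulated responses, assuming the server is already running on the default port 11434:

```python
# Minimal smoke test for the llama.cpp-style endpoints added above.
# Assumes openai_api.py is already serving a chat model on localhost:11434.
import json
import urllib.request

for path in ("/props", "/slots"):
    with urllib.request.urlopen(f"http://localhost:11434{path}") as rsp:
        body = json.loads(rsp.read())
        # Print a short prefix of each JSON reply for inspection.
        print(path, "->", json.dumps(body, indent=2)[:200], "...")
```
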
@@ -393,9 +451,15 @@ def do_GET(self):
         elif self.path.endswith('/tags'):
             self.handle_TAGS({})
             return
-        elif self.path.endswith('/ui'):
+        elif self.path.endswith('/props'):
+            self.handle_llama_props({})
+            return
+        elif self.path.endswith('/ui') or self.path.startswith('/?') or (self.path in ['', '/']):
             self.handle_UI({})
             return
+        elif self.path.startswith('/slots'):
+            self.handle_llama_slots({})
+            return
         else:
             self.send_error(404, 'NOT FOUND')
             return
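
With this routing, a plain browser `GET /` (or `GET /?...`) now lands in `handle_UI`, which serves a pre-compressed `index.html.gz` verbatim with `Content-Encoding: gzip`. Browsers decode this automatically; a sketch of what a non-browser client has to do, again assuming the default port:

```python
# Fetch the WebUI root and transparently handle the pre-gzipped case.
# Assumes the server runs on localhost:11434.
import gzip
import urllib.request

with urllib.request.urlopen("http://localhost:11434/") as rsp:
    raw = rsp.read()
    if rsp.headers.get("Content-Encoding") == "gzip":
        raw = gzip.decompress(raw)   # handle_UI writes .gz files unmodified
    print(raw[:80])                  # start of the HTML document
```
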
@@ -419,13 +483,29 @@ def do_OPTIONS(self):
 signal.signal(signal.SIGINT, handler)
 
 ARG_SEP = '---'
+port = 11434
 
 args = sys.argv[1:]
 if len(args) < 1:
-    print(f"usage: python openai_api.py [{ARG_SEP}TYPE path/to/model [additional args]]")
+    print(f"usage: python openai_api.py [app_args] [{ARG_SEP}TYPE path/to/model [additional args]]")
+    print(f"where app_args :: --ui /path/to/ui --port PORT")
     print('where TYPE ::= chat | fim | emb')
     exit(-1)
 
+while len(args) > 0:
+    if args[0] == '--ui':
+        args.pop(0)
+        assert len(args) > 0
+        ui_file_name = args[0]
+        args.pop(0)
+    if args[0] == '--port':
+        args.pop(0)
+        assert len(args) > 0
+        port = int(args[0])
+        args.pop(0)
+    else:
+        break
+
 chat_args = ['-m']
 fim_args = ['-m']
 emb_args = ['-m']
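
The new `--ui`/`--port` options are consumed from the front of the argument list before the existing `---TYPE` sections are parsed; anything unrecognized stops the loop. A small standalone illustration of that consumption logic (the function name and the condensed `elif` form are illustrative, not the script's literal code):

```python
# Standalone sketch of the leading-flag parsing used by openai_api.py:
# flags are popped off the front until something that is not --ui/--port appears.
def parse_app_args(args: list[str]) -> tuple[str, int, list[str]]:
    ui_file_name, port = 'chat_ui.html', 11434       # defaults from the script
    while len(args) > 0:
        if args[0] == '--ui':
            args.pop(0)
            ui_file_name = args.pop(0)
        elif args[0] == '--port':
            args.pop(0)
            port = int(args.pop(0))
        else:
            break                                     # remaining args: ---chat/---fim/---emb sections
    return ui_file_name, port, args

print(parse_app_args(['--ui', 'index.html.gz', '--port', '8080', '---chat', ':qwen2.5']))
# ('index.html.gz', 8080, ['---chat', ':qwen2.5'])
```
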
@@ -461,6 +541,7 @@ def do_OPTIONS(self):
 
 print(model_info)
 
-print("LLM Loaded. Starting server...")
-http_server = HTTPServer(('0.0.0.0', 11434), HttpHandler)
+print(f"LLM Loaded. Starting server on port {port}...")
+print(f"http://localhost:{port}")
+http_server = HTTPServer(('0.0.0.0', port), HttpHandler)
 http_server.serve_forever()
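
Since the port is now configurable and the server prints the URL it binds to, a small convenience on the client side is to wait for that port to accept connections before opening the WebUI. This helper is purely illustrative and not part of `openai_api.py`:

```python
# Optional convenience: wait until the freshly started server accepts connections,
# then open the WebUI it announces on http://localhost:{port}.
# The helper name and polling interval are illustrative assumptions.
import socket
import time
import webbrowser

def open_webui_when_ready(port: int = 11434, timeout: float = 30.0) -> None:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection(("localhost", port), timeout=1):
                webbrowser.open(f"http://localhost:{port}")
                return
        except OSError:
            time.sleep(0.5)          # server may still be loading the model
    raise TimeoutError(f"server on port {port} did not come up")

open_webui_when_ready(11434)
```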
