Skip to content

Commit 4f88da6

Browse files
committed
Document new server flags
1 parent 6d89f8f commit 4f88da6

File tree

9 files changed

+149
-15
lines changed

9 files changed

+149
-15
lines changed

llama.cpp/main/main.1

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,22 @@ mode to print the llamafile logo in ASCII rather than UNICODE.
616616
.It Fl Fl verbose
617617
Enables verbose logger output in chatbot. This can be helpful for
618618
troubleshooting issues.
619+
.It Fl Fl chat-template Ar NAME
620+
Specifies or overrides chat template for model.
621+
.Pp
622+
Normally the GGUF metadata tokenizer.chat_template will specify this
623+
value for instruct models. This flag may be used to either override the
624+
chat template, or specify one when the GGUF metadata field is absent,
625+
which effectively forces the web ui to enable chatbot mode.
626+
.Pp
627+
Supported chat template names are: chatml, llama2, llama3, mistral
628+
(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
629+
gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
630+
chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
631+
.Pp
632+
It is also possible to pass the jinja2 template itself to this argument.
633+
Since llamafiler doesn't currently support jinja2, a heuristic will be
634+
used to guess which of the above templates the template represents.
619635
.El
620636
.Sh CLI OPTIONS
621637
The following options may be specified when

llama.cpp/main/main.1.asc

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -592,6 +592,26 @@
592592
Enables verbose logger output in chatbot. This can be helpful
593593
for troubleshooting issues.
594594
595+
--chat-template NAME
596+
Specifies or overrides chat template for model.
597+
598+
Normally the GGUF metadata tokenizer.chat_template will specify
599+
this value for instruct models. This flag may be used to either
600+
override the chat template, or specify one when the GGUF meta‐
601+
data field is absent, which effectively forces the web ui to
602+
enable chatbot mode.
603+
604+
Supported chat template names are: chatml, llama2, llama3, mis‐
605+
tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
606+
(alias for gemma), orion, openchat, vicuna, vicuna-orca,
607+
deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
608+
exaone3.
609+
610+
It is also possible to pass the jinja2 template itself to this
611+
argument. Since llamafiler doesn't currently support jinja2, a
612+
heuristic will be used to guess which of the above templates
613+
the template represents.
614+
595615
CLI OPTIONS
596616
The following options may be specified when llamafile is running in
597617
--cli mode.

llamafile/db.cpp

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,9 @@ static sqlite3 *open_impl() {
8686
return nullptr;
8787
}
8888
char *errmsg = nullptr;
89-
if (sqlite3_exec(db, "PRAGMA journal_mode=WAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
90-
fprintf(stderr, "%s: failed to set journal mode to wal: %s\n", path.c_str(), errmsg);
91-
sqlite3_free(errmsg);
92-
sqlite3_close(db);
93-
return nullptr;
94-
}
95-
if (sqlite3_exec(db, "PRAGMA synchronous=NORMAL;", nullptr, nullptr, &errmsg) != SQLITE_OK) {
96-
fprintf(stderr, "%s: failed to set synchronous to normal: %s\n", path.c_str(), errmsg);
89+
if (sqlite3_exec(db, FLAG_db_startup_sql, nullptr, nullptr, &errmsg) != SQLITE_OK) {
90+
fprintf(stderr, "%s: failed to execute startup SQL (%s) because: %s", path.c_str(),
91+
FLAG_db_startup_sql, errmsg);
9792
sqlite3_free(errmsg);
9893
sqlite3_close(db);
9994
return nullptr;

llamafile/flags.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ bool FLAG_trace = false;
5454
bool FLAG_unsecure = false;
5555
const char *FLAG_chat_template = "";
5656
const char *FLAG_db = nullptr;
57+
const char *FLAG_db_startup_sql = "PRAGMA journal_mode=WAL;"
58+
"PRAGMA synchronous=NORMAL;";
5759
const char *FLAG_file = nullptr;
5860
const char *FLAG_ip_header = nullptr;
5961
const char *FLAG_listen = "127.0.0.1:8080";
@@ -193,6 +195,13 @@ void llamafile_get_flags(int argc, char **argv) {
193195
continue;
194196
}
195197

198+
if (!strcmp(flag, "--db-startup-sql")) {
199+
if (i == argc)
200+
missing("--db-startup-sql");
201+
FLAG_db_startup_sql = argv[i++];
202+
continue;
203+
}
204+
196205
//////////////////////////////////////////////////////////////////////
197206
// server flags
198207

llamafile/llamafile.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ extern bool FLAG_trap;
2525
extern bool FLAG_unsecure;
2626
extern const char *FLAG_chat_template;
2727
extern const char *FLAG_db;
28+
extern const char *FLAG_db_startup_sql;
2829
extern const char *FLAG_file;
2930
extern const char *FLAG_ip_header;
3031
extern const char *FLAG_listen;

llamafile/server/main.1

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.Dd August 17, 2024
1+
.Dd November 30, 2024
22
.Dt LLAMAFILER 1
33
.Os Mozilla Ocho
44
.Sh NAME
@@ -30,6 +30,11 @@ recommended that you run multiple instances of llamafiler behind a
3030
reverse proxy such as NGINX or Redbean.
3131
.It Fl mm Ar FNAME , Fl Fl mmproj Ar FNAME
3232
Path of vision model weights.
33+
.It Fl Fl db Ar FILE
34+
Specifies path of sqlite3 database.
35+
.Pp
36+
The default is
37+
.Pa ~/.llamafile/llamafile.sqlite3
3338
.It Fl l Ar HOSTPORT , Fl Fl listen Ar HOSTPORT
3439
Specifies the local [HOST:]PORT on which the HTTP server should listen.
3540
By default this is 0.0.0.0:8080 which means llamafiler will bind to port
@@ -55,8 +60,10 @@ Please note that
5560
has a strong influence on how many slots can be created.
5661
.It Fl p Ar TEXT , Fl Fl prompt Ar TEXT
5762
Specifies system prompt. This value is passed along to the web frontend.
58-
.It Fl Fl no-display-prompt Ar TEXT
63+
.It Fl Fl no-display-prompt
5964
Hide system prompt from web user interface.
65+
.It Fl Fl nologo
66+
Hide llamafile logo icon from web ui.
6067
.It Fl Fl url-prefix Ar URLPREFIX
6168
Specifies a URL prefix (subdirectory) under which the HTTP server will
6269
make the API accessible, e.g. /llamafiler. Useful when running llamafiler
@@ -130,6 +137,39 @@ supported by the host operating system. The default keepalive is 5.
130137
Size of HTTP output buffer, in bytes. Default is 1048576.
131138
.It Fl Fl http-ibuf-size Ar N
132139
Size of HTTP input buffer, in bytes. Default is 1048576.
140+
.It Fl Fl chat-template Ar NAME
141+
Specifies or overrides chat template for model.
142+
.Pp
143+
Normally the GGUF metadata tokenizer.chat_template will specify this
144+
value for instruct models. This flag may be used to either override the
145+
chat template, or specify one when the GGUF metadata field is absent,
146+
which effectively forces the web ui to enable chatbot mode.
147+
.Pp
148+
Supported chat template names are: chatml, llama2, llama3, mistral
149+
(alias for llama2), phi3, zephyr, monarch, gemma, gemma2 (alias for
150+
gemma), orion, openchat, vicuna, vicuna-orca, deepseek, command-r,
151+
chatglm3, chatglm4, minicpm, deepseek2, or exaone3.
152+
.Pp
153+
It is also possible to pass the jinja2 template itself to this argument.
154+
Since llamafiler doesn't currently support jinja2, a heuristic will be
155+
used to guess which of the above templates the template represents.
156+
.It Fl Fl completion-mode
157+
Forces web ui to operate in completion mode, rather than chat mode.
158+
Normally the web ui chooses its mode based on the GGUF metadata. Base
159+
models normally don't define tokenizer.chat_template whereas instruct
160+
models do. If it's a base model, then the web ui will automatically use
161+
completion mode only, without needing to specify this flag. This flag is
162+
useful in cases where a prompt template is defined by the gguf, but it
163+
is desirable for the chat interface to be disabled.
164+
.It Fl Fl db-startup-sql Ar SQL
165+
Specifies SQL code that should be executed whenever connecting to the
166+
SQLite database. The default is the following code, which enables the
167+
write-ahead log.
168+
.Bd -literal -offset indent
169+
PRAGMA journal_mode=WAL;
170+
PRAGMA synchronous=NORMAL;
171+
.Ed
172+
.El
133173
.Sh EXAMPLE
134174
Here's an example of how you might start this server:
135175
.Pp

llamafile/server/main.1.asc

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@
3232
-mm FNAME, --mmproj FNAME
3333
Path of vision model weights.
3434

35+
--db FILE
36+
Specifies path of sqlite3 database.
37+
38+
The default is ~/.llamafile/llamafile.sqlite3
39+
3540
-l HOSTPORT, --listen HOSTPORT
3641
Specifies the local [HOST:]PORT on which the HTTP server should
3742
listen. By default this is 0.0.0.0:8080 which means llamafiler
@@ -63,9 +68,12 @@
6368
Specifies system prompt. This value is passed along to the web
6469
frontend.
6570

66-
[1m--no-display-prompt [4m[22mTEXT[0m
71+
[1m--no-display-prompt[0m
6772
Hide system prompt from web user interface.
6873

74+
--nologo
75+
Hide llamafile logo icon from web ui.
76+
6977
--url-prefix URLPREFIX
7078
Specifies a URL prefix (subdirectory) under which the HTTP
7179
server will make the API accessible, e.g. /llamafiler. Useful
@@ -158,6 +166,44 @@
158166
--http-ibuf-size N
159167
Size of HTTP input buffer size, in bytes. Default is 1048576.
160168

169+
--chat-template NAME
170+
Specifies or overrides chat template for model.
171+
172+
Normally the GGUF metadata tokenizer.chat_template will specify
173+
this value for instruct models. This flag may be used to either
174+
override the chat template, or specify one when the GGUF meta‐
175+
data field is absent, which effectively forces the web ui to
176+
enable chatbot mode.
177+
178+
Supported chat template names are: chatml, llama2, llama3, mis‐
179+
tral (alias for llama2), phi3, zephyr, monarch, gemma, gemma2
180+
(alias for gemma), orion, openchat, vicuna, vicuna-orca,
181+
deepseek, command-r, chatglm3, chatglm4, minicpm, deepseek2, or
182+
exaone3.
183+
184+
It is also possible to pass the jinja2 template itself to this
185+
argument. Since llamafiler doesn't currently support jinja2, a
186+
heuristic will be used to guess which of the above templates
187+
the template represents.
188+
189+
--completion-mode
190+
Forces web ui to operate in completion mode, rather than chat
191+
mode. Normally the web ui chooses its mode based on the GGUF
192+
metadata. Base models normally don't define tokenizer.chat_tem‐
193+
plate whereas instruct models do. If it's a base model, then
194+
the web ui will automatically use completion mode only, without
195+
needing to specify this flag. This flag is useful in cases
196+
where a prompt template is defined by the gguf, but it is de‐
197+
sirable for the chat interface to be disabled.
198+
199+
--db-startup-sql SQL
200+
Specifies SQL code that should be executed whenever connecting
201+
to the SQLite database. The default is the following code,
202+
which enables the write-ahead log.
203+
204+
PRAGMA journal_mode=WAL;
205+
PRAGMA synchronous=NORMAL;
206+
161207
EXAMPLE
162208
Here's an example of how you might start this server:
163209
@@ -172,10 +218,10 @@
172218
curl -v http://127.0.0.1:8080/embedding?content=hello+world
173219
174220
DOCUMENTATION
175-
Read our Markdown documentation for additional help and tutorials. See
221+
Read our Markdown documentation for additional help and tutorials. See
176222
llamafile/server/doc/index.md in the source repository on GitHub.
177223
178224
SEE ALSO
179225
llamafile(1), whisperfile(1)
180226
181-
Mozilla Ocho August 17, 2024 [4mLLAMAFILER[24m(1)
227+
Mozilla Ocho November 30, 2024 [4mLLAMAFILER[24m(1)

llamafile/server/www/chatbot.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ const DEFAULT_SYSTEM_PROMPT =
2222
const DEFAULT_FLAGZ = {
2323
"model": null,
2424
"prompt": null,
25+
"nologo": false,
2526
"no_display_prompt": false,
2627
"frequency_penalty": 0,
2728
"presence_penalty": 0,
@@ -369,6 +370,9 @@ function updateModelInfo() {
369370
document.getElementById("model").textContent = modelName;
370371
document.getElementById("model-completions").textContent = modelName;
371372
}
373+
if (!flagz.nologo) {
374+
document.getElementById("logo").style.display = "inline-block";
375+
}
372376
}
373377

374378
function startChat(history) {

llamafile/server/www/index.html

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@
99
<div class="chat-container" id="chat-interface">
1010
<div class="chat-header">
1111
<h1>
12-
<img src="chatbot.png" alt="[llamafile]" title="llamafile" width="64" height="64">
13-
<span id="model">llamafile</span>
12+
<img src="chatbot.png"
13+
width="64" height="64"
14+
id="logo" style="display:none"
15+
alt="[llamafile]" title="llamafile">
16+
<span id="model">loading...</span>
1417
</h1>
1518
<div class="menu-dropdown">
1619
<button class="menu-trigger"></button>

0 commit comments

Comments
 (0)