
Commit c12f076

Merge branch 'master' of https://github.com/VJHack/llama.cpp

2 parents: 6748421 + 23e0d70


70 files changed: +3566 / -3132 lines

.github/workflows/build.yml

Lines changed: 3 additions & 0 deletions
@@ -23,6 +23,9 @@ env:
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
   GGML_NLOOP: 3
   GGML_N_THREADS: 1
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
 
 jobs:
   macOS-latest-cmake-arm64:
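
These CI variables exercise the logging rework in this commit; each one maps to a set_env() binding registered in common/arg.cpp (see below). A minimal local sketch, assuming a built llama-cli binary and a placeholder model path:

    # enable colored, prefixed, timestamped logs via the environment,
    # mirroring what the CI workflow sets globally
    LLAMA_LOG_COLORS=1 LLAMA_LOG_PREFIX=1 LLAMA_LOG_TIMESTAMPS=1 \
        ./llama-cli -m models/7B/ggml-model-q4_0.gguf -p "Hello"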

.github/workflows/server.yml

Lines changed: 6 additions & 0 deletions
@@ -20,6 +20,12 @@ on:
     types: [opened, synchronize, reopened]
     paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
 
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true
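
The server workflow additionally pins LLAMA_LOG_VERBOSITY, which feeds the new -lv/--log-verbosity threshold added in common/arg.cpp: messages with a higher verbosity than the threshold are dropped. A hedged local equivalent (binary name and model path are placeholders):

    # same effect as the workflow's env block, but for a local run
    LLAMA_LOG_VERBOSITY=10 ./llama-server -m models/7B/ggml-model-q4_0.gguf
    # or passed explicitly as a flag
    ./llama-server -m models/7B/ggml-model-q4_0.gguf --log-verbosity 10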

CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -82,11 +82,11 @@ set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
 
 # change the default for these ggml options
 if (NOT DEFINED GGML_LLAMAFILE)
-    set(GGML_LLAMAFILE ON)
+    set(GGML_LLAMAFILE_DEFAULT ON)
 endif()
 
-if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
-    set(GGML_CUDA_USE_GRAPHS ON)
+if (NOT DEFINED GGML_CUDA_GRAPHS)
+    set(GGML_CUDA_GRAPHS_DEFAULT ON)
 endif()
 
 # transition helpers
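
The rename follows ggml's *_DEFAULT convention: the parent project only seeds a default, while the user-facing cache variable keeps its plain name, so an explicit -D on the configure line still wins. A sketch under that assumption:

    # GGML_CUDA_GRAPHS replaces GGML_CUDA_USE_GRAPHS; an explicit value
    # overrides the GGML_CUDA_GRAPHS_DEFAULT seeded above
    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_GRAPHS=OFF
    cmake --build build --config Release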

Makefile

Lines changed: 30 additions & 10 deletions
@@ -54,6 +54,7 @@ TEST_TARGETS = \
 	tests/test-grammar-parser \
 	tests/test-json-schema-to-grammar \
 	tests/test-llama-grammar \
+	tests/test-log \
 	tests/test-model-load-cancel \
 	tests/test-opt \
 	tests/test-quantize-fns \
@@ -148,6 +149,14 @@ GGML_NO_METAL := 1
 DEPRECATE_WARNING := 1
 endif
 
+ifdef LLAMA_DISABLE_LOGS
+REMOVE_WARNING := 1
+endif
+
+ifdef LLAMA_SERVER_VERBOSE
+REMOVE_WARNING := 1
+endif
+
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
@@ -351,19 +360,11 @@ ifdef LLAMA_SANITIZE_UNDEFINED
 MK_LDFLAGS += -fsanitize=undefined -g
 endif
 
-ifdef LLAMA_SERVER_VERBOSE
-MK_CPPFLAGS += -DSERVER_VERBOSE=$(LLAMA_SERVER_VERBOSE)
-endif
-
 ifdef LLAMA_SERVER_SSL
 MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
 MK_LDFLAGS += -lssl -lcrypto
 endif
 
-ifdef LLAMA_DISABLE_LOGS
-MK_CPPFLAGS += -DLOG_DISABLE_LOGS
-endif # LLAMA_DISABLE_LOGS
-
 # warnings
 WARN_FLAGS = \
 	-Wall \
@@ -618,7 +619,7 @@ ifdef GGML_CUDA
 	CUDA_PATH ?= /usr/local/cuda
 endif
 
-	MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
+	MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 	MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
 	MK_NVCCFLAGS += -use_fast_math
 endif # GGML_MUSA
@@ -931,6 +932,7 @@ OBJ_LLAMA = \
 OBJ_COMMON = \
 	common/common.o \
 	common/arg.o \
+	common/log.o \
 	common/console.o \
 	common/ngram-cache.o \
 	common/sampling.o \
@@ -1027,6 +1029,14 @@ $(info - LLAMA_NO_CCACHE)
 $(info )
 endif
 
+ifdef REMOVE_WARNING
+$(info !!! REMOVAL WARNING !!!)
+$(info The following LLAMA_ options have been removed and are no longer supported)
+$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418))
+$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418))
+$(info )
+endif
+
 #
 # Build libraries
 #
@@ -1168,6 +1178,11 @@ common/arg.o: \
 	common/arg.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+common/log.o: \
+	common/log.cpp \
+	common/log.h
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
 common/sampling.o: \
 	common/sampling.cpp \
 	common/sampling.h \
@@ -1346,7 +1361,7 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
 llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
-	$(OBJ_GGML) $(OBJ_LLAMA)
+	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
@@ -1528,6 +1543,11 @@ tests/test-llama-grammar: tests/test-llama-grammar.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+tests/test-log: tests/test-log.cpp \
+	$(OBJ_ALL)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 tests/test-grammar-parser: tests/test-grammar-parser.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
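
With the REMOVE_WARNING plumbing above, passing one of the retired options no longer changes the build; it only prints the removal notice. A sketch of the expected interaction:

    # prints the !!! REMOVAL WARNING !!! block defined above, then builds normally
    make LLAMA_DISABLE_LOGS=1 llama-cli
    # the new logging test target added by this commit
    make tests/test-log && ./tests/test-log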

README.md

Lines changed: 1 addition & 0 deletions
@@ -77,6 +77,7 @@ Typically finetunes of the base models below are supported as well.
 - [x] [SEA-LION](https://huggingface.co/models?search=sea-lion)
 - [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B)
 - [x] [OLMo](https://allenai.org/olmo)
+- [x] [OLMoE](https://huggingface.co/allenai/OLMoE-1B-7B-0924)
 - [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330)
 - [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia)
 - [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520)

ci/run.sh

Lines changed: 3 additions & 0 deletions
@@ -737,6 +737,9 @@ function gg_sum_embd_bge_small {
 
 ## main
 
+export LLAMA_LOG_PREFIX=1
+export LLAMA_LOG_TIMESTAMPS=1
+
 if [ -z ${GG_BUILD_LOW_PERF} ]; then
     # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
     rm -rf ${SRC}/models-mnt

common/CMakeLists.txt

Lines changed: 12 additions & 10 deletions
@@ -51,21 +51,23 @@ endif()
 set(TARGET common)
 
 add_library(${TARGET} STATIC
+    arg.cpp
+    arg.h
     base64.hpp
-    common.h
     common.cpp
-    arg.h
-    arg.cpp
-    sampling.h
-    sampling.cpp
-    console.h
+    common.h
     console.cpp
-    json.hpp
+    console.h
     json-schema-to-grammar.cpp
-    train.h
-    train.cpp
-    ngram-cache.h
+    json.hpp
+    log.cpp
+    log.h
     ngram-cache.cpp
+    ngram-cache.h
+    sampling.cpp
+    sampling.h
+    train.cpp
+    train.h
 )
 
 if (BUILD_SHARED_LIBS)

common/arg.cpp

Lines changed: 58 additions & 59 deletions
@@ -1,15 +1,17 @@
 #include "arg.h"
 
+#include "log.h"
 #include "sampling.h"
 
 #include <algorithm>
-#include <string>
-#include <vector>
-#include <set>
+#include <climits>
+#include <cstdarg>
 #include <fstream>
 #include <regex>
-#include <cstdarg>
-#include <climits>
+#include <set>
+#include <string>
+#include <thread>
+#include <vector>
 
 #include "json-schema-to-grammar.h"

@@ -383,20 +385,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             exit(0);
         }
     ));
-    add_opt(llama_arg(
-        {"-v", "--verbose"},
-        "print verbose information",
-        [](gpt_params & params) {
-            params.verbosity = 1;
-        }
-    ));
-    add_opt(llama_arg(
-        {"--verbosity"}, "N",
-        format("set specific verbosity level (default: %d)", params.verbosity),
-        [](gpt_params & params, int value) {
-            params.verbosity = value;
-        }
-    ));
     add_opt(llama_arg(
         {"--verbose-prompt"},
         format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"),
@@ -417,7 +405,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
         [](gpt_params & params) {
             params.use_color = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
     add_opt(llama_arg(
         {"-t", "--threads"}, "N",
         format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
@@ -697,6 +685,13 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.n_keep = value;
         }
     ));
+    add_opt(llama_arg(
+        {"--no-context-shift"},
+        format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"),
+        [](gpt_params & params) {
+            params.ctx_shift = false;
+        }
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(llama_arg(
         {"--chunks"}, "N",
         format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
@@ -876,15 +871,15 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.input_prefix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
     add_opt(llama_arg(
         {"--in-suffix"}, "STRING",
         "string to suffix after user inputs with (default: empty)",
         [](gpt_params & params, const std::string & value) {
             params.input_suffix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
     add_opt(llama_arg(
         {"--no-warmup"},
         "skip warming up the model with an empty run",
@@ -1824,19 +1819,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.system_prompt = system_prompt;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
-    add_opt(llama_arg(
-        {"--log-format"}, "{text, json}",
-        "log output format: json or text (default: json)",
-        [](gpt_params & params, const std::string & value) {
-            if (value == "json") {
-                params.log_json = true;
-            } else if (value == "text") {
-                params.log_json = false;
-            } else {
-                throw std::invalid_argument("invalid value");
-            }
-        }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(llama_arg(
         {"--metrics"},
         format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"),
@@ -1956,40 +1938,57 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             else { std::invalid_argument("invalid value"); }
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
-#ifndef LOG_DISABLE_LOGS
-    // TODO: make this looks less weird
-    add_opt(llama_arg(
-        {"--log-test"},
-        "Log test",
-        [](gpt_params &) { log_param_single_parse("--log-test"); }
-    ));
     add_opt(llama_arg(
         {"--log-disable"},
         "Log disable",
-        [](gpt_params &) { log_param_single_parse("--log-disable"); }
+        [](gpt_params &) {
+            gpt_log_pause(gpt_log_main());
+        }
     ));
     add_opt(llama_arg(
-        {"--log-enable"},
-        "Log enable",
-        [](gpt_params &) { log_param_single_parse("--log-enable"); }
+        {"--log-file"}, "FNAME",
+        "Log to file",
+        [](gpt_params &, const std::string & value) {
+            gpt_log_set_file(gpt_log_main(), value.c_str());
+        }
     ));
     add_opt(llama_arg(
-        {"--log-new"},
-        "Log new",
-        [](gpt_params &) { log_param_single_parse("--log-new"); }
-    ));
+        {"--log-colors"},
+        "Enable colored logging",
+        [](gpt_params &) {
+            gpt_log_set_colors(gpt_log_main(), true);
+        }
+    ).set_env("LLAMA_LOG_COLORS"));
     add_opt(llama_arg(
-        {"--log-append"},
-        "Log append",
-        [](gpt_params &) { log_param_single_parse("--log-append"); }
+        {"-v", "--verbose", "--log-verbose"},
+        "Set verbosity level to infinity (i.e. log all messages, useful for debugging)",
+        [](gpt_params & params) {
+            params.verbosity = INT_MAX;
+            gpt_log_set_verbosity_thold(INT_MAX);
+        }
     ));
     add_opt(llama_arg(
-        {"--log-file"}, "FNAME",
-        "Log file",
-        [](gpt_params &, const std::string & value) { log_param_pair_parse(false, "--log-file", value); }
-    ));
-#endif // LOG_DISABLE_LOGS
+        {"-lv", "--verbosity", "--log-verbosity"}, "N",
+        "Set the verbosity threshold. Messages with a higher verbosity will be ignored.",
+        [](gpt_params & params, int value) {
+            params.verbosity = value;
+            gpt_log_set_verbosity_thold(value);
+        }
+    ).set_env("LLAMA_LOG_VERBOSITY"));
+    add_opt(llama_arg(
+        {"--log-prefix"},
+        "Enable prefx in log messages",
+        [](gpt_params &) {
+            gpt_log_set_prefix(gpt_log_main(), true);
+        }
+    ).set_env("LLAMA_LOG_PREFIX"));
+    add_opt(llama_arg(
+        {"--log-timestamps"},
+        "Enable timestamps in log messages",
+        [](gpt_params &) {
+            gpt_log_set_timestamps(gpt_log_main(), true);
+        }
+    ).set_env("LLAMA_LOG_TIMESTAMPS"));
 
     return ctx_arg;
 }
-

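Taken together, the ad-hoc --log-test/--log-enable/--log-new/--log-append flags and the server-only --log-format are gone; logging now routes through the common gpt_log_main() instance, and every option registered with set_env() can be driven by the environment instead of the command line. A combined usage sketch (model path and log file name are placeholders):

    # new-style logging controls on the server
    ./llama-server -m models/7B/ggml-model-q4_0.gguf \
        --log-file server.log --log-prefix --log-timestamps -lv 3
    # the same via environment variables, as the CI workflows above do
    LLAMA_LOG_PREFIX=1 LLAMA_LOG_TIMESTAMPS=1 LLAMA_LOG_VERBOSITY=3 \
        ./llama-server -m models/7B/ggml-model-q4_0.gguf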
0 commit comments

Comments
 (0)