Skip to content

Commit 32daa38

Browse files
committed
Merge branch 'master' into xsn/vision_2
2 parents ad38e87 + 96f4053 commit 32daa38

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

65 files changed

+7539
-940
lines changed

.github/workflows/build.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ jobs:
8787
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
8888
run: |
8989
cp LICENSE ./build/bin/
90+
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
9091
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
9192
9293
- name: Upload artifacts
@@ -149,6 +150,7 @@ jobs:
149150
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
150151
run: |
151152
cp LICENSE ./build/bin/
153+
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
152154
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
153155
154156
- name: Upload artifacts
@@ -217,6 +219,7 @@ jobs:
217219
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
218220
run: |
219221
cp LICENSE ./build/bin/
222+
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
220223
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
221224
222225
- name: Upload artifacts
@@ -234,7 +237,7 @@ jobs:
234237
strategy:
235238
matrix:
236239
sanitizer: [ADDRESS, THREAD, UNDEFINED]
237-
build_type: [Debug, Release]
240+
build_type: [Debug]
238241

239242
steps:
240243
- name: Clone
@@ -796,6 +799,7 @@ jobs:
796799
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
797800
run: |
798801
Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
802+
Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
799803
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
800804
801805
- name: Upload artifacts

.github/workflows/server.yml

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ jobs:
112112
-DGGML_OPENMP=OFF ;
113113
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
114114
115-
- name: Build
116-
id: cmake_build
117-
if: ${{ matrix.sanitizer != 'THREAD' }}
115+
- name: Build (sanitizers)
116+
id: cmake_build_sanitizers
117+
if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
118118
run: |
119119
cmake -B build \
120120
-DGGML_NATIVE=OFF \
@@ -124,12 +124,31 @@ jobs:
124124
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
125125
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
126126
127+
- name: Build
128+
id: cmake_build
129+
if: ${{ matrix.sanitizer == '' }}
130+
run: |
131+
cmake -B build \
132+
-DGGML_NATIVE=OFF \
133+
-DLLAMA_BUILD_SERVER=ON \
134+
-DLLAMA_CURL=ON \
135+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
136+
cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
137+
127138
- name: Tests
128139
id: server_integration_tests
140+
if: ${{ matrix.sanitizer == '' }}
129141
run: |
130142
cd examples/server/tests
131143
./tests.sh
132144
145+
- name: Tests (sanitizers)
146+
id: server_integration_tests_sanitizers
147+
if: ${{ matrix.sanitizer != '' }}
148+
run: |
149+
cd examples/server/tests
150+
LLAMA_SANITIZE=1 ./tests.sh
151+
133152
- name: Slow tests
134153
id: server_integration_tests_slow
135154
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}

CMakeLists.txt

Lines changed: 49 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,8 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
8383
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
8484

8585
# override ggml options
86-
set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
87-
set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
88-
set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
89-
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
90-
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
86+
set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
87+
set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
9188

9289
# change the default for these ggml options
9390
if (NOT DEFINED GGML_LLAMAFILE)
@@ -117,16 +114,62 @@ llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
117114
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
118115
llama_option_depr(WARNING LLAMA_CANN GGML_CANN)
119116

117+
if (NOT MSVC)
118+
if (LLAMA_SANITIZE_THREAD)
119+
message(STATUS "Using -fsanitize=thread")
120+
121+
add_compile_options(-fsanitize=thread)
122+
link_libraries (-fsanitize=thread)
123+
endif()
124+
125+
if (LLAMA_SANITIZE_ADDRESS)
126+
message(STATUS "Using -fsanitize=address")
127+
128+
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
129+
link_libraries (-fsanitize=address)
130+
endif()
131+
132+
if (LLAMA_SANITIZE_UNDEFINED)
133+
message(STATUS "Using -fsanitize=undefined")
134+
135+
add_compile_options(-fsanitize=undefined)
136+
link_libraries (-fsanitize=undefined)
137+
endif()
138+
endif()
139+
120140
#
121-
# build the library
141+
# 3rd-party
122142
#
123143

124144
if (NOT TARGET ggml)
125145
add_subdirectory(ggml)
126146
# ... otherwise assume ggml is added by a parent CMakeLists.txt
127147
endif()
148+
149+
#
150+
# build the library
151+
#
152+
128153
add_subdirectory(src)
129154

155+
#
156+
# utils, programs, examples and tests
157+
#
158+
159+
if (LLAMA_BUILD_COMMON)
160+
add_subdirectory(common)
161+
endif()
162+
163+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
164+
include(CTest)
165+
add_subdirectory(tests)
166+
endif()
167+
168+
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
169+
add_subdirectory(examples)
170+
add_subdirectory(pocs)
171+
endif()
172+
130173
#
131174
# install
132175
#
@@ -200,21 +243,3 @@ configure_file(cmake/llama.pc.in
200243

201244
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
202245
DESTINATION lib/pkgconfig)
203-
204-
#
205-
# utils, programs, examples and tests
206-
#
207-
208-
if (LLAMA_BUILD_COMMON)
209-
add_subdirectory(common)
210-
endif()
211-
212-
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
213-
include(CTest)
214-
add_subdirectory(tests)
215-
endif()
216-
217-
if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
218-
add_subdirectory(examples)
219-
add_subdirectory(pocs)
220-
endif()

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,9 @@ llama-server: \
13611361
examples/server/httplib.h \
13621362
examples/server/index.html.hpp \
13631363
examples/server/loading.html.hpp \
1364+
common/chat-template.hpp \
13641365
common/json.hpp \
1366+
common/minja.hpp \
13651367
$(OBJ_ALL)
13661368
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
13671369
$(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)

cmake/build-info.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ if(MSVC)
4444
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
4545
else()
4646
execute_process(
47-
COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
47+
COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
4848
OUTPUT_VARIABLE OUT
4949
OUTPUT_STRIP_TRAILING_WHITESPACE
5050
)

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ add_library(${TARGET} STATIC
5656
arg.cpp
5757
arg.h
5858
base64.hpp
59+
chat-template.hpp
5960
common.cpp
6061
common.h
6162
console.cpp
@@ -64,6 +65,7 @@ add_library(${TARGET} STATIC
6465
json.hpp
6566
log.cpp
6667
log.h
68+
minja.hpp
6769
ngram-cache.cpp
6870
ngram-cache.h
6971
sampling.cpp

common/arg.cpp

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@ static void common_params_handle_model_default(
133133
const std::string & model_url,
134134
std::string & hf_repo,
135135
std::string & hf_file,
136-
const std::string & hf_token) {
136+
const std::string & hf_token,
137+
const std::string & model_default) {
137138
if (!hf_repo.empty()) {
138139
// short-hand to avoid specifying --hf-file -> default it to --model
139140
if (hf_file.empty()) {
@@ -163,7 +164,7 @@ static void common_params_handle_model_default(
163164
model = fs_get_cache_file(string_split<std::string>(f, '/').back());
164165
}
165166
} else if (model.empty()) {
166-
model = DEFAULT_MODEL_PATH;
167+
model = model_default;
167168
}
168169
}
169170

@@ -299,8 +300,9 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
299300
}
300301

301302
// TODO: refactor model params in a common struct
302-
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token);
303-
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token);
303+
common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH);
304+
common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, "");
305+
common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, "");
304306

305307
if (params.escape) {
306308
string_process_escapes(params.prompt);
@@ -323,6 +325,14 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
323325
throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
324326
}
325327

328+
if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) {
329+
throw std::runtime_error(string_format(
330+
"error: the supplied chat template is not supported: %s%s\n",
331+
params.chat_template.c_str(),
332+
params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates"
333+
));
334+
}
335+
326336
return true;
327337
}
328338

@@ -1629,6 +1639,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
16291639
params.hf_repo = value;
16301640
}
16311641
).set_env("LLAMA_ARG_HF_REPO"));
1642+
add_opt(common_arg(
1643+
{"-hfd", "-hfrd", "--hf-repo-draft"}, "<user>/<model>[:quant]",
1644+
"Same as --hf-repo, but for the draft model (default: unused)",
1645+
[](common_params & params, const std::string & value) {
1646+
params.speculative.hf_repo = value;
1647+
}
1648+
).set_env("LLAMA_ARG_HFD_REPO"));
16321649
add_opt(common_arg(
16331650
{"-hff", "--hf-file"}, "FILE",
16341651
"Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)",
@@ -1938,24 +1955,44 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
19381955
}
19391956
}
19401957
).set_examples({LLAMA_EXAMPLE_SERVER}));
1958+
add_opt(common_arg(
1959+
{"--jinja"},
1960+
"use jinja template for chat (default: disabled)",
1961+
[](common_params & params) {
1962+
params.use_jinja = true;
1963+
}
1964+
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
19411965
add_opt(common_arg(
19421966
{"--chat-template"}, "JINJA_TEMPLATE",
19431967
string_format(
19441968
"set custom jinja chat template (default: template taken from model's metadata)\n"
19451969
"if suffix/prefix are specified, template will be disabled\n"
1970+
"only commonly used templates are accepted (unless --jinja is set before this flag):\n"
19461971
"list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
19471972
),
19481973
[](common_params & params, const std::string & value) {
1949-
if (!common_chat_verify_template(value)) {
1950-
throw std::runtime_error(string_format(
1951-
"error: the supplied chat template is not supported: %s\n"
1952-
"note: llama.cpp does not use jinja parser, we only support commonly used templates\n",
1953-
value.c_str()
1954-
));
1955-
}
19561974
params.chat_template = value;
19571975
}
19581976
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE"));
1977+
add_opt(common_arg(
1978+
{"--chat-template-file"}, "JINJA_TEMPLATE_FILE",
1979+
string_format(
1980+
"set custom jinja chat template file (default: template taken from model's metadata)\n"
1981+
"if suffix/prefix are specified, template will be disabled\n"
1982+
"only commonly used templates are accepted (unless --jinja is set before this flag):\n"
1983+
"list of built-in templates:\n%s", list_builtin_chat_templates().c_str()
1984+
),
1985+
[](common_params & params, const std::string & value) {
1986+
std::ifstream file(value);
1987+
if (!file) {
1988+
throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
1989+
}
1990+
std::copy(
1991+
std::istreambuf_iterator<char>(file),
1992+
std::istreambuf_iterator<char>(),
1993+
std::back_inserter(params.chat_template));
1994+
}
1995+
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
19591996
add_opt(common_arg(
19601997
{"-sps", "--slot-prompt-similarity"}, "SIMILARITY",
19611998
string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity),
@@ -2254,6 +2291,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22542291
params.vocoder.model = value;
22552292
}
22562293
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
2294+
add_opt(common_arg(
2295+
{"--tts-use-guide-tokens"},
2296+
"Use guide tokens to improve TTS word recall",
2297+
[](common_params & params) {
2298+
params.vocoder.use_guide_tokens = true;
2299+
}
2300+
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
22572301

22582302
// model-specific
22592303
add_opt(common_arg(

0 commit comments

Comments (0)