
Commit 6baa78f

Merge branch 'ggml-org:master' into mradermacher

2 parents 91b6823 + 814f795


84 files changed: +2363 −2447 lines

.github/workflows/docker.yml

Lines changed: 6 additions & 2 deletions
@@ -36,10 +36,14 @@ jobs:
       matrix:
         config:
           # Multi-stage build
-          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
+          # Note: the arm64 images are failing, which prevents the amd64 images from being built
+          # https://github.com/ggml-org/llama.cpp/issues/11888
+          #- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: false }
+          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: true }
-          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
+          # Note: the intel images are failing due to an out of disk space error
+          # - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, free_disk_space: false }
           # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
           #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, free_disk_space: true }

CMakePresets.json

Lines changed: 0 additions & 13 deletions
@@ -38,15 +38,6 @@
       }
     },

-    {
-      "name": "arm64-windows-msvc", "hidden": true,
-      "architecture": { "value": "arm64", "strategy": "external" },
-      "toolset": { "value": "host=x64", "strategy": "external" },
-      "cacheVariables": {
-        "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
-      }
-    },
-
     {
       "name": "arm64-windows-llvm", "hidden": true,
       "architecture": { "value": "arm64", "strategy": "external" },
@@ -73,10 +64,6 @@
     { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
     { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },

-    { "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
-    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
-    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
-
     { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
     { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
     { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },

Makefile

Lines changed: 19 additions & 24 deletions
@@ -1187,11 +1187,6 @@ llama-cli: tools/main/main.cpp \
 	@echo '==== Run ./llama-cli -h for help. ===='
 	@echo

-llama-infill: examples/infill/infill.cpp \
-	$(OBJ_ALL)
-	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
-
 llama-run: tools/run/run.cpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
@@ -1394,36 +1389,36 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)

-libllava.a: tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+libllava.a: tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	common/stb_image.h \
 	common/base64.hpp \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual

-llama-llava-cli: tools/llava/llava-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-llava-cli: tools/mtmd/llava-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

-llama-minicpmv-cli: tools/llava/minicpmv-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-minicpmv-cli: tools/mtmd/minicpmv-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

-llama-qwen2vl-cli: tools/llava/qwen2vl-cli.cpp \
-	tools/llava/llava.cpp \
-	tools/llava/llava.h \
-	tools/llava/clip.cpp \
-	tools/llava/clip.h \
+llama-qwen2vl-cli: tools/mtmd/qwen2vl-cli.cpp \
+	tools/mtmd/llava.cpp \
+	tools/mtmd/llava.h \
+	tools/mtmd/clip.cpp \
+	tools/mtmd/clip.h \
 	$(OBJ_ALL)
 	$(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual

common/arg.cpp

Lines changed: 8 additions & 8 deletions
@@ -1283,7 +1283,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.use_color = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP}));
     add_opt(common_arg(
         {"-t", "--threads"}, "N",
         string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads),
@@ -1416,7 +1416,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     add_opt(common_arg(
         {"-n", "--predict", "--n-predict"}, "N",
         string_format(
-            ex == LLAMA_EXAMPLE_MAIN || ex == LLAMA_EXAMPLE_INFILL
+            ex == LLAMA_EXAMPLE_MAIN
                 ? "number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)"
                 : "number of tokens to predict (default: %d, -1 = infinity)",
             params.n_predict),
@@ -1655,15 +1655,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.input_prefix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--in-suffix"}, "STRING",
         "string to suffix after user inputs with (default: empty)",
         [](common_params & params, const std::string & value) {
             params.input_suffix = value;
             params.enable_chat_template = false;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--no-warmup"},
         "skip warming up the model with an empty run",
@@ -1680,7 +1680,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.spm_infill = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--samplers"}, "SAMPLERS",
         string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()),
@@ -2211,14 +2211,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file. see tools/llava/README.md",
+        "path to a multimodal projector file. see tools/mtmd/README.md",
        [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
     ).set_examples(mmproj_examples));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
-        "URL to a multimodal projector file. see tools/llava/README.md",
+        "URL to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
@@ -2892,7 +2892,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) {
             params.simple_io = true;
         }
-    ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL}));
+    ).set_examples({LLAMA_EXAMPLE_MAIN}));
     add_opt(common_arg(
         {"--positive-file"}, "FNAME",
         string_format("positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str()),

common/common.h

Lines changed: 3 additions & 2 deletions
@@ -66,7 +66,6 @@ enum llama_example {
     LLAMA_EXAMPLE_COMMON,
     LLAMA_EXAMPLE_SPECULATIVE,
     LLAMA_EXAMPLE_MAIN,
-    LLAMA_EXAMPLE_INFILL,
     LLAMA_EXAMPLE_EMBEDDING,
     LLAMA_EXAMPLE_PERPLEXITY,
     LLAMA_EXAMPLE_RETRIEVAL,
@@ -96,6 +95,7 @@ enum common_sampler_type {
     COMMON_SAMPLER_TYPE_XTC = 8,
     COMMON_SAMPLER_TYPE_INFILL = 9,
     COMMON_SAMPLER_TYPE_PENALTIES = 10,
+    COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
 };

 // dimensionality reduction methods, used by cvector-generator
@@ -161,6 +161,7 @@ struct common_params_sampling {
     std::vector<enum common_sampler_type> samplers = {
         COMMON_SAMPLER_TYPE_PENALTIES,
         COMMON_SAMPLER_TYPE_DRY,
+        COMMON_SAMPLER_TYPE_TOP_N_SIGMA,
         COMMON_SAMPLER_TYPE_TOP_K,
         COMMON_SAMPLER_TYPE_TYPICAL_P,
         COMMON_SAMPLER_TYPE_TOP_P,
@@ -340,7 +341,7 @@ struct common_params {

     common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;

-    // multimodal models (see tools/llava)
+    // multimodal models (see tools/mtmd)
     struct common_params_model mmproj;
     bool mmproj_use_gpu = true; // use GPU for multimodal model
     bool no_mmproj = false;     // explicitly disable multimodal model
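The new COMMON_SAMPLER_TYPE_TOP_N_SIGMA entry wires the top-n-sigma sampler into the default chain. As a rough illustration of the underlying rule, here is a minimal standalone sketch (not the llama.cpp implementation): keep only tokens whose logit lies within n standard deviations of the maximum logit, and mask the rest before sampling.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the top-n-sigma rule: mask every logit more than
// n standard deviations below the maximum logit.
static void top_n_sigma_mask(std::vector<float> & logits, float n) {
    const float max_logit = *std::max_element(logits.begin(), logits.end());

    // mean and standard deviation over all logits
    float mean = 0.0f;
    for (float l : logits) mean += l;
    mean /= logits.size();

    float var = 0.0f;
    for (float l : logits) var += (l - mean) * (l - mean);
    const float sigma = std::sqrt(var / logits.size());

    // everything below the threshold is excluded from sampling
    const float threshold = max_logit - n * sigma;
    for (float & l : logits) {
        if (l < threshold) l = -INFINITY;
    }
}

int main() {
    std::vector<float> logits = { 2.0f, 1.5f, 0.1f, -3.0f, -8.0f };
    top_n_sigma_mask(logits, 1.0f); // with n = 1, only near-top logits survive
    for (float l : logits) printf("%g\n", l);
}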
