Skip to content

Commit 6ae1438

Browse files
authored
Merge branch 'ikawrakow:main' into main
2 parents 5f937c9 + 95780cd commit 6ae1438

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

64 files changed

+115458
-8965
lines changed

common/CMakeLists.txt

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,15 +65,12 @@ add_library(${TARGET} STATIC
6565
console.cpp
6666
grammar-parser.h
6767
grammar-parser.cpp
68-
json.hpp
6968
json-partial.h
7069
json-partial.cpp
7170
llguidance.cpp
7271
json-schema-to-grammar.cpp
7372
train.h
7473
train.cpp
75-
minja/chat-template.hpp
76-
minja/minja.hpp
7774
ngram-cache.h
7875
ngram-cache.cpp
7976
speculative.cpp
@@ -123,6 +120,6 @@ if (LLAMA_LLGUIDANCE)
123120
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
124121
endif ()
125122

126-
target_include_directories(${TARGET} PUBLIC .)
127-
target_compile_features (${TARGET} PUBLIC cxx_std_11)
123+
target_include_directories(${TARGET} PUBLIC . ../vendor)
124+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
128125
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

common/chat-parser.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
#include "chat.h"
44
#include "json-partial.h"
5-
#include "json.hpp"
65
#include "regex-partial.h"
76

87
#include <optional>

common/chat.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,11 +489,12 @@ std::string common_chat_format_single(
489489
return ss.str();
490490
}
491491

492-
std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja) {
492+
std::string common_chat_format_example(const struct common_chat_templates * tmpls, bool use_jinja, const std::map<std::string, std::string> & chat_template_kwargs) {
493493
common_chat_templates_inputs inputs;
494494
inputs.use_jinja = use_jinja;
495495
inputs.add_bos = tmpls->add_bos;
496496
inputs.add_eos = tmpls->add_eos;
497+
inputs.chat_template_kwargs = chat_template_kwargs;
497498
auto add_simple_msg = [&](auto role, auto content) {
498499
common_chat_msg msg;
499500
msg.role = role;

common/chat.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ std::string common_chat_format_single(
188188
// Returns an example of formatted chat
189189
std::string common_chat_format_example(
190190
const struct common_chat_templates * tmpls,
191-
bool use_jinja);
191+
bool use_jinja,
192+
const std::map<std::string, std::string> & chat_template_kwargs);
192193

193194
const char* common_chat_format_name(common_chat_format format);
194195
const char* common_reasoning_format_name(common_reasoning_format format);

common/common.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "common.h"
1313
// Change JSON_ASSERT from assert() to GGML_ASSERT:
1414
#define JSON_ASSERT GGML_ASSERT
15-
#include "json.hpp"
1615
#include "llama-vocab.h"
1716
#include "llama.h"
1817
#include "chat.h"
@@ -899,7 +898,16 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
899898
}
900899
if (arg == "--mmproj") {
901900
CHECK_ARG
902-
params.mmproj = argv[i];
901+
params.mmproj.path = argv[i];
902+
return true;
903+
}
904+
if (arg == "--mmproj-url") {
905+
CHECK_ARG
906+
params.mmproj.url = argv[i];
907+
return true;
908+
}
909+
if (arg == "--no-mmproj-offload") {
910+
params.mmproj_use_gpu = false;
903911
return true;
904912
}
905913
if (arg == "--image") {

common/common.h

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,29 @@ struct llama_control_vector_load_info;
6868
int32_t cpu_get_num_physical_cores();
6969
int32_t cpu_get_num_math();
7070

71+
enum llama_example {
72+
LLAMA_EXAMPLE_COMMON,
73+
LLAMA_EXAMPLE_SPECULATIVE,
74+
LLAMA_EXAMPLE_MAIN,
75+
LLAMA_EXAMPLE_EMBEDDING,
76+
LLAMA_EXAMPLE_PERPLEXITY,
77+
LLAMA_EXAMPLE_RETRIEVAL,
78+
LLAMA_EXAMPLE_PASSKEY,
79+
LLAMA_EXAMPLE_IMATRIX,
80+
LLAMA_EXAMPLE_BENCH,
81+
LLAMA_EXAMPLE_SERVER,
82+
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
83+
LLAMA_EXAMPLE_EXPORT_LORA,
84+
LLAMA_EXAMPLE_MTMD,
85+
LLAMA_EXAMPLE_LOOKUP,
86+
LLAMA_EXAMPLE_PARALLEL,
87+
LLAMA_EXAMPLE_TTS,
88+
LLAMA_EXAMPLE_DIFFUSION,
89+
LLAMA_EXAMPLE_FINETUNE,
90+
91+
LLAMA_EXAMPLE_COUNT,
92+
};
93+
7194
//
7295
// CLI argument parsing
7396
//
@@ -86,6 +109,14 @@ enum common_reasoning_format {
86109
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
87110
};
88111

112+
struct model_paths {
113+
std::string path = ""; // model local path // NOLINT
114+
std::string url = ""; // model url to download // NOLINT
115+
std::string hf_repo = ""; // HF repo // NOLINT
116+
std::string hf_file = ""; // HF file // NOLINT
117+
std::string docker_repo = ""; // Docker repo // NOLINT
118+
};
119+
89120
struct gpt_params {
90121
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
91122

@@ -230,8 +261,10 @@ struct gpt_params {
230261
std::string cache_type_k_draft = ""; // KV cache data type for K for the draft model
231262
std::string cache_type_v_draft = ""; // KV cache data type for V for the draft model
232263

233-
// multimodal models (see examples/llava)
234-
std::string mmproj = ""; // path to multimodal projector
264+
// multimodal models (see examples/mtmd)
265+
model_paths mmproj;
266+
bool mmproj_use_gpu = true; // use GPU for multimodal model
267+
bool no_mmproj = false; // explicitly disable multimodal model
235268
std::vector<std::string> image; // path to image file(s)
236269

237270
// embedding

common/json-partial.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
#include "log.h"
44
#include <string>
55

6-
#include <json.hpp>
7-
86
using json = nlohmann::ordered_json;
97

108
enum common_json_stack_element_type {

common/json-partial.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#pragma once
2-
#include <json.hpp>
2+
#include <nlohmann/json.hpp>
33

44
// Healing marker (empty if the JSON was fully parsed / wasn't healed).
55
struct common_healing_marker {

common/json-schema-to-grammar.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "ggml.h"
44
// Change JSON_ASSERT from assert() to GGML_ASSERT:
55
#define JSON_ASSERT GGML_ASSERT
6-
#include "json.hpp"
6+
#include <nlohmann/json.hpp>
77

88
std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
99
bool force_gbnf = false);

common/sampling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
#include "llama-vocab.h"
44
#include "common.h"
55
#include <random>
6-
#include "json.hpp"
6+
#include <nlohmann/json.hpp>
77
using json = nlohmann::ordered_json;
88

99
struct llama_sampling_context * llama_sampling_init(const struct llama_vocab* vocab, const struct llama_sampling_params & params) {

0 commit comments

Comments (0)