
Commit 2d10c89

Merge pull request #69 from l3utterfly/merge

merge from upstream

2 parents: 8e70dd9 + 7c054e2

50 files changed: +5222 −3834 lines

Note: this is a large commit, so only a subset of the changed files is shown below.

README.md

Lines changed: 1 addition & 0 deletions
@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 <details>
 <summary>Bindings</summary>
 
+- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)

common/arg.cpp

Lines changed: 1 addition & 0 deletions
@@ -2869,6 +2869,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             "(default: deepseek)",
             [](common_params & params, const std::string & value) {
                 /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
+                else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
                 else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
                 else { throw std::invalid_argument("invalid value"); }
             }
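In practice the reasoning-format option now accepts three values: deepseek (the default), deepseek-legacy, and none; any other value throws std::invalid_argument. The hunk shows only the value parser, but this appears to be the handler wired to llama.cpp's --reasoning-format flag.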

common/chat.cpp

Lines changed: 8 additions & 7 deletions
@@ -82,10 +82,10 @@ json common_chat_msg::to_json_oaicompat() const
 
 std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
     std::vector<common_chat_msg_diff> diffs;
-    // if (previous_msg.reasoning_content != current.reasoning_content) {
-    //     auto & diff = diffs.emplace_back();
-    //     diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, current.reasoning_content);
-    // }
+    if (previous_msg.reasoning_content != new_msg.reasoning_content) {
+        auto & diff = diffs.emplace_back();
+        diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
+    }
     if (previous_msg.content != new_msg.content) {
         auto & diff = diffs.emplace_back();
         diff.content_delta = string_diff(previous_msg.content, new_msg.content);

@@ -385,9 +385,9 @@ json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & t
 
 template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
     json delta = json::object();
-    // if (!diff.reasoning_content_delta.empty()) {
-    //     delta["reasoning_content"] = msg.reasoning_content;
-    // }
+    if (!diff.reasoning_content_delta.empty()) {
+        delta["reasoning_content"] = diff.reasoning_content_delta;
+    }
     if (!diff.content_delta.empty()) {
         delta["content"] = diff.content_delta;
     }

@@ -598,6 +598,7 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
     switch (format) {
         case COMMON_REASONING_FORMAT_NONE: return "none";
         case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
+        case COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY: return "deepseek-legacy";
         default:
             throw std::runtime_error("Unknown reasoning format");
     }
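For context, these hunks re-enable reasoning deltas in streaming output: each streamed chunk carries only the text appended to reasoning_content (or content) since the previous partial message, computed with string_diff. Below is a minimal standalone sketch of that suffix-diff idea, assuming the previous text is always a prefix of the new text (true for append-only streaming); it uses plain std::string instead of the project's common_chat_msg types, with a simplified stand-in for the real string_diff helper:

    #include <cassert>
    #include <string>

    // Suffix diff as used for streaming deltas: when prev is a prefix of cur,
    // the delta is exactly the text appended since the last partial message.
    static std::string string_diff(const std::string & prev, const std::string & cur) {
        assert(cur.compare(0, prev.size(), prev) == 0); // prev must be a prefix of cur
        return cur.substr(prev.size());
    }

    int main() {
        const std::string prev = "The answer";
        const std::string cur  = "The answer is 42.";
        // A server would emit this as {"delta": {"reasoning_content": " is 42."}}.
        return string_diff(prev, cur) == " is 42." ? 0 : 1;
    }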

common/chat.h

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ struct common_chat_msg {
 };
 
 struct common_chat_msg_diff {
-    // std::string reasoning_content_delta;
+    std::string reasoning_content_delta;
     std::string content_delta;
     size_t tool_call_index = std::string::npos;
     common_chat_tool_call tool_call_delta;

common/common.h

Lines changed: 2 additions & 1 deletion
@@ -215,7 +215,8 @@ struct common_params_vocoder {
 
 enum common_reasoning_format {
     COMMON_REASONING_FORMAT_NONE,
-    COMMON_REASONING_FORMAT_DEEPSEEK,        // Extract thinking tag contents and return as `message.reasoning_content`
+    COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
+    COMMON_REASONING_FORMAT_DEEPSEEK,        // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
 };
 
 struct common_params {
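The practical difference between the two DEEPSEEK variants shows up mainly while streaming: per the comments above, deepseek-legacy may leave the thinking block inline in <think> tags in stream mode, while deepseek extracts it into message.reasoning_content even in streaming deltas. A rough standalone sketch of that extraction rule, assuming a single leading think block; the real parser in common/chat.cpp handles many more formats and edge cases:

    #include <string>
    #include <utility>

    // Split "<think>REASONING</think>CONTENT" into {reasoning, content}.
    // Without a leading think tag, everything is content; with an unclosed
    // tag (mid-stream), everything seen so far counts as reasoning.
    static std::pair<std::string, std::string> split_reasoning(const std::string & s) {
        const std::string open = "<think>", close = "</think>";
        if (s.compare(0, open.size(), open) != 0) {
            return {"", s};
        }
        const size_t end = s.find(close, open.size());
        if (end == std::string::npos) {
            return {s.substr(open.size()), ""};
        }
        return {s.substr(open.size(), end - open.size()), s.substr(end + close.size())};
    }

    int main() {
        const auto [reasoning, content] = split_reasoning("<think>check units</think>42 m/s");
        return (reasoning == "check units" && content == "42 m/s") ? 0 : 1;
    }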

convert_hf_to_gguf.py

Lines changed: 27 additions & 28 deletions
@@ -3814,7 +3814,7 @@ def _xlmroberta_set_vocab(self) -> None:
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])
 
-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())

@@ -3827,7 +3827,7 @@ def _xlmroberta_set_vocab(self) -> None:
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))
 
-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())
 
         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size

@@ -3857,33 +3857,26 @@ def _xlmroberta_set_vocab(self) -> None:
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))
 
-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
                 piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
-
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
-
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
+
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype
 
         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)

@@ -3896,6 +3889,12 @@ def _xlmroberta_set_vocab(self) -> None:
                 SentencePieceTokenTypes.UNKNOWN,
             ] + toktypes[3:-1]
 
+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)
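Net effect of these hunks: when the hparams declare a larger vocab_size than the sentencepiece tokenizer actually contains, the token/score/toktype lists are pre-sized with [PAD...] placeholders using the larger count, only ids the tokenizer can actually convert are filled in, and for NOMIC_BERT_MOE the <mask> token missing from sentencepiece.bpe.model is backfilled explicitly at id 250001, presumably so the vocab lines up with the model's embedding matrix.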

examples/parallel/parallel.cpp

Lines changed: 2 additions & 2 deletions
@@ -158,7 +158,7 @@ int main(int argc, char ** argv) {
     common_params params;
 
     params.n_predict = 128;
-    params.n_junk    = 0;
+    params.n_junk    = 1;
 
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) {
         return 1;

@@ -182,7 +182,7 @@ int main(int argc, char ** argv) {
     const bool is_sp_shared = params.is_pp_shared;
 
     // extra text to insert in each client's prompt in order to make it larger
-    const int32_t n_junk = params.n_junk;
+    const int32_t n_junk = std::max(1, params.n_junk);
 
     // init llama.cpp
     llama_backend_init();

ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions
@@ -2095,9 +2095,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
 
-    GGML_API void                 ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);
ggml/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -196,6 +196,7 @@ add_library(ggml-base
     ../include/ggml-opt.h
     ../include/gguf.h
     ggml.c
+    ggml.cpp
     ggml-alloc.c
     ggml-backend.cpp
     ggml-opt.cpp

@@ -234,6 +235,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})

ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
@@ -96,5 +96,4 @@ else()
     message(ERROR "BLAS not found, please refer to "
                   "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
                   " to set correct GGML_BLAS_VENDOR")
-endif()
 endif()
