
Commit 3b79a68

Merge branch 'ggml-org:master' into finelayer

2 parents: 0ee5c63 + 7675c55
31 files changed: +3850 −3749 lines

README.md

Lines changed: 1 addition & 0 deletions

@@ -130,6 +130,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo
 <details>
 <summary>Bindings</summary>

+- Python: [ddh0/easy-llama](https://github.com/ddh0/easy-llama)
 - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python)
 - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp)
 - Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp)

common/arg.cpp

Lines changed: 5 additions & 5 deletions

@@ -1301,7 +1301,7 @@ common_params_context common_params_parser_init(common_params & params,
     sampler_type_names.pop_back();

     params.optimize = ggml_opt_get_default_optimizer_params(NULL);
-    params.optimize.common.alpha = 1e-8;
+    params.optimize.adamw.alpha = 1e-8;

     /**
      * filter options by example
@@ -2127,15 +2127,15 @@ common_params_context common_params_parser_init(common_params & params,
         { "-lr", "--learning-rate" }, "ALPHA",
         string_format(
             "adamw or sgd optimizer alpha (default: %.2g); note: sgd alpha recommended ~100x (no momentum)",
-            (double) params.optimize.common.alpha),
-        [](common_params & params, const std::string & value) { params.optimize.common.alpha = std::stof(value); })
+            (double) params.optimize.adamw.alpha),
+        [](common_params & params, const std::string & value) { params.optimize.adamw.alpha = std::stof(value); })
         .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
     add_opt(common_arg(
         { "-wd", "--weight-decay" }, "WD",
         string_format(
             "adamw or sgd optimizer weight decay (0 is off; recommend very small e.g. 1e-9) (default: %.2g).",
-            (double) params.optimize.common.wd),
-        [](common_params & params, const std::string & value) { params.optimize.common.wd = std::stof(value); })
+            (double) params.optimize.adamw.wd),
+        [](common_params & params, const std::string & value) { params.optimize.adamw.wd = std::stof(value); })
         .set_examples({ LLAMA_EXAMPLE_FINETUNE }));
     add_opt(common_arg({ "-epochs", "--epochs" }, "N",
         string_format("optimizer max # of epochs (default: %d)", params.epochs),

convert_hf_to_gguf.py

Lines changed: 27 additions & 28 deletions

@@ -3814,7 +3814,7 @@ def _xlmroberta_set_vocab(self) -> None:
             remove_whitespaces = tokenizer.clean_up_tokenization_spaces
             precompiled_charsmap = b64decode(tokenizer_json["normalizer"]["precompiled_charsmap"])

-            vocab_size = self.hparams.get("vocab_size", tokenizer.vocab_size)
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size)
         else:
             sentencepiece_model = model.ModelProto()  # pyright: ignore[reportAttributeAccessIssue]
             sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
@@ -3827,7 +3827,7 @@ def _xlmroberta_set_vocab(self) -> None:
             tokenizer = SentencePieceProcessor()
             tokenizer.LoadFromFile(str(tokenizer_path))

-            vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+            vocab_size = max(self.hparams.get("vocab_size", 0), tokenizer.vocab_size())

         tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
         scores: list[float] = [-10000.0] * vocab_size
@@ -3857,33 +3857,26 @@ def _xlmroberta_set_vocab(self) -> None:
             unk_token = tokenizer_config_json.get("unk_token")
             unk_token_id = added_vocab.get(unk_token, tokenizer_json["model"].get("unk_id", 3))

-            for token_id in range(vocab_size):
+            for token_id in range(tokenizer.vocab_size):
                 piece = tokenizer._convert_id_to_token(token_id)
-                text = piece.encode("utf-8")
-                score = tokenizer_json["model"]["vocab"][token_id][1]
-
-                toktype = SentencePieceTokenTypes.NORMAL
-                if token_id == unk_token_id:
-                    toktype = SentencePieceTokenTypes.UNKNOWN
-                elif token_id in tokenizer.all_special_ids:
-                    toktype = SentencePieceTokenTypes.CONTROL
-                elif token_id in added_vocab.values():
-                    toktype = SentencePieceTokenTypes.USER_DEFINED
-                # No reliable way to detect this, but jina doesn't have any
-                # elif tokenizer.IsByte(token_id):
-                #     toktype = SentencePieceTokenTypes.BYTE
-
-                tokens[token_id] = text
-                scores[token_id] = score
-                toktypes[token_id] = toktype
-
-            if vocab_size > len(tokens):
-                pad_count = vocab_size - len(tokens)
-                logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
-                for i in range(1, pad_count + 1):
-                    tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
-                    scores.append(-1000.0)
-                    toktypes.append(SentencePieceTokenTypes.UNUSED)
+                if (piece := tokenizer._convert_id_to_token(token_id)) is not None:
+                    text = piece.encode("utf-8")
+                    score = tokenizer_json["model"]["vocab"][token_id][1]
+
+                    toktype = SentencePieceTokenTypes.NORMAL
+                    if token_id == unk_token_id:
+                        toktype = SentencePieceTokenTypes.UNKNOWN
+                    elif token_id in tokenizer.all_special_ids:
+                        toktype = SentencePieceTokenTypes.CONTROL
+                    elif token_id in added_vocab.values():
+                        toktype = SentencePieceTokenTypes.USER_DEFINED
+                    # No reliable way to detect this, but jina doesn't have any
+                    # elif tokenizer.IsByte(token_id):
+                    #     toktype = SentencePieceTokenTypes.BYTE
+
+                    tokens[token_id] = text
+                    scores[token_id] = score
+                    toktypes[token_id] = toktype

         if isinstance(tokenizer, SentencePieceProcessor):
             # realign tokens (see HF tokenizer code)
@@ -3896,6 +3889,12 @@ def _xlmroberta_set_vocab(self) -> None:
                 SentencePieceTokenTypes.UNKNOWN,
             ] + toktypes[3:-1]

+        if self.model_arch == gguf.MODEL_ARCH.NOMIC_BERT_MOE:
+            # Add mask token missing from sentencepiece.bpe.model
+            tokens[250001] = b'<mask>'
+            scores[250001] = 0.0
+            toktypes[250001] = SentencePieceTokenTypes.CONTROL
+
         self.gguf_writer.add_tokenizer_model("t5")
         self.gguf_writer.add_tokenizer_pre("default")
         self.gguf_writer.add_token_list(tokens)

examples/parallel/parallel.cpp

Lines changed: 2 additions & 2 deletions

@@ -158,7 +158,7 @@ int main(int argc, char ** argv) {
     common_params params;

     params.n_predict = 128;
-    params.n_junk = 0;
+    params.n_junk = 1;

     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_PARALLEL)) {
         return 1;
@@ -182,7 +182,7 @@ int main(int argc, char ** argv) {
     const bool is_sp_shared = params.is_pp_shared;

     // extra text to insert in each client's prompt in order to make it larger
-    const int32_t n_junk = params.n_junk;
+    const int32_t n_junk = std::max(1, params.n_junk);

     // init llama.cpp
     llama_backend_init();
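The two changes guard the same invariant from both sides: the default is now 1, and a user-supplied value is clamped at the point of use, so `n_junk` can never be 0 when the junk text is inserted. A standalone illustration of the clamp (the loop values are made up for the demo):

    // Standalone demo: whatever value arrives from the command line,
    // the effective junk-chunk count is at least 1.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int32_t user_values[] = { -5, 0, 1, 250 };
        for (int32_t v : user_values) {
            const int32_t n_junk = std::max(1, v); // same guard as parallel.cpp
            printf("requested %d -> effective %d\n", v, n_junk);
        }
        return 0;
    }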

examples/training/finetune.cpp

Lines changed: 1 addition & 1 deletion

@@ -62,7 +62,7 @@ int main(int argc, char ** argv) {
     struct ggml_opt_optimizer_params & optimizer_params = params.optimize;

     LOG_INF("-optimizer %s -lr %.2g -wd %.2g -epochs %d\n", ggml_opt_optimizer_name(optimizer_params.optimizer),
-            (double) optimizer_params.common.alpha, (double) optimizer_params.common.wd, params.epochs);
+            (double) optimizer_params.adamw.alpha, (double) optimizer_params.adamw.wd, params.epochs);

     struct llama_opt_params lopt_params {
         /*n_ctx_train =*/ 0,

ggml/include/ggml-opt.h

Lines changed: 1 addition & 4 deletions

@@ -90,13 +90,10 @@ extern "C" {
         // SGD and AdamW optimizer parameters
         struct {
             float alpha; // learning rate
-            float wd;    // weight decay for SGD or AdamW, use 0.0f to disable
-        } common;
-
-        struct {
             float beta1; // adamw
             float beta2; // adamw
             float eps;   // epsilon for numerical stability
+            float wd;    // weight decay for SGD or AdamW, use 0.0f to disable
         } adamw;

         // only GGML_OPT_OPTIMIZER_ADAMW allocates m, v per parameter
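This is the change the C++ renames above follow from: the short-lived `common` sub-struct is folded back into `adamw`, so `alpha` and `wd` (shared by SGD and AdamW) sit alongside the AdamW-only `beta1`, `beta2`, and `eps`. A minimal configuration sketch, assuming a ggml build exposing ggml-opt.h with this post-merge layout and the `ggml_opt_get_default_optimizer_params(NULL)` entry point seen in common/arg.cpp:

    // Sketch only: fills in the merged struct; alpha/wd apply to both
    // optimizers, beta1/beta2/eps are used by AdamW only.
    #include "ggml-opt.h"

    static struct ggml_opt_optimizer_params make_opt_params(void) {
        struct ggml_opt_optimizer_params p = ggml_opt_get_default_optimizer_params(NULL);
        p.adamw.alpha = 1e-4f;  // learning rate
        p.adamw.wd    = 1e-9f;  // weight decay, 0.0f disables it
        p.adamw.beta1 = 0.9f;   // AdamW only
        p.adamw.beta2 = 0.999f; // AdamW only
        return p;
    }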

ggml/include/ggml.h

Lines changed: 0 additions & 3 deletions

@@ -2104,9 +2104,6 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_graph_get_grad    (const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);
     GGML_API struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node);

-    GGML_API void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname);
-    GGML_API struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval);
-
     // print info and performance information for the graph
     GGML_API void ggml_graph_print(const struct ggml_cgraph * cgraph);

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions

@@ -196,6 +196,7 @@ add_library(ggml-base
     ../include/ggml-opt.h
     ../include/gguf.h
     ggml.c
+    ggml.cpp
     ggml-alloc.c
     ggml-backend.cpp
     ggml-opt.cpp
@@ -226,6 +227,7 @@ function(ggml_add_backend_library backend)
         set_target_properties(${backend} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
         target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
         add_dependencies(ggml ${backend})
+        install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
     else()
         add_library(${backend} ${ARGN})
         target_link_libraries(ggml PUBLIC ${backend})

ggml/src/ggml-blas/CMakeLists.txt

Lines changed: 3 additions & 3 deletions

@@ -81,7 +81,7 @@ if (BLAS_FOUND)
     target_link_libraries     (ggml-blas PRIVATE ${BLAS_LIBRARIES})
     target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
 else()
-    message(ERROR "BLAS not found, please refer to "
-                  "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
-                  " to set correct GGML_BLAS_VENDOR")
+    message(FATAL_ERROR "BLAS not found, please refer to "
+                        "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
+                        " to set correct GGML_BLAS_VENDOR")
 endif()
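Worth noting: `ERROR` is not one of CMake's recognized `message()` modes, so the old call simply printed the text (with a literal "ERROR" folded into the message) and let configuration continue; `FATAL_ERROR` makes a missing BLAS abort the configure step as intended.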

ggml/src/ggml-impl.h

Lines changed: 2 additions & 0 deletions

@@ -32,6 +32,8 @@
 extern "C" {
 #endif

+void ggml_print_backtrace(void);
+
 #ifndef MIN
 # define MIN(a, b) ((a) < (b) ? (a) : (b))
 #endif
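Exposing `ggml_print_backtrace` in the internal header lets other translation units (for example the newly added ggml.cpp listed in ggml/src/CMakeLists.txt above) call it. A sketch of the kind of internal use this enables; `die_with_trace` is a hypothetical helper, not part of this commit:

    // Sketch only: any internal source that includes ggml-impl.h can now
    // dump a backtrace before aborting on a fatal error.
    #include <cstdio>
    #include <cstdlib>
    #include "ggml-impl.h"

    static void die_with_trace(const char * msg) {
        fprintf(stderr, "ggml fatal: %s\n", msg);
        ggml_print_backtrace(); // declared in ggml-impl.h
        abort();
    }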
