
Commit 853f344

more cleanup
1 parent 40249dd commit 853f344

File tree (4 files changed: +10 -19 lines)

    ggml/src/ggml.c
    src/llama-model.cpp
    src/llama-vocab.cpp
    src/llama.cpp


ggml/src/ggml.c

Lines changed: 0 additions & 1 deletion
@@ -3015,7 +3015,6 @@ struct ggml_tensor * ggml_mul_mat(
         struct ggml_context * ctx,
         struct ggml_tensor * a,
         struct ggml_tensor * b) {
-
     GGML_ASSERT(ggml_can_mul_mat(a, b));
     GGML_ASSERT(!ggml_is_transposed(a));

src/llama-model.cpp

Lines changed: 10 additions & 14 deletions
@@ -451,7 +451,6 @@ void llama_model::load_arch(llama_model_loader & ml) {
 }
 
 void llama_model::load_hparams(llama_model_loader & ml) {
-
     const gguf_context * ctx = ml.meta.get();
 
     // get metadata as string
@@ -465,7 +464,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         gguf_kv.emplace(name, value);
     }
 
-
     // get general kv
     ml.get_key(LLM_KV_GENERAL_NAME, name, false);
 
@@ -586,7 +584,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     }
 
     // arch-specific KVs
-    LLAMA_LOG_INFO("Switching Arch\n");
     switch (arch) {
         case LLM_ARCH_LLAMA:
             {
@@ -1901,6 +1898,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
 
 void llama_model::load_vocab(llama_model_loader & ml) {
     const auto kv = LLM_KV(arch);
+
     vocab.load(ml, kv);
 }
 
@@ -2045,7 +2043,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
     auto create_tensor = [&](const LLM_TN_IMPL & tn, const std::initializer_list<int64_t> & ne, int flags) -> ggml_tensor * {
         ggml_tensor * t_meta = ml.get_tensor_meta(tn.str().c_str());
-        LLAMA_LOG_INFO("Creating Tensor: %s\n", tn.str().c_str());
+
         if (!t_meta) {
             if (flags & TENSOR_NOT_REQUIRED) {
                 return nullptr;
@@ -2120,6 +2118,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         }
 
         ggml_backend_buffer_type_t buft = nullptr;
+
         // check overrides
         if (ml.tensor_buft_overrides) {
             std::string tensor_name = tn.str();
@@ -2167,6 +2166,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 first_moved_to_buft = buft;
             }
         }
+
         ggml_context * ctx = ctx_for_buft(buft);
 
         // if duplicated, check if the original tensor was allocated in the same buffer type context and avoid creating a new one
@@ -2624,26 +2624,26 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_NOMIC_BERT_MOE:
             {
                 tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-                tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
-
                 type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);
-                tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0);
-
 
                 if (arch == LLM_ARCH_BERT) {
                     pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);
+
                     cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
                     cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"), {n_embd}, TENSOR_NOT_REQUIRED);
 
                     cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
                     cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
                 }
 
+                tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
+                tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0);
+
                 for (int i = 0; i < n_layer; ++i) {
                     auto & layer = layers[i];
 
                     layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, TENSOR_NOT_REQUIRED);
-
+                    layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, TENSOR_NOT_REQUIRED);
 
                     if (!layer.wqkv) {
                         layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0);
@@ -2657,8 +2657,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     }
 
                     layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
-
-                    layer.bqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "bias", i), {n_embd + 2*n_embd_gqa}, TENSOR_NOT_REQUIRED);
+
                     layer.attn_out_norm = create_tensor(tn(LLM_TENSOR_ATTN_OUT_NORM, "weight", i), {n_embd}, 0);
                     layer.attn_out_norm_b = create_tensor(tn(LLM_TENSOR_ATTN_OUT_NORM, "bias", i), {n_embd}, 0);
 
@@ -2668,7 +2667,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff, n_embd, n_expert}, 0);
                         layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0);
                     } else {
-
                         layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
                         layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0);
 
@@ -2683,7 +2681,6 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                     layer.layer_out_norm = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "weight", i), {n_embd}, 0);
                     layer.layer_out_norm_b = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd}, 0);
-
                 }
             } break;
         case LLM_ARCH_MODERN_BERT:
@@ -7549,7 +7546,6 @@ struct llm_build_modern_bert : public llm_graph_context {
        const int64_t n_embd_head = hparams.n_embd_head_v;
        const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
        const int64_t n_tokens = ubatch.n_tokens;
-       const int64_t n_ff = hparams.n_ff();
 
        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
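
These hunks also show the optional-tensor convention that create_tensor follows: the fused QKV weight and bias are requested with TENSOR_NOT_REQUIRED, and the split Q/K/V projections are only created when the fused weight is absent. The stand-alone sketch below illustrates that optional-with-fallback lookup under simplified assumptions; tensor_stub, find_tensor, and the "blk.*" names are illustrative stand-ins, not the llama.cpp API.

#include <map>
#include <stdexcept>
#include <string>

struct tensor_stub { std::string name; };               // stand-in for ggml_tensor
using tensor_map = std::map<std::string, tensor_stub>;  // stand-in for the loader's tensor index

struct attn_layer {
    tensor_stub * wqkv = nullptr;                        // fused QKV weight (optional)
    tensor_stub * bqkv = nullptr;                        // fused QKV bias (optional)
    tensor_stub * wq = nullptr, * wk = nullptr, * wv = nullptr; // fallback projections
};

// return nullptr for a missing optional tensor, throw for a missing required one
static tensor_stub * find_tensor(tensor_map & tensors, const std::string & name, bool required) {
    auto it = tensors.find(name);
    if (it == tensors.end()) {
        if (!required) {
            return nullptr;
        }
        throw std::runtime_error("missing required tensor: " + name);
    }
    return &it->second;
}

static attn_layer load_attn_layer(tensor_map & tensors, int i) {
    const std::string prefix = "blk." + std::to_string(i) + ".";

    attn_layer layer;
    // the fused weight and bias are requested together and may both be absent
    layer.wqkv = find_tensor(tensors, prefix + "attn_qkv.weight", /*required =*/ false);
    layer.bqkv = find_tensor(tensors, prefix + "attn_qkv.bias",   /*required =*/ false);

    // split projections are only demanded when the fused weight is missing
    if (!layer.wqkv) {
        layer.wq = find_tensor(tensors, prefix + "attn_q.weight", /*required =*/ true);
        layer.wk = find_tensor(tensors, prefix + "attn_k.weight", /*required =*/ true);
        layer.wv = find_tensor(tensors, prefix + "attn_v.weight", /*required =*/ true);
    }
    return layer;
}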

src/llama-vocab.cpp

Lines changed: 0 additions & 3 deletions
@@ -1661,13 +1661,10 @@ struct llama_vocab::impl {
 void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
     struct gguf_context * ctx = ml.meta.get();
 
-    LLAMA_LOG_INFO("Determining Vocab Type\n");
     // determine vocab type
     {
         ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
         ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
-        LLAMA_LOG_INFO("pre tokenizer model: %s\n", tokenizer_pre.c_str());
-        LLAMA_LOG_INFO("tokenizer model: %s\n", tokenizer_model.c_str());
 
         ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, n_token_types, false);
16731670

src/llama.cpp

Lines changed: 0 additions & 1 deletion
@@ -126,7 +126,6 @@ static int llama_model_load(const std::string & fname, std::vector<std::string>
         if (!model.load_tensors(ml)) {
             return -2;
         }
-
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
         return -1;
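
For reference, the status codes visible in this hunk are -2 when load_tensors reports failure and -1 when an exception is thrown while loading; 0 is assumed to mean success. A hypothetical caller-side helper (not part of llama.cpp) that maps these codes to messages:

// Hypothetical helper; the -1/-2 meanings are taken from the hunk above,
// 0 is assumed to be the success path.
static const char * model_load_status_str(int status) {
    switch (status) {
        case  0: return "ok";
        case -1: return "exception while loading model";
        case -2: return "load_tensors failed";
        default: return "unknown status";
    }
}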
