Skip to content

Commit 41ba05a

Browse files
committed
Latest commits up to T5 support
1 parent bea077f commit 41ba05a

File tree

5 files changed

+1096
-99
lines changed

5 files changed

+1096
-99
lines changed

base/common.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1175,7 +1175,25 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
1175 1175
{
1176 1176
printf("warming up the model with an empty run\n");
1177 1177

1178-
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
1178+
//std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
1179+
std::vector<llama_token> tmp;
1180+
llama_token bos = llama_token_bos(model);
1181+
llama_token eos = llama_token_eos(model);
1182+
// some models (e.g. T5) don't have a BOS token
1183+
if (bos != -1) {
1184+
tmp.push_back(bos);
1185+
}
1186+
tmp.push_back(eos);
1187+
1188+
if (llama_model_has_encoder(model)) {
1189+
llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size(), 0, 0));
1190+
llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
1191+
if (decoder_start_token_id == -1) {
1192+
decoder_start_token_id = bos;
1193+
}
1194+
tmp.clear();
1195+
tmp.push_back(decoder_start_token_id);
1196+
}
1179 1197
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
1180 1198
llama_kv_cache_clear(lctx);
1181 1199
llama_synchronize(lctx);

base/ggml.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5321,7 +5321,7 @@ void ggml_mul_mat_set_prec(
5321 5321
as -> [cols, rows, n_expert]
5322 5322
ids -> [n_experts_used, n_tokens] (i32)
5323 5323
b -> [cols, n_expert_used, n_tokens]
5324-
c -> [cols, n_expert_used, n_tokens]
5324+
c -> [rows, n_expert_used, n_tokens]
5325 5325

5326 5326
in b, n_experts_used can be broadcasted to match the n_expert_used of ids
5327 5327

0 commit comments

Comments (0)