Commit 1d9f1f2

cont
ggml-ci
1 parent 1586ed5 · commit 1d9f1f2

40 files changed: +134 additions, -96 deletions

common/common.cpp

Lines changed: 3 additions & 3 deletions
@@ -886,7 +886,7 @@ struct common_init_result common_init_from_params(common_params & params) {

     auto cparams = common_context_params_to_llama(params);

-    llama_context * lctx = llama_new_context_with_model(model, cparams);
+    llama_context * lctx = llama_init_from_model(model, cparams);
     if (lctx == NULL) {
         LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
         llama_model_free(model);
@@ -900,7 +900,7 @@ struct common_init_result common_init_from_params(common_params & params) {

     if (!params.control_vectors.empty()) {
         if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
-        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_n_layer(model);
+        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_model_n_layer(model);

         const auto cvec = common_control_vector_load(params.control_vectors);
         if (cvec.n_embd == -1) {
@@ -949,7 +949,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (params.sampling.ignore_eos) {
-        for (llama_token i = 0; i < llama_n_vocab(vocab); i++) {
+        for (llama_token i = 0; i < llama_vocab_n_vocab(vocab); i++) {
             if (llama_token_is_eog(vocab, i)) {
                 LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY);
                 params.sampling.logit_bias.push_back({i, -INFINITY});
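
The change above is representative of the whole commit: llama_new_context_with_model is replaced one-for-one by llama_init_from_model. A minimal standalone sketch of the load/init/teardown sequence under the renamed API (the model path is a placeholder; the *_default_params helpers are the usual llama.h entry points):

    #include "llama.h"

    int main() {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        // was: llama_new_context_with_model(model, cparams)
        llama_context * ctx = llama_init_from_model(model, cparams);
        if (ctx == NULL) {
            llama_model_free(model);
            return 1;
        }

        // ... tokenize, decode, sample ...

        llama_free(ctx);
        llama_model_free(model);
        return 0;
    }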

common/sampling.cpp

Lines changed: 4 additions & 4 deletions
@@ -116,7 +116,7 @@ struct common_sampler {
         const llama_model * model = llama_get_model(ctx);
         const llama_vocab * vocab = llama_model_get_vocab(model);

-        const int n_vocab = llama_n_vocab(vocab);
+        const int n_vocab = llama_vocab_n_vocab(vocab);

         cur.resize(n_vocab);

@@ -162,7 +162,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co

     llama_sampler_chain_add(result->chain,
             llama_sampler_init_logit_bias(
-                llama_n_vocab(vocab),
+                llama_vocab_n_vocab(vocab),
                 params.logit_bias.size(),
                 params.logit_bias.data()));

@@ -177,7 +177,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                         c_breakers.push_back(str.c_str());
                     }

-                    llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                 }
                 break;
             case COMMON_SAMPLER_TYPE_TOP_K:
@@ -211,7 +211,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
     } else if (params.mirostat == 1) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_vocab(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
     } else if (params.mirostat == 2) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
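
All four sampling changes follow the same rule: vocabulary-sized quantities are now queried from the llama_vocab handle instead of the model or context. A short sketch of the accessor chain, built only from calls that appear in this diff:

    // context -> model -> vocab -> vocab size
    const llama_model * model = llama_get_model(ctx);
    const llama_vocab * vocab = llama_model_get_vocab(model);
    const int n_vocab = llama_vocab_n_vocab(vocab);  // was: llama_n_vocab(vocab)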

common/speculative.cpp

Lines changed: 3 additions & 3 deletions
@@ -105,15 +105,15 @@ bool common_speculative_are_compatible(
     }

     {
-        const int n_vocab_tgt = llama_n_vocab(vocab_tgt);
-        const int n_vocab_dft = llama_n_vocab(vocab_dft);
+        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
+        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);

         const int vocab_diff = std::abs(n_vocab_tgt - n_vocab_dft);

         if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
             LOG_ERR("%s: draft model vocab must closely match target model to use speculation but "
                     "target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
-                    __func__, n_vocab_tgt, llama_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
+                    __func__, n_vocab_tgt, llama_vocab_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
             return false;
         }

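The gate logic itself is untouched; only the lookups are renamed. Restated as a hypothetical standalone helper (vocabs_compatible is illustrative, not part of the API; SPEC_VOCAB_MAX_SIZE_DIFFERENCE is the constant defined in speculative.cpp):

    #include <cstdlib>  // std::abs

    // Illustrative only: draft and target vocab sizes may differ,
    // but at most by SPEC_VOCAB_MAX_SIZE_DIFFERENCE tokens.
    static bool vocabs_compatible(const llama_vocab * vocab_tgt, const llama_vocab * vocab_dft) {
        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);
        return std::abs(n_vocab_tgt - n_vocab_dft) <= SPEC_VOCAB_MAX_SIZE_DIFFERENCE;
    }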

examples/batched-bench/batched-bench.cpp

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ int main(int argc, char ** argv) {
     // ensure enough sequences are available
     ctx_params.n_seq_max = n_pl.empty() ? 1 : *std::max_element(n_pl.begin(), n_pl.end());

-    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx = llama_init_from_model(model, ctx_params);

     if (ctx == NULL) {
         fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);

examples/batched/batched.cpp

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ int main(int argc, char ** argv) {
     ctx_params.n_ctx   = n_kv_req;
     ctx_params.n_batch = std::max(n_predict, n_parallel);

-    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx = llama_init_from_model(model, ctx_params);

     auto sparams = llama_sampler_chain_default_params();
     sparams.no_perf = false;

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 1 addition & 1 deletion
@@ -911,7 +911,7 @@ int main(int argc, char ** argv) {
     load_vocab(params.fn_vocab_model, &config, &vocab);

     struct my_llama_model model;
-    model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
+    model.hparams.n_vocab = config.vocab_size; //llama_vocab_n_vocab(lctx);
     model.hparams.n_ctx   = params.n_ctx;
     model.hparams.n_embd  = config.dim; //params.n_embd;
     model.hparams.n_ff    = config.hidden_dim;

examples/cvector-generator/cvector-generator.cpp

Lines changed: 2 additions & 2 deletions
@@ -423,8 +423,8 @@ int main(int argc, char ** argv) {
     llama_context * ctx = llama_init.context.get();

     // int n_ctx = llama_n_ctx(ctx);
-    int n_layers = llama_n_layer(model);
-    int n_embd = llama_n_embd(model);
+    int n_layers = llama_model_n_layer(model);
+    int n_embd = llama_model_n_embd(model);

     // get model hint param (a.k.a model arch name)
     char model_hint[128];

examples/embedding/embedding.cpp

Lines changed: 2 additions & 2 deletions
@@ -107,7 +107,7 @@ int main(int argc, char ** argv) {

     const llama_vocab * vocab = llama_model_get_vocab(model);

-    const int n_ctx_train = llama_n_ctx_train(model);
+    const int n_ctx_train = llama_model_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);

     const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
@@ -183,7 +183,7 @@ int main(int argc, char ** argv) {
     }

     // allocate output
-    const int n_embd = llama_n_embd(model);
+    const int n_embd = llama_model_n_embd(model);
     std::vector<float> embeddings(n_embd_count * n_embd, 0);
     float * emb = embeddings.data();

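Model metadata getters gain a llama_model_ prefix under the same scheme. A sketch of the three that appear in this commit, given a loaded llama_model * model:

    const int n_ctx_train = llama_model_n_ctx_train(model);  // training-time context length
    const int n_embd      = llama_model_n_embd(model);       // embedding dimension
    const int n_layer     = llama_model_n_layer(model);      // number of layers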

examples/gritlm/gritlm.cpp

Lines changed: 3 additions & 3 deletions
@@ -53,7 +53,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
     llama_decode(ctx, batch);

     // get embedding dimensions
-    uint64_t n_embd = llama_n_embd(model);
+    uint64_t n_embd = llama_model_n_embd(model);

     // allocate embedding output
     std::vector<float> emb_unorm(n_embd, 0.0f);
@@ -171,7 +171,7 @@ int main(int argc, char * argv[]) {
     llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);

     // create generation context
-    llama_context * ctx = llama_new_context_with_model(model, cparams);
+    llama_context * ctx = llama_init_from_model(model, cparams);

     auto sparams = llama_sampler_chain_default_params();

@@ -200,7 +200,7 @@ int main(int argc, char * argv[]) {
     const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
     const std::vector<std::vector<float>> q_rep = encode(ctx, queries, gritlm_instruction(instruction));

-    const int n_embd = llama_n_embd(model);
+    const int n_embd = llama_model_n_embd(model);

     const float cosine_sim_q0_d0 = common_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
     const float cosine_sim_q0_d1 = common_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
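
The similarity scoring is unchanged: once n_embd is known, common_embd_similarity_cos (from common.h) compares two n_embd-length vectors by cosine similarity. A hand-rolled equivalent for illustration only (not the library implementation; assumes nonzero norms):

    #include <cmath>

    static float cosine_sim(const float * a, const float * b, int n) {
        float dot = 0.0f, na = 0.0f, nb = 0.0f;
        for (int i = 0; i < n; i++) {
            dot += a[i] * b[i];
            na  += a[i] * a[i];
            nb  += b[i] * b[i];
        }
        return dot / (std::sqrt(na) * std::sqrt(nb));
    }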

examples/imatrix/imatrix.cpp

Lines changed: 3 additions & 4 deletions
@@ -7,7 +7,6 @@
 #include <cstdio>
 #include <cstring>
 #include <ctime>
-#include <sstream>
 #include <thread>
 #include <mutex>
 #include <vector>
@@ -40,7 +39,7 @@ class IMatrixCollector {
     void set_params(common_params params) { m_params = std::move(params); }
     bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
     void save_imatrix(int ncall = -1) const;
-    bool load_imatrix(const char * file_name);
+    bool load_imatrix(const char * fname);
 private:
     std::unordered_map<std::string, Stats> m_stats;
     common_params m_params;
@@ -471,7 +470,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
     const int n_chunk_max = tokens.size() / n_ctx;

     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
-    const int n_vocab = llama_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_vocab(vocab);
     const int n_batch = params.n_batch;

     int count = 0;
@@ -630,7 +629,7 @@ int main(int argc, char ** argv) {
         return 1;
     }

-    const int n_ctx_train = llama_n_ctx_train(model);
+    const int n_ctx_train = llama_model_n_ctx_train(model);
     if (params.n_ctx > n_ctx_train) {
         LOG_WRN("%s: model was trained on only %d context tokens (%d specified)\n",
                 __func__, n_ctx_train, params.n_ctx);
