Commit 1d9f1f2

cont
ggml-ci
1 parent 1586ed5 · commit 1d9f1f2

40 files changed: +134 additions, -96 deletions

common/common.cpp

Lines changed: 3 additions & 3 deletions
@@ -886,7 +886,7 @@ struct common_init_result common_init_from_params(common_params & params) {

     auto cparams = common_context_params_to_llama(params);

-    llama_context * lctx = llama_new_context_with_model(model, cparams);
+    llama_context * lctx = llama_init_from_model(model, cparams);
     if (lctx == NULL) {
         LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str());
         llama_model_free(model);
@@ -900,7 +900,7 @@ struct common_init_result common_init_from_params(common_params & params) {

     if (!params.control_vectors.empty()) {
         if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
-        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_n_layer(model);
+        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_model_n_layer(model);

         const auto cvec = common_control_vector_load(params.control_vectors);
         if (cvec.n_embd == -1) {
@@ -949,7 +949,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }

     if (params.sampling.ignore_eos) {
-        for (llama_token i = 0; i < llama_n_vocab(vocab); i++) {
+        for (llama_token i = 0; i < llama_vocab_n_vocab(vocab); i++) {
             if (llama_token_is_eog(vocab, i)) {
                 LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY);
                 params.sampling.logit_bias.push_back({i, -INFINITY});
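
The change above is representative of the whole commit: llama_new_context_with_model is replaced one-for-one by llama_init_from_model. A minimal standalone sketch of the load/init/teardown sequence under the renamed API (the model path is a placeholder; the *_default_params helpers are the usual llama.h entry points):

    #include "llama.h"

    int main() {
        llama_model_params mparams = llama_model_default_params();
        llama_model * model = llama_model_load_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1;
        }

        llama_context_params cparams = llama_context_default_params();
        // was: llama_new_context_with_model(model, cparams)
        llama_context * ctx = llama_init_from_model(model, cparams);
        if (ctx == NULL) {
            llama_model_free(model);
            return 1;
        }

        // ... tokenize, decode, sample ...

        llama_free(ctx);
        llama_model_free(model);
        return 0;
    }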

common/sampling.cpp

Lines changed: 4 additions & 4 deletions
@@ -116,7 +116,7 @@ struct common_sampler {
         const llama_model * model = llama_get_model(ctx);
         const llama_vocab * vocab = llama_model_get_vocab(model);

-        const int n_vocab = llama_n_vocab(vocab);
+        const int n_vocab = llama_vocab_n_vocab(vocab);

         cur.resize(n_vocab);

@@ -162,7 +162,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co

     llama_sampler_chain_add(result->chain,
             llama_sampler_init_logit_bias(
-                llama_n_vocab(vocab),
+                llama_vocab_n_vocab(vocab),
                 params.logit_bias.size(),
                 params.logit_bias.data()));

@@ -177,7 +177,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                         c_breakers.push_back(str.c_str());
                     }

-                    llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_dry (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                 }
                 break;
             case COMMON_SAMPLER_TYPE_TOP_K:
@@ -211,7 +211,7 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
     } else if (params.mirostat == 1) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_vocab(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
     } else if (params.mirostat == 2) {
         llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
         llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
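
All four sampling changes follow the same rule: vocabulary-sized quantities are now queried from the llama_vocab handle instead of the model or context. A short sketch of the accessor chain, built only from calls that appear in this diff:

    // context -> model -> vocab -> vocab size
    const llama_model * model = llama_get_model(ctx);
    const llama_vocab * vocab = llama_model_get_vocab(model);
    const int n_vocab = llama_vocab_n_vocab(vocab);  // was: llama_n_vocab(vocab)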

common/speculative.cpp

Lines changed: 3 additions & 3 deletions
@@ -105,15 +105,15 @@ bool common_speculative_are_compatible(
     }

     {
-        const int n_vocab_tgt = llama_n_vocab(vocab_tgt);
-        const int n_vocab_dft = llama_n_vocab(vocab_dft);
+        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
+        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);

         const int vocab_diff = std::abs(n_vocab_tgt - n_vocab_dft);

         if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
             LOG_ERR("%s: draft model vocab must closely match target model to use speculation but "
                     "target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
-                    __func__, n_vocab_tgt, llama_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
+                    __func__, n_vocab_tgt, llama_vocab_n_vocab(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
             return false;
         }

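The gate logic itself is untouched; only the lookups are renamed. Restated as a hypothetical standalone helper (vocabs_compatible is illustrative, not part of the API; SPEC_VOCAB_MAX_SIZE_DIFFERENCE is the constant defined in speculative.cpp):

    #include <cstdlib>  // std::abs

    // Illustrative only: draft and target vocab sizes may differ,
    // but at most by SPEC_VOCAB_MAX_SIZE_DIFFERENCE tokens.
    static bool vocabs_compatible(const llama_vocab * vocab_tgt, const llama_vocab * vocab_dft) {
        const int n_vocab_tgt = llama_vocab_n_vocab(vocab_tgt);
        const int n_vocab_dft = llama_vocab_n_vocab(vocab_dft);
        return std::abs(n_vocab_tgt - n_vocab_dft) <= SPEC_VOCAB_MAX_SIZE_DIFFERENCE;
    }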

examples/batched-bench/batched-bench.cpp

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@ int main(int argc, char ** argv) {
     // ensure enough sequences are available
     ctx_params.n_seq_max = n_pl.empty() ? 1 : *std::max_element(n_pl.begin(), n_pl.end());

-    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx = llama_init_from_model(model, ctx_params);

     if (ctx == NULL) {
         fprintf(stderr , "%s: error: failed to create the llama_context\n" , __func__);

examples/batched/batched.cpp

Lines changed: 1 addition & 1 deletion
@@ -64,7 +64,7 @@ int main(int argc, char ** argv) {
     ctx_params.n_ctx   = n_kv_req;
     ctx_params.n_batch = std::max(n_predict, n_parallel);

-    llama_context * ctx = llama_new_context_with_model(model, ctx_params);
+    llama_context * ctx = llama_init_from_model(model, ctx_params);

     auto sparams = llama_sampler_chain_default_params();
     sparams.no_perf = false;

examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp

Lines changed: 1 addition & 1 deletion
@@ -911,7 +911,7 @@ int main(int argc, char ** argv) {
     load_vocab(params.fn_vocab_model, &config, &vocab);

     struct my_llama_model model;
-    model.hparams.n_vocab = config.vocab_size; //llama_n_vocab(lctx);
+    model.hparams.n_vocab = config.vocab_size; //llama_vocab_n_vocab(lctx);
     model.hparams.n_ctx   = params.n_ctx;
     model.hparams.n_embd  = config.dim; //params.n_embd;
     model.hparams.n_ff    = config.hidden_dim;

examples/cvector-generator/cvector-generator.cpp

Lines changed: 2 additions & 2 deletions
@@ -423,8 +423,8 @@ int main(int argc, char ** argv) {
     llama_context * ctx = llama_init.context.get();

     // int n_ctx = llama_n_ctx(ctx);
-    int n_layers = llama_n_layer(model);
-    int n_embd = llama_n_embd(model);
+    int n_layers = llama_model_n_layer(model);
+    int n_embd = llama_model_n_embd(model);

     // get model hint param (a.k.a model arch name)
     char model_hint[128];

examples/embedding/embedding.cpp

Lines changed: 2 additions & 2 deletions
@@ -107,7 +107,7 @@ int main(int argc, char ** argv) {

     const llama_vocab * vocab = llama_model_get_vocab(model);

-    const int n_ctx_train = llama_n_ctx_train(model);
+    const int n_ctx_train = llama_model_n_ctx_train(model);
     const int n_ctx = llama_n_ctx(ctx);

     const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
@@ -183,7 +183,7 @@ int main(int argc, char ** argv) {
     }

     // allocate output
-    const int n_embd = llama_n_embd(model);
+    const int n_embd = llama_model_n_embd(model);
     std::vector<float> embeddings(n_embd_count * n_embd, 0);
     float * emb = embeddings.data();

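Model metadata getters gain a llama_model_ prefix under the same scheme. A sketch of the three that appear in this commit, given a loaded llama_model * model:

    const int n_ctx_train = llama_model_n_ctx_train(model);  // training-time context length
    const int n_embd      = llama_model_n_embd(model);       // embedding dimension
    const int n_layer     = llama_model_n_layer(model);      // number of layers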

examples/gritlm/gritlm.cpp

Lines changed: 3 additions & 3 deletions
@@ -53,7 +53,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
     llama_decode(ctx, batch);

     // get embedding dimensions
-    uint64_t n_embd = llama_n_embd(model);
+    uint64_t n_embd = llama_model_n_embd(model);

     // allocate embedding output
     std::vector<float> emb_unorm(n_embd, 0.0f);
@@ -171,7 +171,7 @@ int main(int argc, char * argv[]) {
     llama_model * model = llama_model_load_from_file(params.model.c_str(), mparams);

     // create generation context
-    llama_context * ctx = llama_new_context_with_model(model, cparams);
+    llama_context * ctx = llama_init_from_model(model, cparams);

     auto sparams = llama_sampler_chain_default_params();

@@ -200,7 +200,7 @@ int main(int argc, char * argv[]) {
     const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
     const std::vector<std::vector<float>> q_rep = encode(ctx, queries, gritlm_instruction(instruction));

-    const int n_embd = llama_n_embd(model);
+    const int n_embd = llama_model_n_embd(model);

     const float cosine_sim_q0_d0 = common_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
     const float cosine_sim_q0_d1 = common_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
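
The similarity scoring is unchanged: once n_embd is known, common_embd_similarity_cos (from common.h) compares two n_embd-length vectors by cosine similarity. A hand-rolled equivalent for illustration only (not the library implementation; assumes nonzero norms):

    #include <cmath>

    static float cosine_sim(const float * a, const float * b, int n) {
        float dot = 0.0f, na = 0.0f, nb = 0.0f;
        for (int i = 0; i < n; i++) {
            dot += a[i] * b[i];
            na  += a[i] * a[i];
            nb  += b[i] * b[i];
        }
        return dot / (std::sqrt(na) * std::sqrt(nb));
    }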

examples/imatrix/imatrix.cpp

Lines changed: 3 additions & 4 deletions
@@ -7,7 +7,6 @@
 #include <cstdio>
 #include <cstring>
 #include <ctime>
-#include <sstream>
 #include <thread>
 #include <mutex>
 #include <vector>
@@ -40,7 +39,7 @@ class IMatrixCollector {
     void set_params(common_params params) { m_params = std::move(params); }
     bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
     void save_imatrix(int ncall = -1) const;
-    bool load_imatrix(const char * file_name);
+    bool load_imatrix(const char * fname);
 private:
     std::unordered_map<std::string, Stats> m_stats;
     common_params m_params;
@@ -471,7 +470,7 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params) {
     const int n_chunk_max = tokens.size() / n_ctx;

     const int n_chunk = params.n_chunks < 0 ? n_chunk_max : std::min(params.n_chunks, n_chunk_max);
-    const int n_vocab = llama_n_vocab(vocab);
+    const int n_vocab = llama_vocab_n_vocab(vocab);
     const int n_batch = params.n_batch;

     int count = 0;
@@ -630,7 +629,7 @@ int main(int argc, char ** argv) {
         return 1;
     }

-    const int n_ctx_train = llama_n_ctx_train(model);
+    const int n_ctx_train = llama_model_n_ctx_train(model);
     if (params.n_ctx > n_ctx_train) {
         LOG_WRN("%s: model was trained on only %d context tokens (%d specified)\n",
                 __func__, n_ctx_train, params.n_ctx);
