
Commit acd6320

Merge remote-tracking branch 'upstream/master' into dry-sampling-post-refactor

2 parents: 304c815 + e01c67a

File tree: 15 files changed (+997, -303 lines)

README.md
Lines changed: 3 additions & 0 deletions

@@ -122,6 +122,7 @@ Typically finetunes of the base models below are supported as well.
 - Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp)
 - Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs)
 - C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp)
+- C#/VB.NET (more features - community license): [LM-Kit.NET](https://docs.lm-kit.com/lm-kit-net/index.html)
 - Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s)
 - Clojure: [phronmophobic/llama.clj](https://github.com/phronmophobic/llama.clj)
 - React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn)
@@ -172,6 +173,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
 - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)
 - [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL)
+- [PocketPal AI - An iOS and Android App](https://github.com/a-ghorbani/pocketpal-ai) (MIT)

 *(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

@@ -187,6 +189,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:

 - [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp
 - [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs
+- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly

 **Games:**
 - [Lucy's Labyrinth](https://github.com/MorganRO8/Lucys_Labyrinth) - A simple maze game where agents controlled by an AI model will try to trick you.

common/arg.cpp
Lines changed: 1 addition & 1 deletion

@@ -1125,7 +1125,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING"));
     add_opt(common_arg(
-        {"--attention"}, "{causal,non,causal}",
+        {"--attention"}, "{causal,non-causal}",
         "attention type for embeddings, use model default if unspecified",
         [](common_params & params, const std::string & value) {
             /**/ if (value == "causal") { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; }
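The hunk is cut off mid-lambda by the diff context window. For orientation, a plausible completion of the value parser, sketched from the naming visible above (the `non-causal` branch and the error case are assumptions, not shown in this diff):

    // assumed completion of the --attention parser; only the "causal" branch is visible above
    [](common_params & params, const std::string & value) {
        /**/ if (value == "causal")     { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL;     }
        else if (value == "non-causal") { params.attention_type = LLAMA_ATTENTION_TYPE_NON_CAUSAL; }
        else { throw std::invalid_argument("invalid attention type: " + value); }
    }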

common/common.cpp
Lines changed: 2 additions & 2 deletions

@@ -1035,7 +1035,7 @@ static ggml_type kv_cache_type_from_str(const std::string & s) {
         return GGML_TYPE_Q5_1;
     }

-    throw std::runtime_error("Invalid cache type: " + s);
+    throw std::runtime_error("Unsupported cache type: " + s);
 }

 struct llama_context_params common_context_params_to_llama(const common_params & params) {
@@ -1047,7 +1047,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.n_ubatch          = params.n_ubatch;
     cparams.n_threads         = params.cpuparams.n_threads;
     cparams.n_threads_batch   = params.cpuparams_batch.n_threads == -1 ?
-                                    params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
+                                params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
     cparams.logits_all        = params.logits_all;
     cparams.embeddings        = params.embedding;
     cparams.rope_scaling_type = params.rope_scaling_type;
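The second hunk above is a whitespace-only realignment (the removed and re-added continuation lines are textually identical). For the first hunk, `kv_cache_type_from_str` is a plain string-to-`ggml_type` mapping; a minimal sketch of its shape, where every accepted name other than `q5_1` is an assumption (only the `q5_1` return and the throw are visible in the hunk):

    #include <stdexcept>
    #include <string>
    #include "ggml.h"

    // minimal sketch; accepted names besides "q5_1" are assumptions
    static ggml_type kv_cache_type_from_str(const std::string & s) {
        if (s == "f16")  { return GGML_TYPE_F16;  }
        if (s == "q8_0") { return GGML_TYPE_Q8_0; }
        if (s == "q5_0") { return GGML_TYPE_Q5_0; }
        if (s == "q5_1") { return GGML_TYPE_Q5_1; }

        throw std::runtime_error("Unsupported cache type: " + s);
    }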

common/sampling.cpp
Lines changed: 37 additions & 51 deletions

@@ -177,10 +177,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
         params.penalize_nl,
         params.ignore_eos));

-    if (params.temp > 0.0f) {
-        if (params.mirostat == 0) {
-            for (const auto & cnstr : params.samplers) {
-                switch (cnstr) {
+    if (params.mirostat == 0) {
+        for (const auto & cnstr : params.samplers) {
+            switch (cnstr) {
                 case COMMON_SAMPLER_TYPE_DRY:
                     {
                         std::vector<const char*> c_breakers;
@@ -192,56 +191,43 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
                         llama_sampler_chain_add(result->chain, llama_sampler_init_dry      (model, context_size, params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                     }
                     break;
-                    case COMMON_SAMPLER_TYPE_TOP_K:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k));
-                        break;
-                    case COMMON_SAMPLER_TYPE_TOP_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep));
-                        break;
-                    case COMMON_SAMPLER_TYPE_MIN_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep));
-                        break;
-                    case COMMON_SAMPLER_TYPE_XTC:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
-                        break;
-                    case COMMON_SAMPLER_TYPE_TFS_Z:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
-                        break;
-                    case COMMON_SAMPLER_TYPE_TYPICAL_P:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep));
-                        break;
-                    case COMMON_SAMPLER_TYPE_TEMPERATURE:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
-                        break;
-                    case COMMON_SAMPLER_TYPE_INFILL:
-                        llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (model));
-                        break;
-                    default:
-                        GGML_ASSERT(false && "unknown sampler type");
-                }
+                case COMMON_SAMPLER_TYPE_TOP_K:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k));
+                    break;
+                case COMMON_SAMPLER_TYPE_TOP_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_MIN_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_XTC:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
+                    break;
+                case COMMON_SAMPLER_TYPE_TFS_Z:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_tail_free(params.tfs_z, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TYPICAL_P:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep));
+                    break;
+                case COMMON_SAMPLER_TYPE_TEMPERATURE:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
+                    break;
+                case COMMON_SAMPLER_TYPE_INFILL:
+                    llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (model));
+                    break;
+                default:
+                    GGML_ASSERT(false && "unknown sampler type");
             }
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-            llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
-        } else if (params.mirostat == 1) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
-        } else if (params.mirostat == 2) {
-            llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
-        } else {
-            GGML_ASSERT(false && "unknown mirostat version");
         }
+        llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
+    } else if (params.mirostat == 1) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_n_vocab(model), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
+    } else if (params.mirostat == 2) {
+        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
+        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
     } else {
-        if (params.n_probs > 0) {
-            // some use cases require to sample greedily, but still obtain the probabilities of the top tokens
-            // ref: https://github.com/ggerganov/llama.cpp/pull/9605
-            //
-            // the following will not produce exactly the same probs as applyging softmax to the full vocabulary, but
-            // it is much faster, since we avoid sorting all tokens and should give a good approximation
-            llama_sampler_chain_add(result->chain, llama_sampler_init_top_k(params.n_probs));
-            llama_sampler_chain_add(result->chain, llama_sampler_init_softmax());
-        }
-        llama_sampler_chain_add(result->chain, llama_sampler_init_greedy());
+        GGML_ASSERT(false && "unknown mirostat version");
     }

     return result;
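The net effect of the refactor above is that the sampler chain is now built from `params.samplers` whenever `mirostat == 0`, regardless of temperature, and ends in `llama_sampler_init_dist` rather than an explicit softmax plus a greedy fallback. For readers unfamiliar with this API, a minimal self-contained sketch of assembling and using such a chain (the concrete values 40, 0.95f, 0.80f, and 1234 are illustrative assumptions, not project defaults):

    #include "llama.h"

    // sketch of a sampler chain analogous to the mirostat == 0 path above;
    // parameter values are illustrative assumptions
    static llama_sampler * make_example_chain(void) {
        llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());

        llama_sampler_chain_add(chain, llama_sampler_init_top_k(40));       // keep the 40 most likely tokens
        llama_sampler_chain_add(chain, llama_sampler_init_top_p(0.95f, 1)); // nucleus sampling, min_keep = 1
        llama_sampler_chain_add(chain, llama_sampler_init_temp (0.80f));    // temperature scaling
        llama_sampler_chain_add(chain, llama_sampler_init_dist (1234));     // final draw from the distribution

        return chain;
    }

    // usage: llama_token tok = llama_sampler_sample(chain, ctx, -1);
    //        llama_sampler_free(chain); // when done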

examples/llama.swiftui/llama.cpp.swift/LibLlama.swift
Lines changed: 0 additions & 1 deletion

@@ -46,7 +46,6 @@ actor LlamaContext {
         let sparams = llama_sampler_chain_default_params()
         self.sampling = llama_sampler_chain_init(sparams)
         llama_sampler_chain_add(self.sampling, llama_sampler_init_temp(0.4))
-        llama_sampler_chain_add(self.sampling, llama_sampler_init_softmax())
         llama_sampler_chain_add(self.sampling, llama_sampler_init_dist(1234))
     }
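This mirrors the common/sampling.cpp change above: the distribution sampler added by `llama_sampler_init_dist` appears to normalize the candidate probabilities itself, which would make a separate `llama_sampler_init_softmax()` stage in the chain redundant.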
