
Commit 8f297a8

core: Modify default values for inference parameters
1 parent a4e3412 commit 8f297a8

3 files changed: +12 -7 lines changed


app/src/main/java/io/shubham0204/smollmandroid/data/ChatsDB.kt

Lines changed: 2 additions & 2 deletions
@@ -59,8 +59,8 @@ data class Chat(
     /**
      * LLM inference parameters that are used for this chat.
      */
-    var minP: Float = 0.05f,
-    var temperature: Float = 1.0f,
+    var minP: Float = 0.1f,
+    var temperature: Float = 0.8f,
     var nThreads: Int = 4,
     var useMmap: Boolean = true,
     var useMlock: Boolean = false,

smollm/src/main/cpp/LLMInference.cpp

Lines changed: 6 additions & 1 deletion
@@ -62,9 +62,14 @@ LLMInference::loadModel(const char* model_path, float minP, float temperature, b
     llama_sampler_chain_params sampler_params = llama_sampler_chain_default_params();
     sampler_params.no_perf = true; // disable performance metrics
     _sampler = llama_sampler_chain_init(sampler_params);
-    llama_sampler_chain_add(_sampler, llama_sampler_init_min_p(minP, 1));
+
     llama_sampler_chain_add(_sampler, llama_sampler_init_temp(temperature));
     llama_sampler_chain_add(_sampler, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
+    if (minP >= 0.01f) {
+        // minP = 0.0 (disabled)
+        // minP can be adjusted across 100 steps between [0.0,1.0], the smallest step being 0.01
+        llama_sampler_chain_add(_sampler, llama_sampler_init_min_p(minP, 1));
+    }
 
     _formattedMessages = std::vector<char>(llama_n_ctx(_ctx));
     _messages.clear();
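With this change, the min-p sampler is only added to the llama.cpp sampler chain when minP is at least 0.01f; smaller values (effectively 0.0) disable min-p sampling while the temperature and distribution samplers stay active. Below is a minimal Kotlin-side sketch of the two cases, assuming the minP field of SmolLM.InferenceParams (see SmolLM.kt below) is the value forwarded to LLMInference::loadModel():

// Sketch only, assuming InferenceParams.minP is the value passed to loadModel().
// A value below the 0.01 step size means llama_sampler_init_min_p() is never added,
// so min-p sampling is disabled for this session.
val minPDisabled = SmolLM.InferenceParams(minP = 0.0f)

// Any value in [0.01, 1.0] (100 steps of 0.01) keeps the min-p sampler in the chain.
val minPEnabled = SmolLM.InferenceParams(minP = 0.05f)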

smollm/src/main/java/io/shubham0204/smollm/SmolLM.kt

Lines changed: 4 additions & 4 deletions
@@ -145,9 +145,9 @@ class SmolLM {
      * Data class to hold the inference parameters for the LLM.
      *
      * @property minP The minimum probability for a token to be considered.
-     * Also known as top-P sampling. (Default: 0.01f)
+     * Also known as top-P sampling. (Default: 0.1f)
      * @property temperature The temperature for sampling. Higher values make the output more random.
-     * (Default: 1.0f)
+     * (Default: 0.8f)
      * @property storeChats Whether to store the chat history in memory. If true, the LLM will
      * remember previous interactions in the current session. (Default: true)
      * @property contextSize The context size (in tokens) for the LLM. This determines how much
@@ -165,8 +165,8 @@ class SmolLM {
      * being swapped out to disk, potentially improving performance. (Default: false)
      */
     data class InferenceParams(
-        val minP: Float = 0.01f,
-        val temperature: Float = 1.0f,
+        val minP: Float = 0.1f,
+        val temperature: Float = 0.8f,
         val storeChats: Boolean = true,
         val contextSize: Long? = null,
         val chatTemplate: String? = null,
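As a usage note, here is a short sketch of constructing the updated InferenceParams with the new defaults versus explicit overrides. How the object is then handed to the model load call is not part of this diff, so only construction is shown, and the override values are illustrative:

// New defaults after this commit: minP = 0.1f, temperature = 0.8f.
val defaults = SmolLM.InferenceParams()

// Explicit overrides; fields not listed keep the defaults shown in the diff above.
val custom = SmolLM.InferenceParams(
    minP = 0.1f,
    temperature = 0.7f,   // illustrative value, lower means less random output
    storeChats = true,
    contextSize = 4096L,  // illustrative value; defaults to null per the diff above
)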
