
Commit 8f297a8

core: Modify default values for inference parameters
1 parent a4e3412 commit 8f297a8

3 files changed: +12 -7 lines changed


app/src/main/java/io/shubham0204/smollmandroid/data/ChatsDB.kt

Lines changed: 2 additions & 2 deletions
@@ -59,8 +59,8 @@ data class Chat(
     /**
      * LLM inference parameters that are used for this chat.
      */
-    var minP: Float = 0.05f,
-    var temperature: Float = 1.0f,
+    var minP: Float = 0.1f,
+    var temperature: Float = 0.8f,
     var nThreads: Int = 4,
     var useMmap: Boolean = true,
     var useMlock: Boolean = false,

smollm/src/main/cpp/LLMInference.cpp

Lines changed: 6 additions & 1 deletion
@@ -62,9 +62,14 @@ LLMInference::loadModel(const char* model_path, float minP, float temperature, b
     llama_sampler_chain_params sampler_params = llama_sampler_chain_default_params();
     sampler_params.no_perf = true; // disable performance metrics
     _sampler = llama_sampler_chain_init(sampler_params);
-    llama_sampler_chain_add(_sampler, llama_sampler_init_min_p(minP, 1));
+
     llama_sampler_chain_add(_sampler, llama_sampler_init_temp(temperature));
     llama_sampler_chain_add(_sampler, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
+    if (minP >= 0.01f) {
+        // minP = 0.0 (disabled)
+        // minP can be adjusted across 100 steps between [0.0,1.0], the smallest step being 0.01
+        llama_sampler_chain_add(_sampler, llama_sampler_init_min_p(minP, 1));
+    }
 
     _formattedMessages = std::vector<char>(llama_n_ctx(_ctx));
     _messages.clear();
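With this change, the min-p sampler is only added to the llama.cpp sampler chain when minP is at least 0.01f; smaller values (effectively 0.0) disable min-p sampling while the temperature and distribution samplers stay active. Below is a minimal Kotlin-side sketch of the two cases, assuming the minP field of SmolLM.InferenceParams (see SmolLM.kt below) is the value forwarded to LLMInference::loadModel():

// Sketch only, assuming InferenceParams.minP is the value passed to loadModel().
// A value below the 0.01 step size means llama_sampler_init_min_p() is never added,
// so min-p sampling is disabled for this session.
val minPDisabled = SmolLM.InferenceParams(minP = 0.0f)

// Any value in [0.01, 1.0] (100 steps of 0.01) keeps the min-p sampler in the chain.
val minPEnabled = SmolLM.InferenceParams(minP = 0.05f)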

smollm/src/main/java/io/shubham0204/smollm/SmolLM.kt

Lines changed: 4 additions & 4 deletions
@@ -145,9 +145,9 @@ class SmolLM {
      * Data class to hold the inference parameters for the LLM.
      *
      * @property minP The minimum probability for a token to be considered.
-     * Also known as top-P sampling. (Default: 0.01f)
+     * Also known as top-P sampling. (Default: 0.1f)
      * @property temperature The temperature for sampling. Higher values make the output more random.
-     * (Default: 1.0f)
+     * (Default: 0.8f)
      * @property storeChats Whether to store the chat history in memory. If true, the LLM will
      * remember previous interactions in the current session. (Default: true)
      * @property contextSize The context size (in tokens) for the LLM. This determines how much
@@ -165,8 +165,8 @@ class SmolLM {
      * being swapped out to disk, potentially improving performance. (Default: false)
      */
     data class InferenceParams(
-        val minP: Float = 0.01f,
-        val temperature: Float = 1.0f,
+        val minP: Float = 0.1f,
+        val temperature: Float = 0.8f,
         val storeChats: Boolean = true,
         val contextSize: Long? = null,
         val chatTemplate: String? = null,
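As a usage note, here is a short sketch of constructing the updated InferenceParams with the new defaults versus explicit overrides. How the object is then handed to the model load call is not part of this diff, so only construction is shown, and the override values are illustrative:

// New defaults after this commit: minP = 0.1f, temperature = 0.8f.
val defaults = SmolLM.InferenceParams()

// Explicit overrides; fields not listed keep the defaults shown in the diff above.
val custom = SmolLM.InferenceParams(
    minP = 0.1f,
    temperature = 0.7f,   // illustrative value, lower means less random output
    storeChats = true,
    contextSize = 4096L,  // illustrative value; defaults to null per the diff above
)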
