Skip to content

Commit ca5074d

Browse files
committed
Merge branch 'eso_b6267' into crokeso
2 parents 31cf666 + 3014fa1 commit ca5074d

File tree

115 files changed

+3176
-31641
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

115 files changed

+3176
-31641
lines changed

.github/workflows/build-riscv-native.yml

Lines changed: 0 additions & 43 deletions
This file was deleted.

Makefile

Lines changed: 49 additions & 47 deletions
Large diffs are not rendered by default.

colab.ipynb

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@
7070
"WCommand = \"\"\n",
7171
"#@markdown <hr>\n",
7272
"LoadTTSModel = False #@param {type:\"boolean\"}\n",
73-
"TTSModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\"]{allow-input: true}\n",
73+
"TTSModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\",\"https://huggingface.co/koboldcpp/tts/resolve/main/Kokoro_no_espeak_Q4.gguf\"]{allow-input: true}\n",
7474
"WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n",
7575
"TTSCommand = \"\"\n",
7676
"#@markdown <hr>\n",
@@ -127,6 +127,8 @@
127127
" WCommand = \"\"\n",
128128
"if TTSModel and WavTokModel and LoadTTSModel:\n",
129129
" TTSCommand = f\"--ttsmodel {TTSModel} --ttswavtokenizer {WavTokModel} --ttsgpu\"\n",
130+
"elif TTSModel and LoadTTSModel:\n",
131+
" TTSCommand = f\"--ttsmodel {TTSModel} --ttsgpu\"\n",
130132
"else:\n",
131133
" TTSCommand = \"\"\n",
132134
"if EmbeddingsModel and LoadEmbeddingsModel:\n",

common/arg.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1533,6 +1533,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
15331533
params.ctx_shift = false;
15341534
}
15351535
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
1536+
add_opt(common_arg(
1537+
{"--context-shift"},
1538+
string_format("enables context shift on infinite text generation (default: %s)", params.ctx_shift ? "enabled" : "disabled"),
1539+
[](common_params & params) {
1540+
params.ctx_shift = true;
1541+
}
1542+
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_CONTEXT_SHIFT"));
15361543
add_opt(common_arg(
15371544
{"--chunks"}, "N",
15381545
string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
@@ -1826,7 +1833,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
18261833
[](common_params & params, const std::string & value) {
18271834
params.sampling.top_n_sigma = std::stof(value);
18281835
}
1829-
).set_examples({LLAMA_EXAMPLE_MAIN}).set_sparam());
1836+
).set_sparam());
18301837
add_opt(common_arg(
18311838
{"--xtc-probability"}, "N",
18321839
string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability),

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -366,7 +366,7 @@ struct common_params {
366366
bool cont_batching = true; // insert new sequences for decoding on-the-fly
367367
bool flash_attn = false; // flash attention
368368
bool no_perf = false; // disable performance metrics
369-
bool ctx_shift = true; // context shift on inifinite text generation
369+
bool ctx_shift = false; // context shift on infinite text generation
370370
bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
371371
bool kv_unified = false; // enable unified KV cache
372372

0 commit comments

Comments (0)