Updated README and Colab notebook to add Text-To-Speech (TTS) model support

LostRuins · LostRuins · commit 4d92b4e98ea7 · 2025-01-14T00:31:52.000+08:00
diff --git a/README.md b/README.md
@@ -176,3 +176,4 @@ when you can't use the precompiled binary directly, we provide an automated buil
   - Image Generation: [Anything v3](https://huggingface.co/admruul/anything-v3.0/resolve/main/Anything-V3.0-pruned-fp16.safetensors) or [Deliberate V2](https://huggingface.co/Yntec/Deliberate2/resolve/main/Deliberate_v2.safetensors) or [Dreamshaper SDXL](https://huggingface.co/Lykon/dreamshaper-xl-v2-turbo/resolve/main/DreamShaperXL_Turbo_v2_1.safetensors)
   - Image Recognition MMproj: [Pick the correct one for your model architecture here](https://huggingface.co/koboldcpp/mmproj/tree/main)
   - Speech Recognition: [Whisper models for Speech-To-Text](https://huggingface.co/koboldcpp/whisper/tree/main)
+  - Text-To-Speech: [TTS models for Narration](https://huggingface.co/koboldcpp/tts/tree/main)
diff --git a/colab.ipynb b/colab.ipynb
@@ -67,6 +67,11 @@
     "LoadSpeechModel = False #@param {type:\"boolean\"}\n",
     "SpeechModel = \"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\" #@param [\"https://huggingface.co/koboldcpp/whisper/resolve/main/whisper-base.en-q5_1.bin\"]{allow-input: true}\n",
     "WCommand = \"\"\n",
+    "#@markdown <hr>\n",
+    "LoadTTSModel = False #@param {type:\"boolean\"}\n",
+    "TTSModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/OuteTTS-0.2-500M-Q4_0.gguf\"]{allow-input: true}\n",
+    "WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n",
+    "TTSCommand = \"\"\n",
     "\n",
     "import os\n",
     "if not os.path.isfile(\"/opt/bin/nvidia-smi\"):\n",
@@ -85,6 +90,10 @@
     "  WCommand = \"--whispermodel wmodel.bin\"\n",
     "else:\n",
     "  WCommand = \"\"\n",
+    "if TTSModel and WavTokModel and LoadTTSModel:\n",
+    "  TTSCommand = \"--ttsmodel ttsmodel.bin --ttswavtokenizer ttswavtok.bin --ttsgpu\"\n",
+    "else:\n",
+    "  TTSCommand = \"\"\n",
     "if FlashAttention:\n",
     "  FACommand = \"--flashattention\"\n",
     "else:\n",
@@ -110,7 +119,10 @@
     "  !aria2c -x 10 -o imodel.gguf --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $ImgModel\n",
     "if WCommand:\n",
     "  !aria2c -x 10 -o wmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $SpeechModel\n",
-    "!./koboldcpp_linux model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand\n"
+    "if TTSCommand:\n",
+    "  !aria2c -x 10 -o ttsmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $TTSModel\n",
+    "  !aria2c -x 10 -o ttswavtok.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $WavTokModel\n",
+    "!./koboldcpp_linux model.gguf --usecublas 0 mmq --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand\n"
    ]
   }
  ],