LostRuins
diff --git a/‎Makefile‎
Lines changed: 1 addition & 1 deletion b/‎Makefile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/diffusion/CMakeLists.txt‎
Lines changed: 0 additions & 5 deletions b/‎examples/diffusion/CMakeLists.txt‎
Lines changed: 0 additions & 5 deletions
diff --git a/‎ggml/src/ggml-webgpu/CMakeLists.txt‎
Lines changed: 0 additions & 54 deletions b/‎ggml/src/ggml-webgpu/CMakeLists.txt‎
Lines changed: 0 additions & 54 deletions
diff --git a/‎ggml/src/ggml-zdnn/CMakeLists.txt‎
Lines changed: 0 additions & 36 deletions b/‎ggml/src/ggml-zdnn/CMakeLists.txt‎
Lines changed: 0 additions & 36 deletions
diff --git a/‎koboldcpp.py‎
Lines changed: 7 additions & 6 deletions b/‎koboldcpp.py‎
Lines changed: 7 additions & 6 deletions
diff --git a/‎model_adapter.cpp‎
Lines changed: 14 additions & 0 deletions b/‎model_adapter.cpp‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎model_adapter.h‎
Lines changed: 1 addition & 0 deletions b/‎model_adapter.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎otherarch/sdcpp/util.cpp‎
Lines changed: 1 addition & 1 deletion b/‎otherarch/sdcpp/util.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎otherarch/tts_adapter.cpp‎
Lines changed: 91 additions & 64 deletions b/‎otherarch/tts_adapter.cpp‎
Lines changed: 91 additions & 64 deletions
diff --git a/‎otherarch/ttscpp/cli/cli.cpp‎
Lines changed: 1 addition & 1 deletion b/‎otherarch/ttscpp/cli/cli.cpp‎
Lines changed: 1 addition & 1 deletion
@@ -729,7 +729,7 @@ mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cp
 	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 embedding: examples/embedding/embedding.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
-ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/tts.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
+ttscppmain: otherarch/ttscpp/cli/cli.cpp otherarch/ttscpp/cli/playback.cpp otherarch/ttscpp/cli/playback.h otherarch/ttscpp/cli/write_file.cpp otherarch/ttscpp/cli/write_file.h otherarch/ttscpp/cli/vad.cpp otherarch/ttscpp/cli/vad.h otherarch/ttscpp/src/ttscpp.cpp otherarch/ttscpp/src/ttstokenizer.cpp otherarch/ttscpp/src/ttssampler.cpp otherarch/ttscpp/src/parler_model.cpp otherarch/ttscpp/src/dac_model.cpp otherarch/ttscpp/src/ttsutil.cpp otherarch/ttscpp/src/ttsargs.cpp otherarch/ttscpp/src/ttst5_encoder_model.cpp otherarch/ttscpp/src/phonemizer.cpp otherarch/ttscpp/src/tts_model.cpp otherarch/ttscpp/src/kokoro_model.cpp otherarch/ttscpp/src/dia_model.cpp otherarch/ttscpp/src/orpheus_model.cpp otherarch/ttscpp/src/snac_model.cpp otherarch/ttscpp/src/general_neural_audio_codec.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o ggml-repack.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 
 ggml/src/ggml-vulkan-shaders.cpp:
 
@@ -1826,8 +1826,8 @@ def whisper_generate(genparams):
 def tts_load_model(ttc_model_filename,cts_model_filename):
     global args
     inputs = tts_load_model_inputs()
-    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8")
-    inputs.cts_model_filename = cts_model_filename.encode("UTF-8")
+    inputs.ttc_model_filename = ttc_model_filename.encode("UTF-8") if ttc_model_filename else "".encode("UTF-8")
+    inputs.cts_model_filename = cts_model_filename.encode("UTF-8") if cts_model_filename else "".encode("UTF-8")
     inputs.gpulayers = (999 if args.ttsgpu else 0)
     inputs.flash_attention =  args.flashattention
     thds = args.threads
@@ -5602,7 +5602,7 @@ def export_vars():
             args.embeddingsmaxctx = (0 if embeddings_ctx_var.get()=="" else int(embeddings_ctx_var.get()))
         args.embeddingsgpu = (embeddings_gpu_var.get()==1)
 
-        if tts_model_var.get() != "" and wavtokenizer_var.get() != "":
+        if tts_model_var.get() != "":
             args.ttsthreads = (0 if tts_threads_var.get()=="" else int(tts_threads_var.get()))
             args.ttsmodel = tts_model_var.get()
             args.ttswavtokenizer = wavtokenizer_var.get()
@@ -7201,8 +7201,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
                 exit_with_error(3,"Could not load whisper model: " + whispermodel)
 
     #handle tts model
-    if args.ttsmodel and args.ttsmodel!="" and args.ttswavtokenizer and args.ttswavtokenizer!="":
-        if not os.path.exists(args.ttsmodel) or not os.path.exists(args.ttswavtokenizer):
+    if args.ttsmodel and args.ttsmodel!="":
+        if not os.path.exists(args.ttsmodel) or (args.ttswavtokenizer and args.ttswavtokenizer!="" and not os.path.exists(args.ttswavtokenizer)):
             if args.ignoremissing:
                 print("Ignoring missing TTS model files!")
                 args.ttsmodel = None
@@ -7214,7 +7214,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
             ttsmodelpath = args.ttsmodel
             ttsmodelpath = os.path.abspath(ttsmodelpath)
             wavtokpath = args.ttswavtokenizer
-            wavtokpath = os.path.abspath(wavtokpath)
+            if wavtokpath:
+                wavtokpath = os.path.abspath(wavtokpath)
             loadok = tts_load_model(ttsmodelpath,wavtokpath)
             print("Load TTS Model OK: " + str(loadok))
             if not loadok:
 
@@ -115,6 +115,20 @@ bool gguf_tensor_exists(const std::string & gguf_filename, std::string tensor_na
     return found;
 }
 
+// Returns the value of the "general.architecture" metadata key of a GGUF file.
+// An empty string is returned when the filename is empty, the file cannot be
+// loaded as GGUF, the key is absent, or the key does not hold a string value.
+std::string gguf_get_model_arch(const std::string & gguf_filename)
+{
+    // An empty path can never name a GGUF file; skip the open attempt entirely.
+    if (gguf_filename.empty()) return "";
+
+    struct gguf_init_params ggufparams;
+    ggufparams.no_alloc = true;
+    ggufparams.ctx = NULL; // no ggml_context requested; only the key/value metadata is needed
+    struct gguf_context * ctx = gguf_init_from_file(gguf_filename.c_str(), ggufparams);
+    if (!ctx) return "";
+
+    std::string modelarch = "";
+    const auto keyidx = gguf_find_key(ctx, "general.architecture");
+    // ggml's gguf_get_val_str asserts when the key is not a string, so check the
+    // stored type first instead of aborting the process on a malformed file.
+    if (keyidx >= 0 && gguf_get_kv_type(ctx, keyidx) == GGUF_TYPE_STRING) {
+        modelarch = gguf_get_val_str(ctx, keyidx);
+    }
+    gguf_free(ctx);
+    return modelarch;
+}
+
 //return val: 0=fail, 1=(original ggml, alpaca), 2=(ggmf), 3=(ggjt)
  FileFormat check_file_format(const std::string & fname, FileFormatExtraMeta * fileformatmeta)
  {
 
@@ -132,6 +132,7 @@ void ContextFastForward(std::vector<int> &current_context_tokens, std::vector<in
  int &n_past, std::vector<int> &last_n_tokens, const int nctx, std::vector<int> &smartcontext,
  const bool useSmartContext, const bool requireFullSubset);
 bool gguf_tensor_exists(const std::string & filename, std::string tensor_name, bool exactmatch);
+std::string gguf_get_model_arch(const std::string & filename);
 
 size_t gpttype_calc_new_state_kv();
 size_t gpttype_calc_new_state_tokencount();
 
@@ -357,7 +357,7 @@ void pretty_progress(int step, int steps, float time) {
         }
     }
     progress += "|";
-    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",
+    printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s    ",
            progress.c_str(), step, steps,
            time > 1.0f || time == 0 ? time : (1.0f / time));
     fflush(stdout);  // for linux
 
@@ -26,7 +26,8 @@
 #endif
 
 //imports required for tts.cpp to work
-#include "tts.cpp"
+#include "ttscommon.h"
+#include "ttscpp.cpp"
 #include "ttstokenizer.cpp"
 #include "ttssampler.cpp"
 #include "parler_model.cpp"
@@ -497,6 +498,10 @@ static int code_terminate_id = 151670;
 static int nthreads = 4;
 static int tts_max_len = 4096;
 
+//ttscpp specific
+static generation_configuration * ttscpp_config = nullptr;
+static struct tts_runner * ttscpp_runner = nullptr;
+
 int total_tts_gens = 0;
 
 bool ttstype_load_model(const tts_load_model_inputs inputs)
@@ -532,81 +537,103 @@ bool ttstype_load_model(const tts_load_model_inputs inputs)
 
     std::string modelfile_ttc = inputs.ttc_model_filename;
     std::string modelfile_cts = inputs.cts_model_filename;
-    printf("\nLoading TTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+    std::string detectedarch = gguf_get_model_arch(modelfile_ttc);
+
+    bool is_ttscpp_file = false;
+    if (detectedarch!="" && SUPPORTED_ARCHITECTURES.find(detectedarch) != SUPPORTED_ARCHITECTURES.end()) {
+        is_ttscpp_file = true;
+        printf("\nLoading TTS.CPP Model Arch: %s \n", detectedarch.c_str());
+    }else{
+        printf("\nLoading OuteTTS Model, OuteTTS: %s \nWavTokenizer: %s \n",modelfile_ttc.c_str(),modelfile_cts.c_str());
+        if(modelfile_ttc=="" || modelfile_cts=="")
+        {
+             printf("\nWarning: KCPP OuteTTS missing a file! Make sure both TTS and WavTokenizer models are loaded.\n");
+              return false;
+        }
+    }
 
     ttsdebugmode = inputs.debugmode;
 
     // tts init
-    llama_model_params tts_model_params = llama_model_default_params();
-    llama_context_params tts_ctx_params = llama_context_default_params();
-
-    nthreads = inputs.threads;
-
-    tts_max_len = inputs.ttsmaxlen;
-
-    tts_model_params.use_mmap = false;
-    tts_model_params.use_mlock = false;
-    tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
-    tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
-    int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
-    tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
-    tts_ctx_params.n_ctx = 8192;
-    tts_ctx_params.offload_kqv = true;
-    tts_ctx_params.n_batch = 8192;
-    tts_ctx_params.n_ubatch = 512;
-    tts_ctx_params.n_threads = nthreads;
-    tts_ctx_params.n_threads_batch = nthreads;
-    tts_ctx_params.flash_attn = inputs.flash_attention;
-    tts_ctx_params.kv_unified = true;
-
-    llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
-    ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);
-
-    if (ttc_ctx == nullptr) {
-        printf("\nTTS Load Error: Failed to initialize ttc context!\n");
-        return false;
-    }
+    if (is_ttscpp_file) {
+        ttscpp_config = new generation_configuration("af_alloy", 50, 1.0, 1.0, true, "", 0, 1.0);
+        ttscpp_runner = runner_from_file(modelfile_ttc, inputs.threads, ttscpp_config, true);
+        if (ttscpp_runner == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize TTSCPP!\n");
+            return false;
+        }
+    } else { //outetts only
+        llama_model_params tts_model_params = llama_model_default_params();
+        llama_context_params tts_ctx_params = llama_context_default_params();
+
+        nthreads = inputs.threads;
+
+        tts_max_len = inputs.ttsmaxlen;
+
+        tts_model_params.use_mmap = false;
+        tts_model_params.use_mlock = false;
+        tts_model_params.n_gpu_layers = inputs.gpulayers; //offload if possible
+        tts_model_params.split_mode = llama_split_mode::LLAMA_SPLIT_MODE_LAYER;
+        int kcpp_parseinfo_maindevice = inputs.kcpp_main_gpu<=0?0:inputs.kcpp_main_gpu;
+        tts_model_params.main_gpu = kcpp_parseinfo_maindevice;
+        tts_ctx_params.n_ctx = 8192;
+        tts_ctx_params.offload_kqv = true;
+        tts_ctx_params.n_batch = 8192;
+        tts_ctx_params.n_ubatch = 512;
+        tts_ctx_params.n_threads = nthreads;
+        tts_ctx_params.n_threads_batch = nthreads;
+        tts_ctx_params.flash_attn = inputs.flash_attention;
+        tts_ctx_params.kv_unified = true;
+
+        llama_model * ttcmodel = llama_model_load_from_file(modelfile_ttc.c_str(), tts_model_params);
+        ttc_ctx = llama_init_from_model(ttcmodel, tts_ctx_params);
+
+        if (ttc_ctx == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize ttc context!\n");
+            return false;
+        }
 
-    llama_model * ctsmodel = llama_model_load_from_file(modelfile_cts.c_str(), tts_model_params);
+        llama_model * ctsmodel = llama_model_load_from_file(modelfile_cts.c_str(), tts_model_params);
 
-    tts_ctx_params.embeddings = true; //this requires embeddings instead
-    tts_ctx_params.n_ubatch = tts_ctx_params.n_batch;
-    cts_ctx = llama_init_from_model(ctsmodel, tts_ctx_params);
+        tts_ctx_params.embeddings = true; //this requires embeddings instead
+        tts_ctx_params.n_ubatch = tts_ctx_params.n_batch;
+        cts_ctx = llama_init_from_model(ctsmodel, tts_ctx_params);
 
-    if (cts_ctx == nullptr) {
-        printf("\nTTS Load Error: Failed to initialize cts context!\n");
-        return false;
-    }
+        if (cts_ctx == nullptr) {
+            printf("\nTTS Load Error: Failed to initialize cts context!\n");
+            return false;
+        }
 
-    std::vector<int> tmp = {1, 2, 3, 4};
-    llama_memory_clear(llama_get_memory(ttc_ctx),true);
-    auto er = llama_decode(ttc_ctx, llama_batch_get_one(tmp.data(), tmp.size()));
-    if(er!=0)
-    {
-        printf("\nTTS Eval returned nonzero: %d\n",er);
-        return false;
-    }
+        std::vector<int> tmp = {1, 2, 3, 4};
+        llama_memory_clear(llama_get_memory(ttc_ctx),true);
+        auto er = llama_decode(ttc_ctx, llama_batch_get_one(tmp.data(), tmp.size()));
+        if(er!=0)
+        {
+            printf("\nTTS Eval returned nonzero: %d\n",er);
+            return false;
+        }
 
-    const llama_vocab * ttcvocab = llama_model_get_vocab(ttcmodel);
-    llama_tokens testoks = common_tokenize(ttcvocab,"<|space|>",false,true);
-    if (testoks.size() == 1) {
-        ttsver = TTS_VER_3;
-        printf("\nUsing v0.3 mode");
-        //note that the final word does NOT have a space at the end.
-        space_id = testoks[0];
-        testoks = common_tokenize(ttcvocab,"<|audio_end|>",false,true);
+        const llama_vocab * ttcvocab = llama_model_get_vocab(ttcmodel);
+        llama_tokens testoks = common_tokenize(ttcvocab,"<|space|>",false,true);
         if (testoks.size() == 1) {
-            code_terminate_id = testoks[0];
+            ttsver = TTS_VER_3;
+            printf("\nUsing v0.3 mode");
+            //note that the final word does NOT have a space at the end.
+            space_id = testoks[0];
+            testoks = common_tokenize(ttcvocab,"<|audio_end|>",false,true);
+            if (testoks.size() == 1) {
+                code_terminate_id = testoks[0];
+            }
+        } else {
+            ttsver = TTS_VER_2;
+            printf("\nUsing v0.2 mode");
         }
-    } else {
-        ttsver = TTS_VER_2;
-        printf("\nUsing v0.2 mode");
-    }
 
-    //determine offset of <|0|>
-    testoks = common_tokenize(ttcvocab,"<|0|>",false,true);
-    if (testoks.size() == 1) {
-        cts_offset = testoks[0];
+        //determine offset of <|0|>
+        testoks = common_tokenize(ttcvocab,"<|0|>",false,true);
+        if (testoks.size() == 1) {
+            cts_offset = testoks[0];
+        }
     }
 
     printf("\nTTS Load Complete.\n");
 
@@ -1,4 +1,4 @@
-#include "tts.h"
+#include "ttscpp.h"
 #include "ttsargs.h"
 #include "ttscommon.h"
 #include "playback.h"
Original file line number	Diff line number	Diff line change
`@@ -357,7 +357,7 @@ void pretty_progress(int step, int steps, float time) {`
`357`	`357`	`}`
`358`	`358`	`}`
`359`	`359`	`progress += "\|";`
`360`		`- printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s",`
	`360`	`+ printf(time > 1.0f ? "\r%s %i/%i - %.2fs/it" : "\r%s %i/%i - %.2fit/s ",`
`361`	`361`	`progress.c_str(), step, steps,`
`362`	`362`	`time > 1.0f \|\| time == 0 ? time : (1.0f / time));`
`363`	`363`	`fflush(stdout); // for linux`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-#include "tts.h"`
	`1`	`+#include "ttscpp.h"`
`2`	`2`	`#include "ttsargs.h"`
`3`	`3`	`#include "ttscommon.h"`
`4`	`4`	`#include "playback.h"`