llama-tts

marcoStocchi · marcoStocchi · commit ddddfd742814 · 2025-03-08T08:03:59.000+01:00
* Enabled the optional command-line argument '-o' for llama-tts to specify the output filename. It defaults to 'output.wav'.

* The program now returns ENOENT if writing the output file fails.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
+# NOTES
+NOTES-*
+
 # Extensions
 
 *.a
@@ -33,7 +36,7 @@
 .vs/
 .vscode/
 nppBackup
-
+*.code-workspace
 
 # Coverage
 
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -1872,13 +1872,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 ? params.lora_outfile.c_str()
                 : ex == LLAMA_EXAMPLE_CVECTOR_GENERATOR
                     ? params.cvector_outfile.c_str()
-                    : params.out_file.c_str()),
+                    : ex == LLAMA_EXAMPLE_TTS
+                        ? params.ttss_outfile.c_str()
+                        : params.out_file.c_str()),
         [](common_params & params, const std::string & value) {
             params.out_file = value;
             params.cvector_outfile = value;
             params.lora_outfile = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA}));
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS}));
     add_opt(common_arg(
         {"-ofreq", "--output-frequency"}, "N",
         string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
diff --git a/common/common.h b/common/common.h
@@ -426,7 +426,10 @@ struct common_params {
 
     bool spm_infill = false; // suffix/prefix/middle pattern for infill
 
-    std::string lora_outfile = "ggml-lora-merged-f16.gguf";
+    // default output filenames
+    std::string
+        lora_outfile = "ggml-lora-merged-f16.gguf",
+        ttss_outfile = "output.wav";
 
     // batched-bench params
     bool batched_bench_output_jsonl = false;
diff --git a/examples/tts/tts.cpp b/examples/tts/tts.cpp
@@ -87,11 +87,11 @@ struct wav_header {
     uint32_t data_size;
 };
 
-static void save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) {
+static bool save_wav16(const std::string & fname, const std::vector<float> & data, int sample_rate) {
     std::ofstream file(fname, std::ios::binary);
     if (!file) {
-        LOG_ERR("%s: Failed to open file '%s' for writing", __func__, fname.c_str());
-        return;
+        LOG_ERR("%s: Failed to open file '%s' for writing.\n", __func__, fname.c_str());
+        return false;
     }
 
     wav_header header;
@@ -108,7 +108,8 @@ static void save_wav16(const std::string & fname, const std::vector<float> & dat
         file.write(reinterpret_cast<const char*>(&pcm_sample), sizeof(pcm_sample));
     }
 
-    file.close();
+    //file.close();
+    return file.good();
 }
 
 static void fill_hann_window(int length, bool periodic, float * output) {
@@ -545,6 +546,8 @@ int main(int argc, char ** argv) {
     params.sampling.top_k = 4;
     params.sampling.samplers = { COMMON_SAMPLER_TYPE_TOP_K, };
 
+    params.out_file = params.ttss_outfile;
+
     if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_TTS, print_usage)) {
         return 1;
     }
@@ -1060,8 +1063,6 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
     }
 #endif
 
-    const std::string fname = "output.wav";
-
     const int n_sr = 24000; // sampling rate
 
     // zero out first 0.25 seconds
@@ -1072,11 +1073,18 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
     LOG_INF("%s: time for spectral ops: %.3f ms\n", __func__, (ggml_time_us() - t_spec_start) / 1000.0f);
     LOG_INF("%s: total time:            %.3f ms\n", __func__, (ggml_time_us() - t_main_start) / 1000.0f);
 
-    save_wav16(fname, audio, n_sr);
+    int retval(0);
 
-    LOG_INF("%s: audio written to file '%s'\n", __func__, fname.c_str());
+    if (save_wav16(params.out_file, audio, n_sr)) {
+        LOG_INF("%s: audio written to file '%s'\n", __func__, params.out_file.c_str());
+    }
+
+    else {
+        retval=ENOENT;
+        LOG_ERR("Check path exists, directory write permissions, free disk space.\n");
+    }
 
     llama_backend_free();
 
-    return 0;
+    return retval;
 }