Skip to content

Commit f8a9634

Browse files
committed
better xtts and oai speech (+1 squashed commits)
Squashed commits: [34b9c15] better xtts and oai speech
1 parent 70ba616 commit f8a9634

File tree

2 files changed

+19
-3
lines changed

2 files changed

+19
-3
lines changed

kcpp_docs.embd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1453,7 +1453,7 @@
14531453
"application/json": {
14541454
"example": {
14551455
"input": "hello world, how are you today?",
1456-
"voice": "fire",
1456+
"voice": "kobo",
14571457
},
14581458
"schema": {
14591459
"properties": {

koboldcpp.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1365,7 +1365,12 @@ def tts_generate(genparams):
13651365
inputs = tts_generation_inputs()
13661366
inputs.prompt = prompt.encode("UTF-8")
13671367
inputs.speaker_seed = voice
1368-
inputs.audio_seed = -1
1368+
aseed = -1
1369+
try:
1370+
aseed = int(genparams.get("seed", -1))
1371+
except Exception:
1372+
aseed = -1
1373+
inputs.audio_seed = aseed
13691374
inputs.quiet = is_quiet
13701375
ret = handle.tts_generate(inputs)
13711376
outstr = ""
@@ -1847,7 +1852,10 @@ def LaunchWebbrowser(target_url, failedmsg):
18471852
try:
18481853
if os.name == "posix" and "DISPLAY" in os.environ: # UNIX-like systems
18491854
import subprocess
1850-
result = subprocess.run(["/usr/bin/env", "xdg-open", target_url], check=True)
1855+
clean_env = os.environ.copy()
1856+
clean_env.pop("LD_LIBRARY_PATH", None)
1857+
clean_env["PATH"] = "/usr/bin:/bin"
1858+
result = subprocess.run(["/usr/bin/env", "xdg-open", target_url], check=True, env=clean_env)
18511859
if result.returncode == 0:
18521860
return # fallback successful
18531861
raise RuntimeError("no xdg-open")
@@ -2323,6 +2331,10 @@ def do_GET(self):
23232331

23242332
elif self.path.endswith(('/speakers_list')): #xtts compatible
23252333
response_body = (json.dumps(["kobo","cheery","sleepy","tutor","shouty","bored","record"]).encode()) #some random voices for them to enjoy
2334+
elif self.path.endswith(('/speakers')): #xtts compatible
2335+
response_body = (json.dumps([{"name":"kobo","voice_id":"kobo","preview_url":""},{"name":"cheery","voice_id":"cheery","preview_url":""},{"name":"sleepy","voice_id":"sleepy","preview_url":""},{"name":"tutor","voice_id":"tutor","preview_url":""},{"name":"shouty","voice_id":"shouty","preview_url":""},{"name":"bored","voice_id":"bored","preview_url":""},{"name":"record","voice_id":"record","preview_url":""}]).encode()) #some random voices for them to enjoy
2336+
elif self.path.endswith(('/get_tts_settings')): #xtts compatible
2337+
response_body = (json.dumps({"temperature":0.75,"speed":1,"length_penalty":1,"repetition_penalty":1,"top_p":1,"top_k":4,"enable_text_splitting":True,"stream_chunk_size":100}).encode()) #fixed placeholder settings returned to xtts clients
23262338

23272339
elif self.path.endswith(('/api/tags')): #ollama compatible
23282340
response_body = (json.dumps({"models":[{"name":"koboldcpp","model":friendlymodelname,"modified_at":"2024-07-19T15:26:55.6122841+08:00","size":394998579,"digest":"b5dc5e784f2a3ee1582373093acf69a2f4e2ac1710b253a001712b86a61f88bb","details":{"parent_model":"","format":"gguf","family":"koboldcpp","families":["koboldcpp"],"parameter_size":"128M","quantization_level":"Q4_0"}}]}).encode())
@@ -2622,6 +2634,9 @@ def do_POST(self):
26222634
else:
26232635
response_body = (json.dumps([]).encode())
26242636

2637+
elif self.path.endswith('/set_tts_settings'): #return dummy response
2638+
response_body = (json.dumps({"message": "Settings successfully applied"}).encode())
2639+
26252640
if response_body is not None:
26262641
self.send_response(response_code)
26272642
self.send_header('content-length', str(len(response_body)))
@@ -2802,6 +2817,7 @@ def do_POST(self):
28022817
wav_data = base64.b64decode(gen) # Decode the Base64 string into binary data
28032818
self.send_response(200)
28042819
self.send_header('content-length', str(len(wav_data))) # Set content length
2820+
self.send_header('Content-Disposition', 'attachment; filename="output.wav"')
28052821
self.end_headers(content_type='audio/wav')
28062822
self.wfile.write(wav_data) # Write the binary WAV data to the response
28072823
except Exception as ex:

0 commit comments

Comments
 (0)