release v4

samtupy · samtupy · commit a45369e39e33 · 2025-04-27T01:09:45.000-05:00
diff --git a/coagulator/coagulator.py b/coagulator/coagulator.py
@@ -16,8 +16,8 @@
 import websockets.asyncio.server
 
 def g(): pass #globals
-g.provider_rev = 3
-g.user_rev = 3
+g.provider_rev = 4
+g.user_rev = 4
 g.next_client_id = 1
 g.next_web_id = 10000000
 
diff --git a/readme.md b/readme.md
@@ -1,5 +1,5 @@
 # Speech To Audio Relay (STAR)
-[Download windows client](https://github.com/samtupy/star/releases/latest/download/STAR_win64_v3.zip)
+[Download windows client](https://github.com/samtupy/star/releases/latest/download/STAR_win64_v4.zip)
 
 This is a set of components intended to ease the creation of audio productions that involve the synthesis of text to speech to audio, particularly where many voices that might be contained on any number of different computers or devices are involved.
 
diff --git a/user/STAR.py b/user/STAR.py
@@ -19,7 +19,7 @@
 import websockets.uri
 import wx
 
-USER_REVISION = 3
+USER_REVISION = 4
 
 speech = accessible_output2.outputs.auto.Auto()
 sound_output=output.Output(0)
@@ -354,6 +354,7 @@ def __init__(self, parent = None):
 		self.websocket = None
 		self.script_continuous_preview = False
 		self.render_total = 0
+		self.last_renderable_lines = []
 		self.Show()
 		self.Centre()
 		sizer = wx.BoxSizer(wx.VERTICAL)
@@ -526,13 +527,14 @@ def on_render(self, evt):
 			self.render_output_path_tmp = tempfile.TemporaryDirectory()
 			self.render_output_path = self.render_output_path_tmp.name
 		if (not "clear_output_on_render" in config or config.as_bool("clear_output_on_render")) and self.render_title.Value:
-			[os.remove(i) for i in glob.glob(os.path.join(config.get("render_path", os.path.join(os.getcwd(), "output")), self.render_title.Value, "*.wav"))]
+			[os.remove(i) for i in glob.glob(os.path.join(config.get("render_path", os.path.join(os.getcwd(), "output")), self.render_title.Value, "*.*"))]
 		self.render_btn.Label = "Cancel"
 		if selected_renderable_lines: renderable_lines = selected_renderable_lines
 		self.render_total = len(renderable_lines)
 		self.render_progress.Range = self.render_total
 		self.render_progress.Value = 0
 		self.render_progress.Show()
+		self.last_renderable_lines = renderable_lines
 		for l in renderable_lines:
 			if not self.render_total: return # render canceled
 			self.audiospeak(l[1], render_filename = l[0])
@@ -543,7 +545,7 @@ def on_render_complete(self, canceled = False):
 		if not canceled:
 			title = self.render_title.Value
 			if os.path.splitext(title)[1] in [".wav", ".mp3"]:
-				items = [i for i in glob.glob(os.path.join(self.render_output_path, "*.wav"))]
+				items = [os.path.join(self.render_output_path, i[0] + "." + self.speech_cache[i[1]]["extension"]) for i in self.last_renderable_lines]
 				combined = AudioSegment(data = b"", sample_width = 2, frame_rate = 44100, channels = 1)
 				for i in items:
 					if len(combined) > 0: combined += AudioSegment.silent(config.as_int("render_consolidated_silence") if "render_consolidated_silence" in config else 200)
diff --git a/user/readme.md b/user/readme.md
@@ -1,5 +1,5 @@
 # STAR user client documentation
-The STAR user client is the frontend interface to this relay system. With it, you can connect to any coagulator you know about before synthesizing text into either audio that is played through speakers or rendered to audio files.
+The STAR user client is the frontend interface to this text-to-speech relay system. With it, you can connect to any coagulator you know about before synthesizing text into audio that can either be played through speakers or rendered to audio files.
 
 If you are trying to learn how to host a coagulator so that your friends can share voices, then this [coagulator quickstart guide](https://github.com/samtupy/star/blob/main/coagulator/readme.md) on STAR's github will help you do that.
 
@@ -137,6 +137,22 @@ Each provider has a --configure command line option. So if you run balcony.exe -
 You can then run balcony.exe or sam.exe standalone and the voices will be shared using the set configuration. It's common to create shortcuts to the providers and place them in the shell:startup location accessible from the run dialog, causing voices to be shared to a list of coagulators on system boot.
 
 ## Change log
+### Revision 4
+This update to STAR contains all changes to the project that have taken place over the last 4+ months, including a slightly better visual UI, more providers, the coagulator web frontend, security/stability improvements, and bug fixes.
+* Improves the visual layout for the user client UI, though it's still very likely quite far from perfect.
+* New providers in the STAR source package: bestspeech / Keynote Gold, openai, elevenlabs, and googlecloud.
+* Somewhat improved the consolidated render feature, though it still needs work. All of the clips now get rendered, and in the correct order, though rendering is still a bit slow and has weird resampling.
+* The coagulator now provides an http frontend and API as a lightweight alternative to the STAR client.
+* Fixed a bug in the balcony provider which could cause text containing quotes to be output through speakers!
+* Major provider stability improvements, from the ability to specify maximum concurrent requests to vastly improved synthesis cancelation to general robustness, including a 10 MB default max packet size. Before this update, providers would easily crash if too much text was fed to them. Now they handle that situation much more gracefully.
+* Fixed a bug in the user client which was causing the render complete noise to be played on synthesis error.
+* The STAR repository now includes a script which requests permission for macsay to be able to access and provide your macOS personal voices!
+* STAR can now handle audio in formats other than wav when required. For example, some cloud services actually offer the best sounding quality as mp3 or vorbis, and it would just be a waste of bandwidth to deceptively decode to wav before providing.
+* Implemented default pitch and rate functionality in the provider, and set macsay's default rate to 195 wpm.
+* Minor provider code cleanup, including reducing very noisy error output when connections can't be established.
+* Fixed user client not reporting synthesis errors sent from a provider.
+* Fixed broken SAPI4 voice selection when a SAPI4 and SAPI5 voice existed with the same name.
+* Minor documentation updates including correcting a misdocumented keyboard shortcut.
 ### Revision 3
 This is a major update to STAR which includes a complete user client rewrite and consequently the introduction of several useful features.
 * The user client was completely rewritten from scratch in python and WX Widgets, meaning that though feedback must still be gathered to make it look right or even to insure that controls are visible at all, the user client should soon  be able to be used without a screen reader within a couple of revisions!