feat(tts): add streaming synthesis sample (#12642)

Jacoblevy1999 · glasnt · web-flow · commit e134962ca6c0 · 2024-10-03T07:36:34.000+10:00
* Create streaming_tts_quickstart.py

Add quickstart documentation for new streaming synthesize API

* Update streaming_tts_quickstart.py

* Update streaming_tts_quickstart.py

* Update streaming_tts_quickstart.py

* Update streaming_tts_quickstart.py

* Update streaming_tts_quickstart.py

* Update texttospeech/snippets/streaming_tts_quickstart.py

Co-authored-by: Katie McLaughlin <katie@glasnt.com>

* Update texttospeech/snippets/streaming_tts_quickstart.py

Co-authored-by: Katie McLaughlin <katie@glasnt.com>

* Update texttospeech/snippets/streaming_tts_quickstart.py

Co-authored-by: Katie McLaughlin <katie@glasnt.com>

* Update texttospeech/snippets/streaming_tts_quickstart.py

Co-authored-by: Katie McLaughlin <katie@glasnt.com>

* Update streaming_tts_quickstart.py

resolving some comments from PR

* Update streaming_tts_quickstart.py

responding to comments in PR

* Update streaming_tts_quickstart.py

remove whitespace

* Create streaming_tts_quickstart_test.py

add test

* Update streaming_tts_quickstart_test.py

* Update streaming_tts_quickstart_test.py

* Update requirements.txt

* Update streaming_tts_quickstart.py

* Update streaming_tts_quickstart.py

---------

Co-authored-by: Katie McLaughlin <katie@glasnt.com>
diff --git a/texttospeech/snippets/requirements.txt b/texttospeech/snippets/requirements.txt
@@ -1,3 +1,3 @@
 future==1.0.0
-google-cloud-texttospeech==2.14.1
+google-cloud-texttospeech==2.17.2
 google-cloud-storage==2.9.0
diff --git a/texttospeech/snippets/streaming_tts_quickstart.py b/texttospeech/snippets/streaming_tts_quickstart.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Google Cloud Text-To-Speech API streaming sample application.
+
+Example usage:
+    python streaming_tts_quickstart.py
+"""
+
+
+def run_streaming_tts_quickstart():
+    # [START tts_synthezise_streaming]
+    """Synthesizes speech from a stream of input text.
+    """
+    from google.cloud import texttospeech
+    import itertools
+
+    client = texttospeech.TextToSpeechClient()
+
+    # See https://cloud.google.com/text-to-speech/docs/voices for all voices.
+    streaming_config = texttospeech.StreamingSynthesizeConfig(voice=texttospeech.VoiceSelectionParams(name="en-US-Journey-D", language_code="en-US"))
+
+    # Set the config for your stream. The first request must contain your config, and then each subsequent request must contain text.
+    config_request = texttospeech.StreamingSynthesizeRequest(streaming_config=streaming_config)
+
+    # Request generator. Consider using Gemini or another LLM with output streaming as a generator.
+    def request_generator():
+        yield texttospeech.StreamingSynthesizeRequest(input=texttospeech.StreamingSynthesisInput(text="Hello there. "))
+        yield texttospeech.StreamingSynthesizeRequest(input=texttospeech.StreamingSynthesisInput(text="How are you "))
+        yield texttospeech.StreamingSynthesizeRequest(input=texttospeech.StreamingSynthesisInput(text="today? It's "))
+        yield texttospeech.StreamingSynthesizeRequest(input=texttospeech.StreamingSynthesisInput(text="such nice weather outside."))
+
+    streaming_responses = client.streaming_synthesize(itertools.chain([config_request], request_generator()))
+    for response in streaming_responses:
+        print(f"Audio content size in bytes is: {len(response.audio_content)}")
+    # [END tts_synthezise_streaming]
+
+
+if __name__ == "__main__":  # Run the sample only when this file is executed as a script.
+    run_streaming_tts_quickstart()
diff --git a/texttospeech/snippets/streaming_tts_quickstart_test.py b/texttospeech/snippets/streaming_tts_quickstart_test.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import streaming_tts_quickstart
+
+
+def test_streaming_synthesize(capsys):
+    streaming_tts_quickstart.run_streaming_tts_quickstart()
+
+    out, err = capsys.readouterr()
+    assert "Audio content size in bytes is" in out