Skip to content

Commit 4999fd8

Browse files
feat: [google-cloud-texttospeech] StreamingSynthesize now supports opus (#13370)
- [ ] Regenerate this pull request now.

PiperOrigin-RevId: 707168599

Source-Link: googleapis/googleapis@d985436
Source-Link: googleapis/googleapis-gen@3fcc3af
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiM2ZjYzNhZmJmOGM5MjA4NGNjNGEzMDIzMmE3NmNhMjQ3NDg5YzNkMCJ9

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Victor Chudnovsky <[email protected]>
1 parent 0c0f37d commit 4999fd8

16 files changed

+56 -13 lines changed

packages/google-cloud-texttospeech/google/cloud/texttospeech/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "2.22.0" # {x-release-please-version}
16+
__version__ = "0.0.0" # {x-release-please-version}

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "2.22.0" # {x-release-please-version}
16+
__version__ = "0.0.0" # {x-release-please-version}

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
ListVoicesResponse,
3535
MultiSpeakerMarkup,
3636
SsmlVoiceGender,
37+
StreamingAudioConfig,
3738
StreamingSynthesisInput,
3839
StreamingSynthesizeConfig,
3940
StreamingSynthesizeRequest,
@@ -65,6 +66,7 @@
6566
"ListVoicesResponse",
6667
"MultiSpeakerMarkup",
6768
"SsmlVoiceGender",
69+
"StreamingAudioConfig",
6870
"StreamingSynthesisInput",
6971
"StreamingSynthesizeConfig",
7072
"StreamingSynthesizeRequest",

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/gapic_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,4 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
#
16-
__version__ = "2.22.0" # {x-release-please-version}
16+
__version__ = "0.0.0" # {x-release-please-version}

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/services/text_to_speech/async_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,7 +434,7 @@ async def sample_synthesize_speech():
434434
voice.language_code = "language_code_value"
435435
436436
audio_config = texttospeech_v1beta1.AudioConfig()
437-
audio_config.audio_encoding = "ALAW"
437+
audio_config.audio_encoding = "PCM"
438438
439439
request = texttospeech_v1beta1.SynthesizeSpeechRequest(
440440
input=input,

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/services/text_to_speech/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -823,7 +823,7 @@ def sample_synthesize_speech():
823823
voice.language_code = "language_code_value"
824824
825825
audio_config = texttospeech_v1beta1.AudioConfig()
826-
audio_config.audio_encoding = "ALAW"
826+
audio_config.audio_encoding = "PCM"
827827
828828
request = texttospeech_v1beta1.SynthesizeSpeechRequest(
829829
input=input,

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/services/text_to_speech_long_audio_synthesize/async_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ async def sample_synthesize_long_audio():
339339
input.text = "text_value"
340340
341341
audio_config = texttospeech_v1beta1.AudioConfig()
342-
audio_config.audio_encoding = "ALAW"
342+
audio_config.audio_encoding = "PCM"
343343
344344
voice = texttospeech_v1beta1.VoiceSelectionParams()
345345
voice.language_code = "language_code_value"

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/services/text_to_speech_long_audio_synthesize/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,7 @@ def sample_synthesize_long_audio():
738738
input.text = "text_value"
739739
740740
audio_config = texttospeech_v1beta1.AudioConfig()
741-
audio_config.audio_encoding = "ALAW"
741+
audio_config.audio_encoding = "PCM"
742742
743743
voice = texttospeech_v1beta1.VoiceSelectionParams()
744744
voice.language_code = "language_code_value"

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
ListVoicesResponse,
2525
MultiSpeakerMarkup,
2626
SsmlVoiceGender,
27+
StreamingAudioConfig,
2728
StreamingSynthesisInput,
2829
StreamingSynthesizeConfig,
2930
StreamingSynthesizeRequest,
@@ -51,6 +52,7 @@
5152
"ListVoicesRequest",
5253
"ListVoicesResponse",
5354
"MultiSpeakerMarkup",
55+
"StreamingAudioConfig",
5456
"StreamingSynthesisInput",
5557
"StreamingSynthesizeConfig",
5658
"StreamingSynthesizeRequest",

packages/google-cloud-texttospeech/google/cloud/texttospeech_v1beta1/types/cloud_tts.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"VoiceCloneParams",
4040
"SynthesizeSpeechResponse",
4141
"Timepoint",
42+
"StreamingAudioConfig",
4243
"StreamingSynthesizeConfig",
4344
"StreamingSynthesisInput",
4445
"StreamingSynthesizeRequest",
@@ -106,6 +107,11 @@ class AudioEncoding(proto.Enum):
106107
8-bit samples that compand 14-bit audio
107108
samples using G.711 PCMU/A-law. Audio content
108109
returned as ALAW also contains a WAV header.
110+
PCM (7):
111+
Uncompressed 16-bit signed little-endian
112+
samples (Linear PCM). Note that as opposed to
113+
LINEAR16, audio will not be wrapped in a WAV (or
114+
any other) header.
109115
"""
110116
AUDIO_ENCODING_UNSPECIFIED = 0
111117
LINEAR16 = 1
@@ -114,6 +120,7 @@ class AudioEncoding(proto.Enum):
114120
OGG_OPUS = 3
115121
MULAW = 5
116122
ALAW = 6
123+
PCM = 7
117124

118125

119126
class ListVoicesRequest(proto.Message):
@@ -723,6 +730,30 @@ class Timepoint(proto.Message):
723730
)
724731

725732

733+
class StreamingAudioConfig(proto.Message):
734+
r"""Description of the desired output audio data.
735+
736+
Attributes:
737+
audio_encoding (google.cloud.texttospeech_v1beta1.types.AudioEncoding):
738+
Required. The format of the audio byte stream. For now,
739+
streaming only supports PCM and OGG_OPUS. All other
740+
encodings will return an error.
741+
sample_rate_hertz (int):
742+
Optional. The synthesis sample rate (in
743+
hertz) for this audio.
744+
"""
745+
746+
audio_encoding: "AudioEncoding" = proto.Field(
747+
proto.ENUM,
748+
number=1,
749+
enum="AudioEncoding",
750+
)
751+
sample_rate_hertz: int = proto.Field(
752+
proto.INT32,
753+
number=2,
754+
)
755+
756+
726757
class StreamingSynthesizeConfig(proto.Message):
727758
r"""Provides configuration information for the
728759
StreamingSynthesize request.
@@ -731,13 +762,21 @@ class StreamingSynthesizeConfig(proto.Message):
731762
voice (google.cloud.texttospeech_v1beta1.types.VoiceSelectionParams):
732763
Required. The desired voice of the
733764
synthesized audio.
765+
streaming_audio_config (google.cloud.texttospeech_v1beta1.types.StreamingAudioConfig):
766+
Optional. The configuration of the
767+
synthesized audio.
734768
"""
735769

736770
voice: "VoiceSelectionParams" = proto.Field(
737771
proto.MESSAGE,
738772
number=1,
739773
message="VoiceSelectionParams",
740774
)
775+
streaming_audio_config: "StreamingAudioConfig" = proto.Field(
776+
proto.MESSAGE,
777+
number=4,
778+
message="StreamingAudioConfig",
779+
)
741780

742781

743782
class StreamingSynthesisInput(proto.Message):

0 commit comments

Comments
 (0)