11
11
from riva .client .proto .riva_audio_pb2 import AudioEncoding
12
12
import wave
13
13
14
def add_custom_dictionary_to_config(req, custom_dictionary):
    """
    Serialize :param:`custom_dictionary` into ``req.custom_dictionary``.

    Every ``key``/``value`` pair of the dictionary is rendered into one string entry and
    all entries are joined with commas. The resulting string is assigned to the
    ``custom_dictionary`` attribute of :param:`req`.

    Args:
        req: request object whose ``custom_dictionary`` attribute is assigned.
        custom_dictionary (:obj:`dict`, `optional`): mapping to serialize. When it is
            :obj:`None`, :param:`req` is left untouched. An empty dictionary assigns an
            empty string.
    """
    # Callers pass their optional argument straight through, so ``None`` is the
    # common case; without this guard ``None.items()`` raises AttributeError.
    if custom_dictionary is None:
        return

    # NOTE(review): the whitespace inside this literal is part of the emitted string;
    # confirm the separator the receiving side expects before changing it.
    entries = [f"{ key } { value } " for key, value in custom_dictionary.items()]
    req.custom_dictionary = ','.join(entries)
18
+
14
19
class SpeechSynthesisService :
15
20
"""
16
21
A class for synthesizing speech from text. Provides :meth:`synthesize` which returns entire audio for a text
@@ -38,6 +43,7 @@ def synthesize(
38
43
audio_prompt_encoding : AudioEncoding = AudioEncoding .LINEAR_PCM ,
39
44
quality : int = 20 ,
40
45
future : bool = False ,
46
+ custom_dictionary : Optional [dict ] = None ,
41
47
) -> Union [rtts .SynthesizeSpeechResponse , _MultiThreadedRendezvous ]:
42
48
"""
43
49
Synthesizes an entire audio for text :param:`text`.
@@ -56,6 +62,7 @@ def synthesize(
56
62
audio but also takes longer to generate the audio. Ranges between 1-40.
57
63
future (:obj:`bool`, defaults to :obj:`False`): Whether to return an async result instead of usual
58
64
response. You can get a response by calling ``result()`` method of the future object.
65
+ custom_dictionary (:obj:`dict`, `optional`): Dictionary of key-value pairs mapping each grapheme to its corresponding phoneme.
59
66
60
67
Returns:
61
68
:obj:`Union[riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse, grpc._channel._MultiThreadedRendezvous]`:
@@ -81,6 +88,8 @@ def synthesize(
81
88
req .zero_shot_data .encoding = audio_prompt_encoding
82
89
req .zero_shot_data .quality = quality
83
90
91
+ add_custom_dictionary_to_config (req , custom_dictionary )
92
+
84
93
func = self .stub .Synthesize .future if future else self .stub .Synthesize
85
94
return func (req , metadata = self .auth .get_auth_metadata ())
86
95
@@ -94,6 +103,7 @@ def synthesize_online(
94
103
audio_prompt_file : Optional [str ] = None ,
95
104
audio_prompt_encoding : AudioEncoding = AudioEncoding .LINEAR_PCM ,
96
105
quality : int = 20 ,
106
+ custom_dictionary : Optional [dict ] = None ,
97
107
) -> Generator [rtts .SynthesizeSpeechResponse , None , None ]:
98
108
"""
99
109
Synthesizes and yields output audio chunks for text :param:`text` as the chunks
@@ -111,6 +121,7 @@ def synthesize_online(
111
121
audio_prompt_encoding: (:obj:`AudioEncoding`): Encoding of audio prompt file, e.g. ``AudioEncoding.LINEAR_PCM``.
112
122
quality: (:obj:`int`): This defines the number of times decoder is run. Higher number improves quality of generated
113
123
audio but also takes longer to generate the audio. Ranges between 1-40.
124
+ custom_dictionary (:obj:`dict`, `optional`): Dictionary of key-value pairs mapping each grapheme to its corresponding phoneme.
114
125
115
126
Yields:
116
127
:obj:`riva.client.proto.riva_tts_pb2.SynthesizeSpeechResponse`: a response with output. You may find
@@ -138,4 +149,6 @@ def synthesize_online(
138
149
req .zero_shot_data .encoding = audio_prompt_encoding
139
150
req .zero_shot_data .quality = quality
140
151
152
+ add_custom_dictionary_to_config (req , custom_dictionary )
153
+
141
154
return self .stub .SynthesizeOnline (req , metadata = self .auth .get_auth_metadata ())
0 commit comments