Skip to content

Commit 4c2a3da

Browse files
committed
multi-language support
1 parent e491316 commit 4c2a3da

File tree

1 file changed

+34
-34
lines changed

1 file changed

+34
-34
lines changed

python-realtimeSTT-NL2SQL-TTS/src/RealtimeSTT_NL2SQL_TTS_AudioPlayback.py

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,24 @@
1212
from oci.config import from_file
1313
from oci.auth.signers.security_token_signer import SecurityTokenSigner
1414
from oci.ai_speech_realtime import (
15-
RealtimeClient,
16-
RealtimeClientListener,
15+
RealtimeSpeechClient,
16+
RealtimeSpeechClientListener,
1717
RealtimeParameters,
1818
)
1919
from aiohttp import web
2020

2121
from oci.ai_speech import AIServiceSpeechClient
2222
from oci.ai_speech.models import SynthesizeSpeechDetails
2323

24-
#1 change speech imports to new version/name as current is deprecated
25-
#2 show thick db client example
26-
#3 use PCM as WAV is deprecated. The playback code is unchanged even though the format changes from WAV to PCM
27-
2824
latest_thetime = None
2925
latest_question = None
3026
latest_answer = None
3127
compartment_id = os.getenv('COMPARTMENT_ID')
3228
print(f"compartment_id: {compartment_id}")
3329

30+
# If using thick mode/driver, do the following to load needed libraries...
31+
# (client can be downloaded from https://www.oracle.com/database/technologies/instant-client/winx64-64-downloads.html)
32+
# oracledb.init_oracle_client(lib_dir=r"C:\[path_to_instant_client]\instantclient_23_7")
3433
connection = oracledb.connect(
3534
user="moviestream",
3635
password="Welcome12345",
@@ -109,7 +108,7 @@ def play_audio(file_path):
109108
print(f"Error playing audio: {e}")
110109

111110

112-
class SpeechListener(RealtimeClientListener):
111+
class SpeechListener(RealtimeSpeechClientListener):
113112
def on_result(self, result):
114113
global cummulativeResult, isSelect, isNarrate, isShowSQL, isRunSQL, isExplainSQL, last_result_time
115114
if result["transcriptions"][0]["isFinal"]:
@@ -178,7 +177,7 @@ async def check_idle():
178177

179178

180179
def authenticator():
181-
config = from_file("~/.oci/config", "MYSPEECHAIPROFILE")
180+
config = from_file("~/.oci/config", "DEFAULT")
182181
with open(config["security_token_file"], "r") as f:
183182
token = f.readline()
184183
private_key = oci.signer.load_private_key_from_file(config["key_file"])
@@ -203,7 +202,7 @@ def executeSelectAI():
203202

204203
query = """SELECT DBMS_CLOUD_AI.GENERATE(
205204
prompt => :prompt,
206-
profile_name => 'AIHOLO',
205+
profile_name => 'GENAI',
207206
action => :action)
208207
FROM dual"""
209208

@@ -223,32 +222,33 @@ def executeSelectAI():
223222

224223
if selectai_action in ("showsql", "runsql", "explainsql"):
225224
return
226-
# API key-based authentication...
225+
# API key-based authentication, using the Phoenix OCI region - https://docs.oracle.com/en-us/iaas/Content/speech/using/speech.htm#ser-limits
227226
config = oci.config.from_file("~/.oci/config", "DEFAULT")
228-
speech_client = AIServiceSpeechClient(config)
229-
230-
text_to_speech = SynthesizeSpeechDetails(
231-
text=f" {latest_answer}",
232-
is_stream_enabled=False,
233-
configuration=oci.ai_speech.models.TtsOracleConfiguration(
234-
model_family="ORACLE",
235-
# Brian Annabelle Bob Stacy Phil Cindy Brad
236-
model_details=oci.ai_speech.models.TtsOracleTts2NaturalModelDetails(voice_id="Brian"),
237-
speech_settings=oci.ai_speech.models.TtsOracleSpeechSettings(
238-
speech_mark_types=["WORD"]
239-
),
240-
)
241-
)
242-
243-
response = speech_client.synthesize_speech(synthesize_speech_details=text_to_speech)
244-
245-
with open("TTSoutput.wav", "wb") as audio_file:
246-
audio_file.write(response.data.content)
247-
248-
print("Speech synthesis completed and saved as TTSoutput.wav")
227+
ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)
228+
synthesize_speech_response = ai_speech_client.synthesize_speech(
229+
synthesize_speech_details=oci.ai_speech.models.SynthesizeSpeechDetails(
230+
text=f" {latest_answer}",
231+
is_stream_enabled=True,
232+
compartment_id=compartment_id,
233+
configuration=oci.ai_speech.models.TtsOracleConfiguration(
234+
model_family="ORACLE",
235+
model_details=oci.ai_speech.models.TtsOracleTts1StandardModelDetails(
236+
model_name="TTS_1_STANDARD",
237+
voice_id="Bob"),
238+
speech_settings=oci.ai_speech.models.TtsOracleSpeechSettings(
239+
text_type="SSML",
240+
sample_rate_in_hz=28000,
241+
output_format="PCM",
242+
speech_mark_types=["WORD"])),
243+
audio_config=oci.ai_speech.models.TtsBaseAudioConfig(
244+
config_type="BASE_AUDIO_CONFIG")
245+
) )
246+
with open("TTSoutput.pcm", "wb") as audio_file:
247+
audio_file.write(synthesize_speech_response.data.content)
248+
print("Speech synthesis completed and saved as TTSoutput.pcm")
249249

250250
# Play the generated speech
251-
play_audio("TTSoutput.wav")
251+
play_audio("TTSoutput.pcm")
252252

253253
except Exception as e:
254254
print(f"An error occurred: {e}")
@@ -276,7 +276,7 @@ async def handle_request(request):
276276
realtime_speech_parameters.final_silence_threshold_in_ms = 2000
277277

278278
realtime_speech_url = "wss://realtime.aiservice.us-phoenix-1.oci.oraclecloud.com"
279-
client = RealtimeClient(
279+
client = RealtimeSpeechClient(
280280
config=config,
281281
realtime_speech_parameters=realtime_speech_parameters,
282282
listener=SpeechListener(),
@@ -286,7 +286,7 @@ async def handle_request(request):
286286
)
287287

288288
# Instance, resource principal, or session token-based authentication (as shown below) can also be used
289-
# client = RealtimeClient(
289+
# client = RealtimeSpeechClient(
290290
# config=config,
291291
# realtime_speech_parameters=realtime_speech_parameters,
292292
# listener=SpeechListener(),

0 commit comments

Comments
 (0)