multi-language support

paulparkinson · paulparkinson · commit 4c2a3da1d1bf · 2025-03-06T17:28:58.000-05:00
diff --git a/python-realtimeSTT-NL2SQL-TTS/src/RealtimeSTT_NL2SQL_TTS_AudioPlayback.py b/python-realtimeSTT-NL2SQL-TTS/src/RealtimeSTT_NL2SQL_TTS_AudioPlayback.py
@@ -12,25 +12,24 @@
 from oci.config import from_file
 from oci.auth.signers.security_token_signer import SecurityTokenSigner
 from oci.ai_speech_realtime import (
-    RealtimeClient,
-    RealtimeClientListener,
+    RealtimeSpeechClient,
+    RealtimeSpeechClientListener,
     RealtimeParameters,
 )
 from aiohttp import web
 
 from oci.ai_speech import AIServiceSpeechClient
 from oci.ai_speech.models import SynthesizeSpeechDetails
 
-#1 change speech imports to new version/name as current is deprecated
-#2 show thick db client example
-#3 use PCM as WAV is deprecated. code to play is unchanged eventhough changing from WAV to PCM
-
 latest_thetime = None
 latest_question = None
 latest_answer = None
 compartment_id = os.getenv('COMPARTMENT_ID')
 print(f"compartment_id: {compartment_id}")
 
+# If using the oracledb thick mode/driver, initialize the Oracle Instant Client libraries first (see the commented call below).
+# (Instant Client can be downloaded from https://www.oracle.com/database/technologies/instant-client/winx64-64-downloads.html)
+# oracledb.init_oracle_client(lib_dir=r"C:\[path_to_instant_client]\instantclient_23_7")
 connection = oracledb.connect(
     user="moviestream",
     password="Welcome12345",
@@ -109,7 +108,7 @@ def play_audio(file_path):
         print(f"Error playing audio: {e}")
 
 
-class SpeechListener(RealtimeClientListener):
+class SpeechListener(RealtimeSpeechClientListener):
     def on_result(self, result):
         global cummulativeResult, isSelect, isNarrate, isShowSQL, isRunSQL, isExplainSQL, last_result_time
         if result["transcriptions"][0]["isFinal"]:
@@ -178,7 +177,7 @@ async def check_idle():
 
 
 def authenticator():
-    config = from_file("~/.oci/config", "MYSPEECHAIPROFILE")
+    config = from_file("~/.oci/config", "DEFAULT")
     with open(config["security_token_file"], "r") as f:
         token = f.readline()
     private_key = oci.signer.load_private_key_from_file(config["key_file"])
@@ -203,7 +202,7 @@ def executeSelectAI():
 
     query = """SELECT DBMS_CLOUD_AI.GENERATE(
                 prompt       => :prompt,
-                profile_name => 'AIHOLO', 
+                profile_name => 'GENAI',
                 action       => :action)
             FROM dual"""
 
@@ -223,32 +222,33 @@ def executeSelectAI():
 
             if selectai_action in ("showsql", "runsql", "explainsql"):
                 return
-            # API key-based authentication...
+            # API key-based authentication, using the Phoenix OCI region (see service limits: https://docs.oracle.com/en-us/iaas/Content/speech/using/speech.htm#ser-limits)
             config = oci.config.from_file("~/.oci/config", "DEFAULT")
-            speech_client = AIServiceSpeechClient(config)
-
-            text_to_speech = SynthesizeSpeechDetails(
-                text=f" {latest_answer}",
-                is_stream_enabled=False,
-                configuration=oci.ai_speech.models.TtsOracleConfiguration(
-                    model_family="ORACLE",
-                    # Brian Annabelle Bob Stacy Phil Cindy Brad
-                    model_details=oci.ai_speech.models.TtsOracleTts2NaturalModelDetails(voice_id="Brian"),
-                    speech_settings=oci.ai_speech.models.TtsOracleSpeechSettings(
-                        speech_mark_types=["WORD"]
-                    ),
-                )
-            )
-
-            response = speech_client.synthesize_speech(synthesize_speech_details=text_to_speech)
-
-            with open("TTSoutput.wav", "wb") as audio_file:
-                audio_file.write(response.data.content)
-
-            print("Speech synthesis completed and saved as TTSoutput.wav")
+            ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)
+            synthesize_speech_response = ai_speech_client.synthesize_speech(
+                synthesize_speech_details=oci.ai_speech.models.SynthesizeSpeechDetails(
+                    text=f" {latest_answer}",
+                    is_stream_enabled=True,
+                    compartment_id=compartment_id,
+                    configuration=oci.ai_speech.models.TtsOracleConfiguration(
+                        model_family="ORACLE",
+                        model_details=oci.ai_speech.models.TtsOracleTts1StandardModelDetails(
+                            model_name="TTS_1_STANDARD",
+                            voice_id="Bob"),
+                        speech_settings=oci.ai_speech.models.TtsOracleSpeechSettings(
+                            text_type="SSML",
+                            sample_rate_in_hz=28000,
+                            output_format="PCM",
+                            speech_mark_types=["WORD"])),
+                    audio_config=oci.ai_speech.models.TtsBaseAudioConfig(
+                        config_type="BASE_AUDIO_CONFIG")
+                )       )
+            with open("TTSoutput.pcm", "wb") as audio_file:
+               audio_file.write(synthesize_speech_response.data.content)
+            print("Speech synthesis completed and saved as TTSoutput.pcm")
 
             # Play the generated speech
-            play_audio("TTSoutput.wav")
+            play_audio("TTSoutput.pcm")
 
     except Exception as e:
         print(f"An error occurred: {e}")
@@ -276,7 +276,7 @@ async def handle_request(request):
     realtime_speech_parameters.final_silence_threshold_in_ms = 2000
 
     realtime_speech_url = "wss://realtime.aiservice.us-phoenix-1.oci.oraclecloud.com"
-    client = RealtimeClient(
+    client = RealtimeSpeechClient(
         config=config,
         realtime_speech_parameters=realtime_speech_parameters,
         listener=SpeechListener(),
@@ -286,7 +286,7 @@ async def handle_request(request):
     )
 
     # Instance, resource principal, or session token-based authentication (as shown below) can also be used
-    # client = RealtimeClient(
+    # client = RealtimeSpeechClient(
     #     config=config,
     #     realtime_speech_parameters=realtime_speech_parameters,
     #     listener=SpeechListener(),