12
12
from oci .config import from_file
13
13
from oci .auth .signers .security_token_signer import SecurityTokenSigner
14
14
from oci .ai_speech_realtime import (
15
- RealtimeClient ,
16
- RealtimeClientListener ,
15
+ RealtimeSpeechClient ,
16
+ RealtimeSpeechClientListener ,
17
17
RealtimeParameters ,
18
18
)
19
19
from aiohttp import web
20
20
21
21
from oci .ai_speech import AIServiceSpeechClient
22
22
from oci .ai_speech .models import SynthesizeSpeechDetails
23
23
24
- #1 change speech imports to new version/name as current is deprecated
25
- #2 show thick db client example
26
- #3 use PCM as WAV is deprecated. Code to play is unchanged even though changing from WAV to PCM
27
-
28
24
latest_thetime = None
29
25
latest_question = None
30
26
latest_answer = None
31
27
compartment_id = os .getenv ('COMPARTMENT_ID' )
32
28
print (f"compartment_id: { compartment_id } " )
33
29
30
+ # If using thick mode/driver, do the following to load needed libraries...
31
+ # (client can be downloaded from https://www.oracle.com/database/technologies/instant-client/winx64-64-downloads.html)
32
+ # oracledb.init_oracle_client(lib_dir=r"C:\[path_to_instant_client]\instantclient_23_7")
34
33
connection = oracledb .connect (
35
34
user = "moviestream" ,
36
35
password = "Welcome12345" ,
@@ -109,7 +108,7 @@ def play_audio(file_path):
109
108
print (f"Error playing audio: { e } " )
110
109
111
110
112
- class SpeechListener (RealtimeClientListener ):
111
+ class SpeechListener (RealtimeSpeechClientListener ):
113
112
def on_result (self , result ):
114
113
global cummulativeResult , isSelect , isNarrate , isShowSQL , isRunSQL , isExplainSQL , last_result_time
115
114
if result ["transcriptions" ][0 ]["isFinal" ]:
@@ -178,7 +177,7 @@ async def check_idle():
178
177
179
178
180
179
def authenticator ():
181
- config = from_file ("~/.oci/config" , "MYSPEECHAIPROFILE " )
180
+ config = from_file ("~/.oci/config" , "DEFAULT " )
182
181
with open (config ["security_token_file" ], "r" ) as f :
183
182
token = f .readline ()
184
183
private_key = oci .signer .load_private_key_from_file (config ["key_file" ])
@@ -203,7 +202,7 @@ def executeSelectAI():
203
202
204
203
query = """SELECT DBMS_CLOUD_AI.GENERATE(
205
204
prompt => :prompt,
206
- profile_name => 'AIHOLO',
205
+ profile_name => 'GENAI',
207
206
action => :action)
208
207
FROM dual"""
209
208
@@ -223,32 +222,33 @@ def executeSelectAI():
223
222
224
223
if selectai_action in ("showsql" , "runsql" , "explainsql" ):
225
224
return
226
- # API key-based authentication...
225
+ # API key-based authentication, using the Phoenix OCI region - https://docs.oracle.com/en-us/iaas/Content/speech/using/speech.htm#ser-limits
227
226
config = oci .config .from_file ("~/.oci/config" , "DEFAULT" )
228
- speech_client = AIServiceSpeechClient (config )
229
-
230
- text_to_speech = SynthesizeSpeechDetails (
231
- text = f" { latest_answer } " ,
232
- is_stream_enabled = False ,
233
- configuration = oci .ai_speech .models .TtsOracleConfiguration (
234
- model_family = "ORACLE" ,
235
- # Brian Annabelle Bob Stacy Phil Cindy Brad
236
- model_details = oci .ai_speech .models .TtsOracleTts2NaturalModelDetails (voice_id = "Brian" ),
237
- speech_settings = oci .ai_speech .models .TtsOracleSpeechSettings (
238
- speech_mark_types = ["WORD" ]
239
- ),
240
- )
241
- )
242
-
243
- response = speech_client .synthesize_speech (synthesize_speech_details = text_to_speech )
244
-
245
- with open ("TTSoutput.wav" , "wb" ) as audio_file :
246
- audio_file .write (response .data .content )
247
-
248
- print ("Speech synthesis completed and saved as TTSoutput.wav" )
227
+ ai_speech_client = oci .ai_speech .AIServiceSpeechClient (config )
228
+ synthesize_speech_response = ai_speech_client .synthesize_speech (
229
+ synthesize_speech_details = oci .ai_speech .models .SynthesizeSpeechDetails (
230
+ text = f" { latest_answer } " ,
231
+ is_stream_enabled = True ,
232
+ compartment_id = compartment_id ,
233
+ configuration = oci .ai_speech .models .TtsOracleConfiguration (
234
+ model_family = "ORACLE" ,
235
+ model_details = oci .ai_speech .models .TtsOracleTts1StandardModelDetails (
236
+ model_name = "TTS_1_STANDARD" ,
237
+ voice_id = "Bob" ),
238
+ speech_settings = oci .ai_speech .models .TtsOracleSpeechSettings (
239
+ text_type = "SSML" ,
240
+ sample_rate_in_hz = 28000 ,
241
+ output_format = "PCM" ,
242
+ speech_mark_types = ["WORD" ])),
243
+ audio_config = oci .ai_speech .models .TtsBaseAudioConfig (
244
+ config_type = "BASE_AUDIO_CONFIG" )
245
+ ) )
246
+ with open ("TTSoutput.pcm" , "wb" ) as audio_file :
247
+ audio_file .write (synthesize_speech_response .data .content )
248
+ print ("Speech synthesis completed and saved as TTSoutput.pcm" )
249
249
250
250
# Play the generated speech
251
- play_audio ("TTSoutput.wav " )
251
+ play_audio ("TTSoutput.pcm " )
252
252
253
253
except Exception as e :
254
254
print (f"An error occurred: { e } " )
@@ -276,7 +276,7 @@ async def handle_request(request):
276
276
realtime_speech_parameters .final_silence_threshold_in_ms = 2000
277
277
278
278
realtime_speech_url = "wss://realtime.aiservice.us-phoenix-1.oci.oraclecloud.com"
279
- client = RealtimeClient (
279
+ client = RealtimeSpeechClient (
280
280
config = config ,
281
281
realtime_speech_parameters = realtime_speech_parameters ,
282
282
listener = SpeechListener (),
@@ -286,7 +286,7 @@ async def handle_request(request):
286
286
)
287
287
288
288
# Instance, resource principal, or session token-based authentication (as shown below) can also be used
289
- # client = RealtimeClient (
289
+ # client = RealtimeSpeechClient (
290
290
# config=config,
291
291
# realtime_speech_parameters=realtime_speech_parameters,
292
292
# listener=SpeechListener(),
0 commit comments