Skip to content

Commit faa6528

Browse files
committed
feat: Enhance speech-to-text transcription

- Add detailed logging system
- Fix language code to use en-US
- Update description format
- Add supported language codes to README
1 parent baa03dc commit faa6528

File tree

2 files changed

+106
-43
lines changed

2 files changed

+106
-43
lines changed

oci-subtitle-translation/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,25 @@ The solution supports translation to the following languages:
124124

125125
For an updated list of supported languages, refer to [the OCI Documentation](https://docs.oracle.com/en-us/iaas/language/using/translate.htm#supported-langs).
126126

127+
## Supported Language Codes
128+
129+
For the Speech-to-Text transcription service with GENERIC domain, the following language codes are supported:
130+
131+
| Language | Code |
132+
|----------|------|
133+
| US English | en-US |
134+
| British English | en-GB |
135+
| Australian English | en-AU |
136+
| Indian English | en-IN |
137+
| Spanish (Spain) | es-ES |
138+
| Brazilian Portuguese | pt-BR |
139+
| Hindi (India) | hi-IN |
140+
| French (France) | fr-FR |
141+
| German (Germany) | de-DE |
142+
| Italian (Italy) | it-IT |
143+
144+
Note: When using the service, use the exact language code format shown above (for example, `en-US`). Short codes such as `en` or `es` will not work.
145+
127146
## Contributing
128147

129148
This project is open source. Please submit your contributions by forking this repository and submitting a pull request! Oracle appreciates any contributions that are made by the open source community.
Lines changed: 87 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,59 +1,103 @@
11
# https://docs.oracle.com/en-us/iaas/api/#/en/speech/20220101/TranscriptionJob/CreateTranscriptionJob
22

33
import oci
4+
import yaml
5+
import argparse
6+
import sys
7+
from datetime import datetime
48

5-
# Create a default config using DEFAULT profile in default location
6-
# Refer to
7-
# https://docs.cloud.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm#SDK_and_CLI_Configuration_File
8-
# for more info
9-
config = oci.config.from_file()
9+
def log_step(message, is_error=False):
    """Write one timestamped log line to standard output.

    Args:
        message: Text placed after the level tag.
        is_error: When true the line is tagged ``ERROR``; otherwise ``INFO``.
    """
    level = "INFO"
    if is_error:
        level = "ERROR"
    # Local wall-clock time, second resolution.
    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    print(f"[{stamp}] {level}: {message}")
1014

15+
# Command-line interface: one required option naming the audio object.
parser = argparse.ArgumentParser(
    description='Generate SRT file from audio using OCI Speech service',
)
parser.add_argument(
    '--input-file',
    required=True,
    help='Input audio file name in the configured bucket',
)
args = parser.parse_args()

log_step(f"Starting transcription process for file: {args.input_file}")

# Read the DEFAULT profile from the OCI SDK config file in its default
# location; the script cannot continue without it, so any failure is
# logged and the process exits with status 1.
try:
    config = oci.config.from_file()
    log_step("Successfully loaded OCI configuration")
except Exception as config_error:
    log_step(f"Failed to load OCI configuration: {config_error!s}", is_error=True)
    sys.exit(1)

# Build the Speech service client from that SDK configuration.
try:
    ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)
    log_step("Successfully initialized AI Speech client")
except Exception as client_error:
    log_step(f"Failed to initialize AI Speech client: {client_error!s}", is_error=True)
    sys.exit(1)
1537

1638
# Load config from yaml file
1739
def load_config():
    """Load configuration from config.yaml.

    Reads ``config.yaml`` from the current working directory, checks that
    the ``speech`` section carries the keys logged here (``bucket_name``
    and ``namespace``), logs them, and returns the parsed document.

    Returns:
        The object produced by ``yaml.safe_load`` for ``config.yaml``.

    Raises:
        SystemExit: With status 1, after logging an error, when the file
            cannot be read or parsed, or when the ``speech`` section or one
            of the keys above is missing.
    """
    try:
        # Decode explicitly as UTF-8 rather than the platform default so the
        # same file parses identically on every OS.
        with open('config.yaml', 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)

        # An empty file parses to None and a missing key would otherwise
        # surface as a bare "'bucket_name'" message; report it explicitly.
        speech = config.get('speech') if isinstance(config, dict) else None
        if not isinstance(speech, dict):
            raise ValueError("missing required section 'speech'")
        for key in ('bucket_name', 'namespace'):
            if key not in speech:
                raise ValueError(f"missing required key 'speech.{key}'")

        log_step("Successfully loaded config.yaml")
        log_step(f"Using bucket: {speech['bucket_name']}")
        log_step(f"Using namespace: {speech['namespace']}")
        return config
    except Exception as e:
        # Top-level boundary for configuration problems: log and stop.
        log_step(f"Failed to load config.yaml: {str(e)}", True)
        sys.exit(1)
2151

2252
config_yaml = load_config()

# Fixed job settings, named once so the log summary and the request agree.
output_format = "SRT"
language_code = "en-US"
speaker_count = 2
profanity_mode = "TAG"
output_prefix = "transcriptions"

# Summarize what is about to be requested.
log_step("Creating transcription job with following settings:")
log_step(f" • Input file: {args.input_file}")
log_step(f" • Output format: {output_format}")
log_step(f" • Language: {language_code}")
log_step(f" • Diarization: Enabled ({speaker_count} speakers)")
log_step(f" • Profanity filter: Enabled ({profanity_mode} mode)")

try:
    speech_models = oci.ai_speech.models
    speech_cfg = config_yaml['speech']
    compartment_id = speech_cfg['compartment_id']

    # Input: the single named object in the configured bucket.
    input_location = speech_models.ObjectListFileInputLocation(
        location_type="OBJECT_LIST_FILE_INPUT_LOCATION",
        object_location=speech_models.ObjectLocation(
            namespace_name=speech_cfg['namespace'],
            bucket_name=speech_cfg['bucket_name'],
            object_names=[args.input_file],
        ),
    )

    # Output: same bucket, under the fixed prefix.
    output_location = speech_models.OutputLocation(
        namespace_name=speech_cfg['namespace'],
        bucket_name=speech_cfg['bucket_name'],
        prefix=output_prefix,
    )

    display_name = f"Transcription_{args.input_file}"
    description = f"transcription_job_{args.input_file.replace('.', '_')}"

    model_details = speech_models.TranscriptionModelDetails(
        domain="GENERIC",
        language_code=language_code,
        transcription_settings=speech_models.TranscriptionSettings(
            diarization=speech_models.Diarization(
                is_diarization_enabled=True,
                number_of_speakers=speaker_count,
            ),
        ),
    )

    normalization = speech_models.TranscriptionNormalization(
        is_punctuation_enabled=True,
        filters=[
            speech_models.ProfanityTranscriptionFilter(
                type="PROFANITY",
                mode=profanity_mode,
            ),
        ],
    )

    job_details = speech_models.CreateTranscriptionJobDetails(
        compartment_id=compartment_id,
        input_location=input_location,
        output_location=output_location,
        display_name=display_name,
        description=description,
        additional_transcription_formats=[output_format],
        model_details=model_details,
        normalization=normalization,
        freeform_tags={},
        defined_tags={},
    )

    # Send the request to the service.
    create_transcription_job_response = ai_speech_client.create_transcription_job(
        create_transcription_job_details=job_details,
    )

    log_step("Successfully created transcription job")
    log_step("Job details:")
    created_job = create_transcription_job_response.data
    log_step(f" • Job ID: {created_job.id}")
    log_step(f" • Status: {created_job.lifecycle_state}")
    log_step(f" • Output will be saved to: {speech_cfg['bucket_name']}/{output_prefix}/")

except Exception as job_error:
    # Top-level boundary: report the failure and exit with status 1.
    log_step(f"Failed to create transcription job: {job_error!s}", is_error=True)
    sys.exit(1)

0 commit comments

Comments
 (0)