|
1 | 1 | # https://docs.oracle.com/en-us/iaas/api/#/en/speech/20220101/TranscriptionJob/CreateTranscriptionJob
|
2 | 2 |
|
3 | 3 | import oci
|
| 4 | +import yaml |
| 5 | +import argparse |
| 6 | +import sys |
| 7 | +from datetime import datetime |
4 | 8 |
|
5 |
def log_step(message, is_error=False):
    """Print a timestamped log line.

    Args:
        message: Text to log.
        is_error: When True, tag the line as ERROR and write it to
            stderr so failures remain visible when stdout is piped.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    prefix = "ERROR" if is_error else "INFO"
    # Fix: errors previously went to stdout; route them to stderr.
    stream = sys.stderr if is_error else sys.stdout
    print(f"[{timestamp}] {prefix}: {message}", file=stream)
10 | 14 |
|
# --- Command-line interface -------------------------------------------------
# The single required argument names the audio object to transcribe.
arg_parser = argparse.ArgumentParser(
    description='Generate SRT file from audio using OCI Speech service',
)
arg_parser.add_argument(
    '--input-file',
    required=True,
    help='Input audio file name in the configured bucket',
)
args = arg_parser.parse_args()
11 | 19 |
|
log_step(f"Starting transcription process for file: {args.input_file}")


def _init_or_die(action, success_msg, failure_prefix):
    """Run *action*; log success, or log the failure and abort with exit 1."""
    try:
        result = action()
    except Exception as exc:
        log_step(f"{failure_prefix}: {str(exc)}", True)
        sys.exit(1)
    log_step(success_msg)
    return result


# Create a default config using DEFAULT profile in default location,
# then initialize the service client from it.
config = _init_or_die(
    lambda: oci.config.from_file(),
    "Successfully loaded OCI configuration",
    "Failed to load OCI configuration")

ai_speech_client = _init_or_die(
    lambda: oci.ai_speech.AIServiceSpeechClient(config),
    "Successfully initialized AI Speech client",
    "Failed to initialize AI Speech client")
15 | 37 |
|
# Load config from yaml file
def load_config():
    """Load and return the settings dict from ./config.yaml.

    Logs the bucket and namespace in use. Exits the process (status 1)
    if the file is missing, unparsable, or lacks the expected
    speech.* keys, logging the reason first.
    """
    try:
        with open('config.yaml', 'r') as f:
            # Renamed from `config` to avoid shadowing the module-level
            # OCI SDK `config` object.
            cfg = yaml.safe_load(f)
        log_step("Successfully loaded config.yaml")
        log_step(f"Using bucket: {cfg['speech']['bucket_name']}")
        log_step(f"Using namespace: {cfg['speech']['namespace']}")
        return cfg
    except Exception as e:
        log_step(f"Failed to load config.yaml: {str(e)}", True)
        sys.exit(1)
21 | 51 |
|
# Load bucket/namespace settings once at startup (exits on failure).
config_yaml = load_config()
|
23 | 53 |
|
# Send the request to the service. Some parameters are optional; see the
# CreateTranscriptionJob API documentation for details.
log_step("Creating transcription job with following settings:")
log_step(f"  • Input file: {args.input_file}")
# Constant lines below had pointless f-prefixes (lint F541) — removed.
log_step("  • Output format: SRT")
log_step("  • Language: en-US")
log_step("  • Diarization: Enabled (2 speakers)")
log_step("  • Profanity filter: Enabled (TAG mode)")

try:
    create_transcription_job_response = ai_speech_client.create_transcription_job(
        create_transcription_job_details=oci.ai_speech.models.CreateTranscriptionJobDetails(
            compartment_id=config_yaml['speech']['compartment_id'],
            # Input: a single object in the configured Object Storage bucket.
            input_location=oci.ai_speech.models.ObjectListFileInputLocation(
                location_type="OBJECT_LIST_FILE_INPUT_LOCATION",
                object_location=oci.ai_speech.models.ObjectLocation(
                    namespace_name=config_yaml['speech']['namespace'],
                    bucket_name=config_yaml['speech']['bucket_name'],
                    object_names=[args.input_file])),
            # Output: written under the "transcriptions/" prefix of the same bucket.
            output_location=oci.ai_speech.models.OutputLocation(
                namespace_name=config_yaml['speech']['namespace'],
                bucket_name=config_yaml['speech']['bucket_name'],
                prefix="transcriptions"),
            display_name=f"Transcription_{args.input_file}",
            description=f"transcription_job_{args.input_file.replace('.', '_')}",
            # Request an SRT subtitle file in addition to the default JSON output.
            additional_transcription_formats=["SRT"],
            model_details=oci.ai_speech.models.TranscriptionModelDetails(
                domain="GENERIC",
                language_code="en-US",
                transcription_settings=oci.ai_speech.models.TranscriptionSettings(
                    diarization=oci.ai_speech.models.Diarization(
                        is_diarization_enabled=True,
                        number_of_speakers=2))),
            # Punctuate the transcript and tag (not mask) profanity.
            normalization=oci.ai_speech.models.TranscriptionNormalization(
                is_punctuation_enabled=True,
                filters=[
                    oci.ai_speech.models.ProfanityTranscriptionFilter(
                        type="PROFANITY",
                        mode="TAG")]),
            freeform_tags={},
            defined_tags={}))

    log_step("Successfully created transcription job")
    log_step("Job details:")
    log_step(f"  • Job ID: {create_transcription_job_response.data.id}")
    log_step(f"  • Status: {create_transcription_job_response.data.lifecycle_state}")
    log_step(f"  • Output will be saved to: {config_yaml['speech']['bucket_name']}/transcriptions/")

except Exception as e:
    log_step(f"Failed to create transcription job: {str(e)}", True)
    sys.exit(1)
0 commit comments