feat: enhance OCI subtitle translation with comprehensive documentation and JSON support

jasperan · jasperan · commit f8b825011240 · 2025-08-12T00:20:35.000+02:00
- Add comprehensive README with setup instructions and language support tables
- Create config_example.yaml for easy configuration setup
- Add translate_json.py for JSON subtitle file translation support
- Enhance generate_srt_from_audio.py with improved logging and error handling
- Update translate_srt.py with async document translation and better job monitoring
- Add requirements.txt with OCI SDK and PyYAML dependencies
- Include detailed language code mappings for both Speech and Translation services
- Improve error handling and logging throughout all scripts
- Add support for 30+ target languages with proper language codes
Also incorporates Richard Palissery's latest commits.
diff --git a/oci-subtitle-translation/README.md b/oci-subtitle-translation/README.md
@@ -70,6 +70,9 @@ This automated approach significantly reduces the time and effort required to cr
 
 ## 2. Usage
 
+> Before running the script, make sure your input `.mp3` file has already been uploaded to the OCI Object Storage **input bucket** defined in your `config.yaml`.  
+> The script does **not** accept local files; it only looks for the file in the cloud bucket.
+
 This solution works in two steps:
 
 1. First, we generate SRT from audio:
@@ -155,4 +158,4 @@ Licensed under the Universal Permissive License (UPL), Version 1.0.
 
 See [LICENSE](../LICENSE) for more details.
 
-ORACLE AND ITS AFFILIATES DO NOT PROVIDE ANY WARRANTY WHATSOEVER, EXPRESS OR IMPLIED, FOR ANY SOFTWARE, MATERIAL OR CONTENT OF ANY KIND CONTAINED OR PRODUCED WITHIN THIS REPOSITORY, AND IN PARTICULAR SPECIFICALLY DISCLAIM ANY AND ALL IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. FURTHERMORE, ORACLE AND ITS AFFILIATES DO NOT REPRESENT THAT ANY CUSTOMARY SECURITY REVIEW HAS BEEN PERFORMED WITH RESPECT TO ANY SOFTWARE, MATERIAL OR CONTENT CONTAINED OR PRODUCED WITHIN THIS REPOSITORY. IN ADDITION, AND WITHOUT LIMITING THE FOREGOING, THIRD PARTIES MAY HAVE POSTED SOFTWARE, MATERIAL OR CONTENT TO THIS REPOSITORY WITHOUT ANY REVIEW. USE AT YOUR OWN RISK. 
+ORACLE AND ITS AFFILIATES DO NOT PROVIDE ANY WARRANTY WHATSOEVER, EXPRESS OR IMPLIED, FOR ANY SOFTWARE, MATERIAL OR CONTENT OF ANY KIND CONTAINED OR PRODUCED WITHIN THIS REPOSITORY, AND IN PARTICULAR SPECIFICALLY DISCLAIM ANY AND ALL IMPLIED WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE. FURTHERMORE, ORACLE AND ITS AFFILIATES DO NOT REPRESENT THAT ANY CUSTOMARY SECURITY REVIEW HAS BEEN PERFORMED WITH RESPECT TO ANY SOFTWARE, MATERIAL OR CONTENT CONTAINED OR PRODUCED WITHIN THIS REPOSITORY. IN ADDITION, AND WITHOUT LIMITING THE FOREGOING, THIRD PARTIES MAY HAVE POSTED SOFTWARE, MATERIAL OR CONTENT TO THIS REPOSITORY WITHOUT ANY REVIEW. USE AT YOUR OWN RISK. 
diff --git a/oci-subtitle-translation/config_example.yaml b/oci-subtitle-translation/config_example.yaml
@@ -1,9 +1,11 @@
 # Speech Service Configuration
+profile: "your-profile"
+
 speech:
   compartment_id: "ocid1.compartment.oc1..your-compartment-id"
   bucket_name: "your-bucket-name"
   namespace: "your-namespace"
 
 # Language Translation Configuration
 language:
-  compartment_id: "ocid1.compartment.oc1..your-compartment-id" 
+  compartment_id: "ocid1.compartment.oc1..your-compartment-id"
diff --git a/oci-subtitle-translation/generate_srt_from_audio.py b/oci-subtitle-translation/generate_srt_from_audio.py
@@ -4,6 +4,9 @@
 import yaml
 import argparse
 import sys
+import time
+import os
+import json
 from datetime import datetime
 
 def log_step(message, is_error=False):
@@ -12,29 +15,45 @@ def log_step(message, is_error=False):
     prefix = "ERROR" if is_error else "INFO"
     print(f"[{timestamp}] {prefix}: {message}")
 
+def wait_for_job_completion(ai_speech_client, job_id, check_interval=15):
+    """Wait for the transcription job to complete and return the output file name"""
+    while True:
+        try:
+            job_response = ai_speech_client.get_transcription_job(job_id)
+            status = job_response.data.lifecycle_state
+            
+            if status == "SUCCEEDED":
+                log_step("Transcription job completed successfully")
+                # Get the output file name from the job details
+                input_file = job_response.data.input_location.object_locations[0].object_names[0]
+                input_file_name = input_file.split("/")[-1]  # Get the filename after last slash
+                output_prefix = job_response.data.output_location.prefix
+                # Extract just the job ID part (before the first slash)
+                job_id_part = job_id.split("/")[0]
+                output_file = f"{output_prefix}/{job_id_part}/{input_file_name}.srt"
+                return output_file
+            
+            elif status == "FAILED":
+                log_step("Transcription job failed", True)
+                sys.exit(1)
+            elif status in ["CANCELED", "DELETED"]:
+                log_step(f"Transcription job was {status.lower()}", True)
+                sys.exit(1)
+            else:
+                log_step(f"Job status: {status}. Waiting {check_interval} seconds...")
+                time.sleep(check_interval)
+                
+        except Exception as e:
+            log_step(f"Error checking job status: {str(e)}", True)
+            sys.exit(1)
+
 # Parse command line arguments
 parser = argparse.ArgumentParser(description='Generate SRT file from audio using OCI Speech service')
 parser.add_argument('--input-file', required=True, help='Input audio file name in the configured bucket')
 args = parser.parse_args()
 
 log_step(f"Starting transcription process for file: {args.input_file}")
 
-# Create a default config using DEFAULT profile in default location
-try:
-    config = oci.config.from_file()
-    log_step("Successfully loaded OCI configuration")
-except Exception as e:
-    log_step(f"Failed to load OCI configuration: {str(e)}", True)
-    sys.exit(1)
-
-# Initialize service client with default config file
-try:
-    ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)
-    log_step("Successfully initialized AI Speech client")
-except Exception as e:
-    log_step(f"Failed to initialize AI Speech client: {str(e)}", True)
-    sys.exit(1)
-
 # Load config from yaml file
 def load_config():
     """Load configuration from config.yaml"""
@@ -51,6 +70,22 @@ def load_config():
 
 config_yaml = load_config()
 
+# Load the OCI config using the profile specified in the YAML file (falls back to DEFAULT)
+try:
+    config = oci.config.from_file(profile_name=config_yaml.get("profile", "DEFAULT"))
+    log_step("Successfully loaded OCI configuration")
+except Exception as e:
+    log_step(f"Failed to load OCI configuration: {str(e)}", True)
+    sys.exit(1)
+
+# Initialize the AI Speech client with the OCI config loaded above
+try:
+    ai_speech_client = oci.ai_speech.AIServiceSpeechClient(config)
+    log_step("Successfully initialized AI Speech client")
+except Exception as e:
+    log_step(f"Failed to initialize AI Speech client: {str(e)}", True)
+    sys.exit(1)
+
 # Send the request to service
 log_step("Creating transcription job with following settings:")
 log_step(f"  • Input file: {args.input_file}")
@@ -59,22 +94,22 @@ def load_config():
 log_step(f"  • Diarization: Enabled (2 speakers)")
 log_step(f"  • Profanity filter: Enabled (TAG mode)")
 
+file_name = args.input_file.split("/")[-1]
+
 try:
     create_transcription_job_response = ai_speech_client.create_transcription_job(
         create_transcription_job_details=oci.ai_speech.models.CreateTranscriptionJobDetails(
             compartment_id=config_yaml['speech']['compartment_id'],
-            input_location=oci.ai_speech.models.ObjectListFileInputLocation(
-                location_type="OBJECT_LIST_FILE_INPUT_LOCATION", 
-                object_location=oci.ai_speech.models.ObjectLocation(
+            input_location=oci.ai_speech.models.ObjectListInlineInputLocation(
+                location_type="OBJECT_LIST_INLINE_INPUT_LOCATION", 
+                object_locations=[oci.ai_speech.models.ObjectLocation(
                     namespace_name=config_yaml['speech']['namespace'],
                     bucket_name=config_yaml['speech']['bucket_name'],
-                    object_names=[args.input_file])),  # Fixed: Use actual input file name
+                    object_names=[args.input_file])]),
             output_location=oci.ai_speech.models.OutputLocation(
                 namespace_name=config_yaml['speech']['namespace'],
                 bucket_name=config_yaml['speech']['bucket_name'],
-                prefix="transcriptions"),
-            display_name=f"Transcription_{args.input_file}",
-            description=f"transcription_job_{args.input_file.replace('.', '_')}",
+                prefix=f"transcriptions/{file_name}"),
             additional_transcription_formats=["SRT"],
             model_details=oci.ai_speech.models.TranscriptionModelDetails(
                 domain="GENERIC",
@@ -95,9 +130,14 @@ def load_config():
     log_step("Successfully created transcription job")
     log_step("Job details:")
     log_step(f"  • Job ID: {create_transcription_job_response.data.id}")
+    log_step(f"  • Output location: {create_transcription_job_response.data.output_location}")
     log_step(f"  • Status: {create_transcription_job_response.data.lifecycle_state}")
-    log_step(f"  • Output will be saved to: {config_yaml['speech']['bucket_name']}/transcriptions/")
+    log_step(f"  • Output will be saved to: {create_transcription_job_response.data.output_location.prefix}{config_yaml['speech']['namespace']}_{config_yaml['speech']['bucket_name']}_{file_name}.srt")
+    
+    # Wait for job completion and get output file name
+    output_file = wait_for_job_completion(ai_speech_client, create_transcription_job_response.data.id)
+    log_step(f"Generated SRT file: {output_file}")
     
 except Exception as e:
     log_step(f"Failed to create transcription job: {str(e)}", True)
-    sys.exit(1)
+    sys.exit(1)
diff --git a/oci-subtitle-translation/translate_json.py b/oci-subtitle-translation/translate_json.py
@@ -0,0 +1,120 @@
+import oci
+import yaml
+import argparse
+import sys
+import json
+import os
+from datetime import datetime
+
+def log_step(message, is_error=False):
+    """Print a formatted log message with timestamp"""
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    prefix = "ERROR" if is_error else "INFO"
+    print(f"[{timestamp}] {prefix}: {message}")
+
+# Parse command line arguments
+parser = argparse.ArgumentParser(description='Translate a JSON subtitle file using OCI AI Translation')
+parser.add_argument('--input-file', required=True, help='Input JSON file in the configured bucket')
+parser.add_argument('--target-language', required=True, help='Target language code (e.g., fr, es, de)')
+args = parser.parse_args()
+
+# Generate output filename
+input_filename = os.path.splitext(args.input_file)[0]  # Remove extension
+output_file = f"{input_filename}_{args.target_language}.json"
+
+log_step(f"Starting translation of {args.input_file} to {args.target_language}")
+
+# Load the OCI config from the default location using the hard-coded DEVRELCOMM profile
+try:
+    config = oci.config.from_file(profile_name="DEVRELCOMM")
+    log_step("Successfully loaded OCI configuration")
+except Exception as e:
+    log_step(f"Failed to load OCI configuration: {str(e)}", True)
+    sys.exit(1)
+
+# Initialize the AI Language client with the OCI config loaded above
+try:
+    ai_language_client = oci.ai_language.AIServiceLanguageClient(config)
+    log_step("Successfully initialized AI Translation client")
+except Exception as e:
+    log_step(f"Failed to initialize AI Translation client: {str(e)}", True)
+    sys.exit(1)
+
+# Load config from yaml file
+def load_config():
+    """Load configuration from config.yaml"""
+    try:
+        with open('config.yaml', 'r') as f:
+            config = yaml.safe_load(f)
+            log_step("Successfully loaded config.yaml")
+            log_step(f"Using bucket: {config['speech']['bucket_name']}")
+            log_step(f"Using namespace: {config['speech']['namespace']}")
+            return config
+    except Exception as e:
+        log_step(f"Failed to load config.yaml: {str(e)}", True)
+        sys.exit(1)
+
+config_yaml = load_config()
+object_storage_client = oci.object_storage.ObjectStorageClient(config)
+
+# Fetch the input JSON file from OCI Object Storage and parse it
+try:
+    namespace = config_yaml['speech']['namespace']
+    bucket_name = config_yaml['speech']['bucket_name']
+    object_name = args.input_file
+
+    get_object_response = object_storage_client.get_object(namespace, bucket_name, object_name)
+    json_data = json.loads(get_object_response.data.text)  # Read and parse JSON data
+    log_step(f"Loaded JSON file from OCI with {len(json_data.get('transcriptions', []))} transcriptions.")
+    
+
+    log_step(f"Loaded {len(json_data)} subtitles from {args.input_file}")
+except Exception as e:
+    log_step(f"Failed to read JSON file from OCI Object Storage: {str(e)}", True)
+    sys.exit(1)
+    
+translated_data = []
+for item in json_data["transcriptions"]:
+    if "transcription" in item:
+        try:
+            document = oci.ai_language.models.Document(
+                language="en",  
+                text=item["transcription"]
+            )
+
+            request_details = oci.ai_language.models.BatchTranslateTextDetails(
+                documents=[document], 
+                target_language=args.target_language
+            )
+
+            response = ai_language_client.batch_translate_text(request_details)
+            translated_text = response.data[0].translated_text  
+
+        except Exception as e:
+            print(f"Error during translation: {str(e)}", file=sys.stderr)
+            translated_text = item['transcription']  
+
+        # Update item with translated text
+        translated_item = item.copy()
+        translated_item['transcription'] = translated_text
+        translated_data.append(translated_item)
+    else:
+        print(f"Skipping invalid item: {item}")
+
+log_step(f"Translation completed successfully with {len(translated_data)} items translated")
+
+translated_json = json.dumps(translated_data, ensure_ascii=False, indent=4)
+
+try:
+    # Convert translated data back to JSON format
+    translated_json = json.dumps(translated_data, ensure_ascii=False, indent=4)
+
+    # Also write a local copy (not removed afterwards); the upload below sends the in-memory JSON
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(translated_json)
+        
+    object_storage_client.put_object(namespace, bucket_name, output_file, translated_json.encode('utf-8'))
+    log_step(f"Translated JSON uploaded to OCI Object Storage as {output_file}")
+except Exception as e:
+    log_step(f"Failed to upload translated JSON to OCI: {str(e)}", True)
+    sys.exit(1)