diff --git a/api/run_api.sh b/api/run_api.sh
index 4458e5f..0da50e4 100755
--- a/api/run_api.sh
+++ b/api/run_api.sh
@@ -6,6 +6,7 @@ export OPENAI_API_KEY="your_api_key"
 export ASSEMBLYAI_API_KEY="your_api_key"
 export ELEVENLABS_API_KEY="your_api_key"
 export REVAI_API_KEY="your_api_key"
+export GANAI_API_KEY="your_api_key"
 
 MODEL_IDs=(
     "openai/gpt-4o-transcribe"
@@ -16,8 +17,10 @@ MODEL_IDs=(
     "revai/machine" # please use --use_url=True
     "revai/fusion" # please use --use_url=True
     "speechmatics/enhanced"
+    "ganai/asr-v1" # please use --max_workers=5
 )
 
+
 num_models=${#MODEL_IDs[@]}
 
 for (( i=0; i<${num_models}; i++ ));
@@ -27,48 +30,56 @@ do
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="ami" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="earnings22" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="gigaspeech" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path "hf-audio/esb-datasets-test-only-sorted" \
         --dataset "librispeech" \
         --split "test.clean" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path "hf-audio/esb-datasets-test-only-sorted" \
         --dataset "librispeech" \
         --split "test.other" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="spgispeech" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="tedlium" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     python run_eval.py \
         --dataset_path="hf-audio/esb-datasets-test-only-sorted" \
         --dataset="voxpopuli" \
         --split="test" \
+        --max_workers=5 \
         --model_name ${MODEL_ID}
 
     # Evaluate results
diff --git a/api/run_eval.py b/api/run_eval.py
index 69b588a..238da35 100644
--- a/api/run_eval.py
+++ b/api/run_eval.py
@@ -173,6 +173,32 @@ def transcribe_with_retry(
                         f"AssemblyAI transcription error: {transcript.error}"
                     )
                 return transcript.text
+
+            elif model_name.startswith("ganai/"):
+                url = 'https://os.gan.ai/v1/asr/transcribe'
+                # Always read the key from the environment; credentials must
+                # never be hardcoded in source.
+                ganai_api_key = os.getenv("GANAI_API_KEY")
+                if not ganai_api_key:
+                    raise ValueError("GANAI_API_KEY environment variable is not set")
+                headers = {
+                    'accept': 'application/json',
+                    'ganos-api-key': ganai_api_key,
+                }
+                if use_url:
+                    # json= makes requests set Content-Type: application/json.
+                    data = {"input_audio_url": sample['row']['audio'][0]['src']}
+                    response = requests.post(url, headers=headers, json=data)
+                else:
+                    # files= makes requests send a multipart/form-data upload.
+                    with open(audio_file_path, 'rb') as f:
+                        files = {'input_audio_file': f}
+                        response = requests.post(url, headers=headers, files=files)
+                # Surface HTTP errors explicitly instead of an opaque KeyError below.
+                response.raise_for_status()
+                response = response.json()
+
+                return response['results']['channels'][0]['transcript']
 
             elif model_name.startswith("openai/"):
                 if use_url:
@@ -259,7 +285,7 @@ def transcribe_with_retry(
 
             else:
                 raise ValueError(
-                    "Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/' or 'revai/'"
+                    "Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/', 'ganai/' or 'revai/'"
                 )
 
         except Exception as e:
@@ -416,7 +442,7 @@ def process_sample(sample):
     parser.add_argument(
         "--model_name",
         required=True,
-        help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', or 'speechmatics/'",
+        help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', 'ganai/' or 'speechmatics/'",
     )
     parser.add_argument("--max_samples", type=int, default=None)
     parser.add_argument(