Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions api/run_api.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export OPENAI_API_KEY="your_api_key"
export ASSEMBLYAI_API_KEY="your_api_key"
export ELEVENLABS_API_KEY="your_api_key"
export REVAI_API_KEY="your_api_key"
export GANAI_API_KEY="your_api_key"

MODEL_IDs=(
"openai/gpt-4o-transcribe"
Expand All @@ -16,8 +17,10 @@ MODEL_IDs=(
"revai/machine" # please use --use_url=True
"revai/fusion" # please use --use_url=True
"speechmatics/enhanced"
"ganai/asr-v1" # please use --max_workers=5
)


num_models=${#MODEL_IDs[@]}

for (( i=0; i<${num_models}; i++ ));
Expand All @@ -27,48 +30,56 @@ do
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="ami" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="earnings22" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="gigaspeech" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path "hf-audio/esb-datasets-test-only-sorted" \
--dataset "librispeech" \
--split "test.clean" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path "hf-audio/esb-datasets-test-only-sorted" \
--dataset "librispeech" \
--split "test.other" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="spgispeech" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="tedlium" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

python run_eval.py \
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
--dataset="voxpopuli" \
--split="test" \
--max_workers=5 \
--model_name ${MODEL_ID}

# Evaluate results
Expand Down
30 changes: 28 additions & 2 deletions api/run_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,32 @@ def transcribe_with_retry(
f"AssemblyAI transcription error: {transcript.error}"
)
return transcript.text

elif model_name.startswith("ganai/"):
url = 'https://os.gan.ai/v1/asr/transcribe'
if use_url:
ganai_api_key = os.getenv("GANAI_API_KEY")
headers = {
'accept': 'application/json',
'ganos-api-key': ganai_api_key,
'Content-Type': 'application/json'
}

data = {"input_audio_url": sample['row']['audio'][0]['src']}
response = requests.post(url, headers=headers, json=data)
response = response.json()
else:
url = 'https://os.gan.ai/v1/asr/transcribe'
headers = {
'accept': 'application/json',
'ganos-api-key': 'Zrcn3NKDmQu8v6WIWG0X1eShZV_Q-adtSALJM8p5'
}
with open(audio_file_path, 'rb') as f:
files = {'input_audio_file': f}
response = requests.post(url, headers=headers, files=files)
response = response.json()

return response['results']['channels'][0]['transcript']

elif model_name.startswith("openai/"):
if use_url:
Expand Down Expand Up @@ -259,7 +285,7 @@ def transcribe_with_retry(

else:
raise ValueError(
"Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/' or 'revai/'"
"Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/', 'ganai/' or 'revai/'"
)

except Exception as e:
Expand Down Expand Up @@ -416,7 +442,7 @@ def process_sample(sample):
parser.add_argument(
"--model_name",
required=True,
help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', or 'speechmatics/'",
help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', 'ganai/' or 'speechmatics/'",
)
parser.add_argument("--max_samples", type=int, default=None)
parser.add_argument(
Expand Down