Skip to content

Commit 2777786

Browse files
committed
Adding support for ganai ASR
1 parent 292901a commit 2777786

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

api/run_api.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export OPENAI_API_KEY="your_api_key"
66
export ASSEMBLYAI_API_KEY="your_api_key"
77
export ELEVENLABS_API_KEY="your_api_key"
88
export REVAI_API_KEY="your_api_key"
9+
export GANAI_API_KEY="your_api_key"
910

1011
MODEL_IDs=(
1112
"openai/gpt-4o-transcribe"
@@ -16,8 +17,10 @@ MODEL_IDs=(
1617
"revai/machine" # please use --use_url=True
1718
"revai/fusion" # please use --use_url=True
1819
"speechmatics/enhanced"
20+
"ganai/asr-v1" # please use --max_workers=5
1921
)
2022

23+
2124
num_models=${#MODEL_IDs[@]}
2225

2326
for (( i=0; i<${num_models}; i++ ));
@@ -27,48 +30,56 @@ do
2730
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
2831
--dataset="ami" \
2932
--split="test" \
33+
--max_workers=5 \
3034
--model_name ${MODEL_ID}
3135

3236
python run_eval.py \
3337
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
3438
--dataset="earnings22" \
3539
--split="test" \
40+
--max_workers=5 \
3641
--model_name ${MODEL_ID}
3742

3843
python run_eval.py \
3944
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
4045
--dataset="gigaspeech" \
4146
--split="test" \
47+
--max_workers=5 \
4248
--model_name ${MODEL_ID}
4349

4450
python run_eval.py \
4551
--dataset_path "hf-audio/esb-datasets-test-only-sorted" \
4652
--dataset "librispeech" \
4753
--split "test.clean" \
54+
--max_workers=5 \
4855
--model_name ${MODEL_ID}
4956

5057
python run_eval.py \
5158
--dataset_path "hf-audio/esb-datasets-test-only-sorted" \
5259
--dataset "librispeech" \
5360
--split "test.other" \
61+
--max_workers=5 \
5462
--model_name ${MODEL_ID}
5563

5664
python run_eval.py \
5765
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
5866
--dataset="spgispeech" \
5967
--split="test" \
68+
--max_workers=5 \
6069
--model_name ${MODEL_ID}
6170

6271
python run_eval.py \
6372
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
6473
--dataset="tedlium" \
6574
--split="test" \
75+
--max_workers=5 \
6676
--model_name ${MODEL_ID}
6777

6878
python run_eval.py \
6979
--dataset_path="hf-audio/esb-datasets-test-only-sorted" \
7080
--dataset="voxpopuli" \
7181
--split="test" \
82+
--max_workers=5 \
7283
--model_name ${MODEL_ID}
7384

7485
# Evaluate results

api/run_eval.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,32 @@ def transcribe_with_retry(
173173
f"AssemblyAI transcription error: {transcript.error}"
174174
)
175175
return transcript.text
176+
177+
elif model_name.startswith("ganai/"):
178+
url = 'https://os.gan.ai/v1/asr/transcribe'
179+
if use_url:
180+
ganai_api_key = os.getenv("GANAI_API_KEY")
181+
headers = {
182+
'accept': 'application/json',
183+
'ganos-api-key': ganai_api_key,
184+
'Content-Type': 'application/json'
185+
}
186+
187+
data = {"input_audio_url": sample['row']['audio'][0]['src']}
188+
response = requests.post(url, headers=headers, json=data)
189+
response = response.json()
190+
else:
191+
url = 'https://os.gan.ai/v1/asr/transcribe'
192+
headers = {
193+
'accept': 'application/json',
194+
'ganos-api-key': 'Zrcn3NKDmQu8v6WIWG0X1eShZV_Q-adtSALJM8p5'
195+
}
196+
with open(audio_file_path, 'rb') as f:
197+
files = {'input_audio_file': f}
198+
response = requests.post(url, headers=headers, files=files)
199+
response = response.json()
200+
201+
return response['results']['channels'][0]['transcript']
176202

177203
elif model_name.startswith("openai/"):
178204
if use_url:
@@ -259,7 +285,7 @@ def transcribe_with_retry(
259285

260286
else:
261287
raise ValueError(
262-
"Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/' or 'revai/'"
288+
"Invalid model prefix, must start with 'assembly/', 'openai/', 'elevenlabs/', 'ganai/' or 'revai/'"
263289
)
264290

265291
except Exception as e:
@@ -416,7 +442,7 @@ def process_sample(sample):
416442
parser.add_argument(
417443
"--model_name",
418444
required=True,
419-
help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', or 'speechmatics/'",
445+
help="Prefix model name with 'assembly/', 'openai/', 'elevenlabs/', 'revai/', 'ganai/' or 'speechmatics/'",
420446
)
421447
parser.add_argument("--max_samples", type=int, default=None)
422448
parser.add_argument(

0 commit comments

Comments
 (0)