@@ -49,8 +49,7 @@ async def transcribe_audio(client, tokenizer, y, sr):
     return latency, num_output_tokens, transcription.text
 
 
-async def bound_transcribe(model_name, sem, client, audio, reference):
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+async def bound_transcribe(sem, client, tokenizer, audio, reference):
     # Use semaphore to limit concurrent requests.
     async with sem:
         result = await transcribe_audio(client, tokenizer, *audio)
@@ -63,15 +62,19 @@ async def bound_transcribe(model_name, sem, client, audio, reference):
 async def process_dataset(model, client, data, concurrent_request):
     sem = asyncio.Semaphore(concurrent_request)
 
+    # Load tokenizer once outside the loop
+    tokenizer = AutoTokenizer.from_pretrained(model)
+
     # Warmup call as the first `librosa.load` server-side is quite slow.
     audio, sr = data[0]["audio"]["array"], data[0]["audio"]["sampling_rate"]
-    _ = await bound_transcribe(model, sem, client, (audio, sr), "")
+    _ = await bound_transcribe(sem, client, tokenizer, (audio, sr), "")
 
     tasks: list[asyncio.Task] = []
     for sample in data:
         audio, sr = sample["audio"]["array"], sample["audio"]["sampling_rate"]
         task = asyncio.create_task(
-            bound_transcribe(model, sem, client, (audio, sr), sample["text"]))
+            bound_transcribe(sem, client, tokenizer, (audio, sr),
+                             sample["text"]))
         tasks.append(task)
     return await asyncio.gather(*tasks)
 
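For reference, a minimal driver sketch for the refactored process_dataset. The model name, server URL, and the one-sample stand-in dataset below are illustrative assumptions, not part of this change; the client is assumed to be an OpenAI-compatible async client pointed at the transcription server, and the stand-in samples mimic the Hugging Face dataset fields the function reads.

import asyncio

import numpy as np
from openai import AsyncOpenAI

async def main():
    model = "openai/whisper-large-v3"  # assumed model name, for illustration only
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    # Stand-in for a Hugging Face audio dataset: each sample exposes the same
    # keys that process_dataset reads (audio array, sampling rate, reference text).
    data = [{"audio": {"array": np.zeros(16000, dtype=np.float32),
                       "sampling_rate": 16000},
             "text": ""}]
    results = await process_dataset(model, client, data, concurrent_request=8)
    print(results)

if __name__ == "__main__":
    asyncio.run(main())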