diff --git a/packages/tasks/src/tasks/automatic-speech-recognition/data.ts b/packages/tasks/src/tasks/automatic-speech-recognition/data.ts index 323c6fab36..b177a7f123 100644 --- a/packages/tasks/src/tasks/automatic-speech-recognition/data.ts +++ b/packages/tasks/src/tasks/automatic-speech-recognition/data.ts @@ -6,12 +6,16 @@ const taskData: TaskDataCustom = { description: "31,175 hours of multilingual audio-text dataset in 108 languages.", id: "mozilla-foundation/common_voice_17_0", }, + { + description: "Multilingual and diverse audio dataset with 101k hours of audio.", + id: "amphion/Emilia-Dataset", + }, { description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.", id: "parler-tts/mls_eng", }, { - description: "A multi-lingual audio dataset with 370K hours of audio.", + description: "A multilingual audio dataset with 370K hours of audio.", id: "espnet/yodas", }, ], @@ -54,6 +58,10 @@ const taskData: TaskDataCustom = { description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.", id: "facebook/seamless-m4t-v2-large", }, + { + description: "A powerful multilingual ASR and Speech Translation model by Nvidia.", + id: "nvidia/canary-1b", + }, { description: "Powerful speaker diarization model.", id: "pyannote/speaker-diarization-3.1", @@ -65,13 +73,17 @@ const taskData: TaskDataCustom = { id: "hf-audio/whisper-large-v3", }, { - description: "Fastest speech recognition application.", - id: "sanchit-gandhi/whisper-jax", + description: "Latest ASR model from Useful Sensors.", + id: "mrfakename/Moonshinex", }, { description: "A high quality speech and text translation model by Meta.", id: "facebook/seamless_m4t", }, + { + description: "A powerful multilingual ASR and Speech Translation model by Nvidia", + id: "nvidia/canary-1b", + }, ], summary: "Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",