diff --git a/mteb/abstasks/audio/abs_task_adio_reranking.py b/mteb/abstasks/audio/abs_task_audio_reranking.py
similarity index 100%
rename from mteb/abstasks/audio/abs_task_adio_reranking.py
rename to mteb/abstasks/audio/abs_task_audio_reranking.py
diff --git a/mteb/tasks/audio/any_2_any_retrieval/__init__.py b/mteb/tasks/audio/any_2_any_retrieval/__init__.py
index c9e776beef..15eee8dbb8 100644
--- a/mteb/tasks/audio/any_2_any_retrieval/__init__.py
+++ b/mteb/tasks/audio/any_2_any_retrieval/__init__.py
@@ -40,6 +40,7 @@
     "FleursT2ARetrieval",
     "GigaSpeechA2TRetrieval",
     "GigaSpeechT2ARetrieval",
+    "GoogleSVQA2TRetrieval",
     "HiFiTTSA2TRetrieval",
     "HiFiTTST2ARetrieval",
     "JLCorpusA2TRetrieval",
diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py
index efc69ea4b9..1f243850ae 100644
--- a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py
+++ b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py
@@ -1,6 +1,8 @@
+from .google_svq import GoogleSVQA2TRetrieval
 from .jam_alt import JamAltArtist, JamAltLyricsA2T, JamAltLyricsT2A
 
 __all__ = [
+    "GoogleSVQA2TRetrieval",
     "JamAltArtist",
     "JamAltLyricsA2T",
     "JamAltLyricsT2A",
diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py
new file mode 100644
index 0000000000..ac686bbdd0
--- /dev/null
+++ b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py
@@ -0,0 +1,59 @@
+from mteb.abstasks.image.abs_task_any2any_retrieval import AbsTaskAny2AnyRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+# Google SVQ locale subsets (17) mapped to ISO 639-3 language codes.
+# NOTE(review): confirm TaskMetadata accepts bare codes rather than "lang-Script" (e.g. "eng-Latn").
+_EVAL_LANGS = {
+    "ar_eg": ["arz"],
+    "ar_x_gulf": ["afb"],
+    "ar_x_levant": ["apc"],
+    "ar_x_maghrebi": ["arq"],
+    "bn_bd": ["ben"],
+    "bn_in": ["ben"],
+    "en_au": ["eng"],
+    "en_gb": ["eng"],
+    "en_in": ["eng"],
+    "en_ph": ["eng"],
+    "en_us": ["eng"],
+    "fi_fi": ["fin"],
+    "id_id": ["ind"],
+    "ko_kr": ["kor"],
+    "ru_ru": ["rus"],
+    "sw": ["swa"],
+    "te_in": ["tel"],
+}
+
+
+class GoogleSVQA2TRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="GoogleSVQA2TRetrieval",
+        description="Natural language transcription for short voice questions",
+        reference="https://huggingface.co/datasets/google/svq",
+        dataset={
+            "path": "google/svq",
+            "revision": "177e4fa88e59148dc746471e164b0b46b193f41f",
+        },
+        type="Any2AnyRetrieval",
+        category="a2t",
+        modalities=["audio", "text"],
+        eval_splits=["test"],
+        eval_langs=_EVAL_LANGS,
+        main_score="cv_recall_at_5",
+        date=("2025-01-01", "2025-12-31"),
+        domains=["Spoken"],
+        task_subtypes=["Speech Transcription Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="human-translated",
+        bibtex_citation=r"""
+@inproceedings{
+heigold2025massive,
+title={Massive Sound Embedding Benchmark ({MSEB})},
+author={Georg Heigold and Ehsan Variani and Tom Bagby and Cyril Allauzen and Ji Ma and Shankar Kumar and Michael Riley},
+booktitle={The Thirty-ninth Annual Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
+year={2025},
+url={https://openreview.net/forum?id=X0juYgFVng}
+}
+""",
+    )
diff --git a/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py b/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py
index f9803471b7..dd3ffb8023 100644
--- a/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 
diff --git a/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py
index 3738c90a9e..e68c06883f 100644
--- a/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 
diff --git a/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py
index 8ba5dcc1ff..aef3eb3d1c 100644
--- a/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 
diff --git a/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py
index b09ad64b71..f5076d6fc8 100644
--- a/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 
diff --git a/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py
index e4553a7ca4..4694ee5ab3 100644
--- a/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 
diff --git a/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py
index 9cdff278c5..a0531940d0 100644
--- a/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py
+++ b/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py
@@ -1,4 +1,4 @@
-from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking
+from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking
 from mteb.abstasks.task_metadata import TaskMetadata
 
 