From 1e0e4b9c11867f88b2c3476ec41507bdb9a253cf Mon Sep 17 00:00:00 2001 From: silky1708 Date: Thu, 13 Mar 2025 13:46:14 -0700 Subject: [PATCH 1/7] add task subtype --- mteb/abstasks/TaskMetadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 95f7d3317f..6951b347fa 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -59,6 +59,7 @@ "Gunshot Audio Classification", "Instrument Source Classification", "Music Genre Classification", + "Music Genre Classification", "Music Instrument Recognition", "Spoken Language Identification", "Stroke Classification of Musical Instrument", @@ -494,4 +495,4 @@ def __hash__(self) -> int: @property def revision(self) -> str: - return self.dataset["revision"] + return self.dataset["revision"] \ No newline at end of file From 58b477c61951df17b10a1456dbafe422058519d8 Mon Sep 17 00:00:00 2001 From: silky1708 Date: Thu, 13 Mar 2025 16:09:17 -0700 Subject: [PATCH 2/7] add voxlingua107-top10 dataset --- mteb/abstasks/TaskMetadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 6951b347fa..610aa7f80d 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ -59,7 +59,6 @@ "Gunshot Audio Classification", "Instrument Source Classification", "Music Genre Classification", - "Music Genre Classification", "Music Instrument Recognition", "Spoken Language Identification", "Stroke Classification of Musical Instrument", From a546ae0d03448adfda97fca0a837c876f3418d55 Mon Sep 17 00:00:00 2001 From: silky1708 Date: Fri, 14 Mar 2025 23:58:39 -0700 Subject: [PATCH 3/7] updates --- mteb/abstasks/TaskMetadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mteb/abstasks/TaskMetadata.py b/mteb/abstasks/TaskMetadata.py index 610aa7f80d..95f7d3317f 100644 --- a/mteb/abstasks/TaskMetadata.py +++ b/mteb/abstasks/TaskMetadata.py @@ 
-494,4 +494,4 @@ def __hash__(self) -> int: @property def revision(self) -> str: - return self.dataset["revision"] \ No newline at end of file + return self.dataset["revision"] From db70816e46da6e0bc5f043c1005ada53c9e2be21 Mon Sep 17 00:00:00 2001 From: silky1708 Date: Wed, 5 Nov 2025 16:59:47 -0800 Subject: [PATCH 4/7] rename audio reranking --- .../{abs_task_adio_reranking.py => abs_task_audio_reranking.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mteb/abstasks/audio/{abs_task_adio_reranking.py => abs_task_audio_reranking.py} (100%) diff --git a/mteb/abstasks/audio/abs_task_adio_reranking.py b/mteb/abstasks/audio/abs_task_audio_reranking.py similarity index 100% rename from mteb/abstasks/audio/abs_task_adio_reranking.py rename to mteb/abstasks/audio/abs_task_audio_reranking.py From 3b93da7e2d998cca7742e7d37bec01b3aaaa81c2 Mon Sep 17 00:00:00 2001 From: silky1708 Date: Wed, 5 Nov 2025 18:58:51 -0800 Subject: [PATCH 5/7] add google svq dataset --- .../multilingual/__init__.py | 2 + .../multilingual/google_svq.py | 60 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py index efc69ea4b9..741b5c965b 100644 --- a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py +++ b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py @@ -1,7 +1,9 @@ from .jam_alt import JamAltArtist, JamAltLyricsA2T, JamAltLyricsT2A +from .google_svq import GoogleSVQA2TRetrieval __all__ = [ "JamAltArtist", "JamAltLyricsA2T", "JamAltLyricsT2A", + "GoogleSVQA2TRetrieval" ] diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py new file mode 100644 index 0000000000..4d7e6708f7 --- /dev/null +++ 
b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py
@@ -0,0 +1,60 @@
+from mteb.abstasks.image.abs_task_any2any_retrieval import AbsTaskAny2AnyRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+
+# Google SVQ supports 17 languages
+_EVAL_LANGS = {
+    "ar_eg": ["arz"],
+    "ar_x_gulf": ["acm"],
+    "ar_x_levant": ["apc"],
+    "ar_x_maghrebi": ["arq"],
+    "bn_bd": ["ben"],
+    "bn_in": ["ben"],
+    "en_au": ["eng"],
+    "en_gb": ["eng"],
+    "en_in": ["eng"],
+    "en_ph": ["eng"],
+    "en_us": ["eng"],
+    "fi_fi": ["fin"],
+    "id_id": ["ind"],
+    "ko_kr": ["kor"],
+    "ru_ru": ["rus"],
+    "sw": ["swa"],
+    "te_in": ["tel"]
+}
+
+class GoogleSVQA2TRetrieval(AbsTaskAny2AnyRetrieval):
+    metadata = TaskMetadata(
+        name="GoogleSVQA2TRetrieval",
+        description="Natural language transcription for short voice questions",
+        reference="https://huggingface.co/datasets/google/svq",
+        dataset={
+            "path": "google/svq",
+            "revision": "177e4fa88e59148dc746471e164b0b46b193f41f",
+        },
+        type="Any2AnyRetrieval",
+        category="a2t",
+        modalities=["audio", "text"],
+        eval_splits=["test"],
+        eval_langs=_EVAL_LANGS,
+        main_score="cv_recall_at_5",
+        date=("2025-01-01", "2025-12-31"),
+        domains=["Spoken"],
+        task_subtypes=["Speech Transcription Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="human-annotated",
+        dialect=[],
+        sample_creation="human-translated",
+        bibtex_citation=r"""
+        @inproceedings{
+            heigold2025massive,
+            title={Massive Sound Embedding Benchmark ({MSEB})},
+            author={Georg Heigold and Ehsan Variani and Tom Bagby and Cyril Allauzen and Ji Ma and Shankar Kumar and Michael Riley},
+            booktitle={The Thirty-ninth Annual Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
+            year={2025},
+            url={https://openreview.net/forum?id=X0juYgFVng}
+        }
+        """,
+    )
+
+
From 81083726469c7afa4d93078c95e51e1bbc74a5f9 Mon Sep 17 00:00:00 2001
From: silky1708
Date: Wed, 5 Nov 2025 19:45:14 -0800
Subject: [PATCH 6/7] rename audio reranking

---
mteb/tasks/audio/any_2_any_retrieval/__init__.py | 1 + .../audio/audio_reranking/eng/audio_caps_mini_reranking.py | 2 +- mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py | 2 +- .../audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py | 2 +- mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py | 2 +- .../audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py | 2 +- .../audio/audio_reranking/eng/vocal_sound_audio_reranking.py | 2 +- 7 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/audio/any_2_any_retrieval/__init__.py b/mteb/tasks/audio/any_2_any_retrieval/__init__.py index c9e776beef..15eee8dbb8 100644 --- a/mteb/tasks/audio/any_2_any_retrieval/__init__.py +++ b/mteb/tasks/audio/any_2_any_retrieval/__init__.py @@ -40,6 +40,7 @@ "FleursT2ARetrieval", "GigaSpeechA2TRetrieval", "GigaSpeechT2ARetrieval", + "GoogleSVQA2TRetrieval", "HiFiTTSA2TRetrieval", "HiFiTTST2ARetrieval", "JLCorpusA2TRetrieval", diff --git a/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py b/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py index f9803471b7..dd3ffb8023 100644 --- a/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/audio_caps_mini_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata diff --git a/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py index 3738c90a9e..e68c06883f 100644 --- a/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/esc50_audio_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import 
AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata diff --git a/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py index 8ba5dcc1ff..aef3eb3d1c 100644 --- a/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/fs_dnoisy18k_audio_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata diff --git a/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py index b09ad64b71..f5076d6fc8 100644 --- a/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/gtzan_audio_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata diff --git a/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py index e4553a7ca4..4694ee5ab3 100644 --- a/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/urban_sound8_k_audio_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata diff --git a/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py b/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py index 9cdff278c5..a0531940d0 100644 --- 
a/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py +++ b/mteb/tasks/audio/audio_reranking/eng/vocal_sound_audio_reranking.py @@ -1,4 +1,4 @@ -from mteb.abstasks.audio.abs_task_adio_reranking import AbsTaskAudioReranking +from mteb.abstasks.audio.abs_task_audio_reranking import AbsTaskAudioReranking from mteb.abstasks.task_metadata import TaskMetadata From f0fb1bda197934955c6184e384037be715a7d346 Mon Sep 17 00:00:00 2001 From: silky1708 Date: Wed, 5 Nov 2025 19:50:56 -0800 Subject: [PATCH 7/7] make lint --- .../audio/any_2_any_retrieval/multilingual/__init__.py | 4 ++-- .../audio/any_2_any_retrieval/multilingual/google_svq.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py index 741b5c965b..1f243850ae 100644 --- a/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py +++ b/mteb/tasks/audio/any_2_any_retrieval/multilingual/__init__.py @@ -1,9 +1,9 @@ -from .jam_alt import JamAltArtist, JamAltLyricsA2T, JamAltLyricsT2A from .google_svq import GoogleSVQA2TRetrieval +from .jam_alt import JamAltArtist, JamAltLyricsA2T, JamAltLyricsT2A __all__ = [ + "GoogleSVQA2TRetrieval", "JamAltArtist", "JamAltLyricsA2T", "JamAltLyricsT2A", - "GoogleSVQA2TRetrieval" ] diff --git a/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py index 4d7e6708f7..ac686bbdd0 100644 --- a/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py +++ b/mteb/tasks/audio/any_2_any_retrieval/multilingual/google_svq.py @@ -1,7 +1,6 @@ from mteb.abstasks.image.abs_task_any2any_retrieval import AbsTaskAny2AnyRetrieval from mteb.abstasks.task_metadata import TaskMetadata - # Google SVQ supports 17 languages _EVAL_LANGS = { "ar_eg": ["arz"], @@ -20,9 +19,10 @@ "ko_kr": ["kor"], "ru_ru": ["rus"], "sw": ["swa"], - "te_in": ["tel"] + "te_in": ["tel"], } + 
class GoogleSVQA2TRetrieval(AbsTaskAny2AnyRetrieval): metadata = TaskMetadata( name="GoogleSVQA2TRetrieval", @@ -56,5 +56,3 @@ class GoogleSVQA2TRetrieval(AbsTaskAny2AnyRetrieval): } """, ) - -