From fd5935dcdf8ee4f77cb128ce4c427f6bb8f437e1 Mon Sep 17 00:00:00 2001 From: Adnan El Assadi Date: Thu, 1 Jan 2026 13:40:14 -0500 Subject: [PATCH 1/4] Use new subsampled version of fsd50_mini --- .../eng/fsd50_hf.py | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py index daabbba5ef..b7c64239dc 100644 --- a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py +++ b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py @@ -8,12 +8,12 @@ class FSD50HFMultilingualClassification(AbsTaskMultilabelClassification): metadata = TaskMetadata( name="FSD50K", - description="Multilabel Audio Classification.", - reference="https://huggingface.co/datasets/Chand0320/fsd50k_hf", + description="Multilabel Audio Classification on a subsampled version of FSD50K.", + reference="https://huggingface.co/datasets/mteb/fsd50k_mini", dataset={ - "path": "Chand0320/fsd50k_hf", - "revision": "ca72d33100074e2933437e844028c941d8e8f065", - }, # this is actually used to download the data + "path": "mteb/fsd50k_mini", + "revision": "a574eeb10bb11d28eff83dd522151028d529551d", + }, type="AudioMultilabelClassification", category="a2t", eval_splits=["test"], @@ -22,7 +22,7 @@ class FSD50HFMultilingualClassification(AbsTaskMultilabelClassification): date=( "2020-01-01", "2020-01-30", - ), # Estimated date when this dataset was committed, what should be the second tuple? + ), domains=["Web"], # obtained from Freesound - online collaborative platform task_subtypes=["Environment Sound Classification"], license="cc-by-4.0", @@ -50,17 +50,3 @@ class FSD50HFMultilingualClassification(AbsTaskMultilabelClassification): label_column_name: str = "labels" samples_per_label: int = 8 - def dataset_transform(self): - # labels column is a string of comma separated labels, this function converts it to a list of labels - self.dataset = self.dataset.map( - lambda x: { - self.label_column_name: x[self.label_column_name].split(","), - } - ) - self.dataset = self.stratified_subsampling( - self.dataset, - seed=self.seed, - splits=self.eval_splits, - label=self.label_column_name, - n_samples=2048, - ) From 7cb107a20a125fb6753816667009e9b3a151c180 Mon Sep 17 00:00:00 2001 From: Adnan El Assadi Date: Tue, 6 Jan 2026 16:52:47 -0500 Subject: [PATCH 2/4] Fixes description --- maeb-results | 1 + .../tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 160000 maeb-results diff --git a/maeb-results b/maeb-results new file mode 160000 index 0000000000..8ecd5588dd --- /dev/null +++ b/maeb-results @@ -0,0 +1 @@ +Subproject commit 8ecd5588dd5f4584c575a8db9d1e4f972d943e55 diff --git a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py index b7c64239dc..4dce9953af 100644 --- a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py +++ b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py @@ -8,7 +8,7 @@ class FSD50HFMultilingualClassification(AbsTaskMultilabelClassification): metadata = TaskMetadata( name="FSD50K", - description="Multilabel Audio Classification on a subsampled version of FSD50K.", + description="Multilabel Audio Classification on a subsampled version of FSD50K using 2048 samples", reference="https://huggingface.co/datasets/mteb/fsd50k_mini", dataset={ "path": "mteb/fsd50k_mini", From 72e04175511b0594c252cfafe71ac66cf6bd38fb Mon Sep 17 00:00:00 2001 From: Adnan El Assadi Date: Tue, 6 Jan 2026 19:29:07 -0500 Subject: [PATCH 3/4] ran lint --- mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py index 4dce9953af..2537a2a5e2 100644 --- a/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py +++ b/mteb/tasks/audio/audio_multilabel_classification/eng/fsd50_hf.py @@ -49,4 +49,3 @@ class FSD50HFMultilingualClassification(AbsTaskMultilabelClassification): input_column_name: str = "audio" label_column_name: str = "labels" samples_per_label: int = 8 - From a4ee65dcc04a1c1b3262d1b7b08bc2a610eedba7 Mon Sep 17 00:00:00 2001 From: Adnan El Assadi Date: Tue, 6 Jan 2026 19:34:52 -0500 Subject: [PATCH 4/4] remove git submodule --- maeb-results | 1 - 1 file changed, 1 deletion(-) delete mode 160000 maeb-results diff --git a/maeb-results b/maeb-results deleted file mode 160000 index 8ecd5588dd..0000000000 --- a/maeb-results +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8ecd5588dd5f4584c575a8db9d1e4f972d943e55