Add automatic conversion to multilabel format when reading a dataset

Samoed · Samoed · commit a5dc0aec213f · 2025-06-14T16:27:28.000+03:00
diff --git a/autointent/_dataset/_dataset.py b/autointent/_dataset/_dataset.py
@@ -71,6 +71,16 @@ def n_classes(self) -> int:
         """Returns the number of classes in the dataset."""
         return len(self.intents)
 
+    def is_multilabel_format(self) -> bool:
+        """Checks if the label feature of the first split is a ``Sequence`` (multilabel layout).
+
+        Returns:
+            bool: True if the first split's label feature is a ``Sequence``, False otherwise.
+        """
+        ds_keys = list(self.keys())
+        first_split = self[ds_keys[0]]
+        return isinstance(first_split.features[self.label_feature], Sequence)
+
     @classmethod
     def from_dict(cls, mapping: dict[str, Any]) -> "Dataset":
         """Creates a dataset from a dictionary mapping.
@@ -80,7 +90,11 @@ def from_dict(cls, mapping: dict[str, Any]) -> "Dataset":
         """
         from ._reader import DictReader
 
-        return DictReader().read(mapping)
+        dataset = DictReader().read(mapping)
+
+        if dataset.is_multilabel_format():
+            dataset = dataset.to_multilabel()
+        return dataset
 
     @classmethod
     def from_json(cls, filepath: str | Path) -> "Dataset":
@@ -91,7 +105,10 @@ def from_json(cls, filepath: str | Path) -> "Dataset":
         """
         from ._reader import JsonReader
 
-        return JsonReader().read(filepath)
+        dataset = JsonReader().read(filepath)
+        if dataset.is_multilabel_format():
+            dataset = dataset.to_multilabel()
+        return dataset
 
     @classmethod
     def from_hub(cls, repo_name: str, data_split: str = "default") -> "Dataset":
@@ -109,9 +126,7 @@ def from_hub(cls, repo_name: str, data_split: str = "default") -> "Dataset":
             mapping[Split.INTENTS] = load_dataset(repo_name, name=Split.INTENTS, split=Split.INTENTS).to_list()
 
         dataset = DictReader().read(mapping)
-        ds_keys = list(dataset.keys())
-        first_split = dataset[ds_keys[0]]
-        if isinstance(first_split.features[dataset.label_feature], Sequence):
+        if dataset.is_multilabel_format():
             dataset = dataset.to_multilabel()
         return dataset
 
diff --git a/autointent/custom_types/_types.py b/autointent/custom_types/_types.py
@@ -73,7 +73,7 @@ class NodeType(str, Enum):
     decision = "decision"
 
 
-class Split:
+class Split(str, Enum):
     """Enumeration of data splits in the AutoIntent framework.
 
     Attributes: