Fix unleveled bug with BERT

mirand863 · mirand863 · commit a92b6a83b8f5 · 2024-12-02T11:23:25.000-03:00
diff --git a/hiclass/HierarchicalClassifier.py b/hiclass/HierarchicalClassifier.py
@@ -161,7 +161,9 @@ def _pre_fit(self, X, y, sample_weight):
             )
         else:
             self.X_ = np.array(X)
-            self.y_ = np.array(make_leveled(y))
+            self.y_ = check_array(
+                make_leveled(y), dtype=None, ensure_2d=False, allow_nd=True
+            )
 
         if sample_weight is not None:
             self.sample_weight_ = _check_sample_weight(sample_weight, X)
diff --git a/tests/test_LocalClassifiers.py b/tests/test_LocalClassifiers.py
@@ -3,15 +3,16 @@
 
 import numpy as np
 import pytest
+from bert_sklearn import BertClassifier
 from numpy.testing import assert_array_equal
 from pyfakefs.fake_filesystem_unittest import Patcher
 from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.utils.validation import check_is_fitted
 
 from hiclass import (
-    LocalClassifierPerNode,
     LocalClassifierPerLevel,
+    LocalClassifierPerNode,
     LocalClassifierPerParentNode,
 )
 from hiclass.ConstantClassifier import ConstantClassifier
@@ -77,16 +78,20 @@ def test_empty_levels(empty_levels, classifier):
 
 @pytest.mark.parametrize("classifier", classifiers)
 def test_fit_bert(classifier):
-    bert = ConstantClassifier()
+    bert = BertClassifier()
     clf = classifier(
         local_classifier=bert,
         bert=True,
     )
-    X = ["Text 1", "Text 2"]
-    y = ["a", "a"]
-    clf.fit(X, y)
+    x = ["Batman", "Joker", "Rorschach"]
+    y = [
+        ["Action", "The Dark Night"],
+        ["Action", "The Dark Night"],
+        ["Action", "Watchmen"],
+    ]
+    clf.fit(x, y)
     check_is_fitted(clf)
-    predictions = clf.predict(X)
+    predictions = clf.predict(x)
     assert_array_equal(y, predictions)
 
 
@@ -148,9 +153,13 @@ def test_tmp_dir(classifier):
 @pytest.mark.parametrize("classifier", classifiers)
 def test_bert_unleveled(classifier):
     clf = classifier(
-        local_classifier=LogisticRegression(),
+        local_classifier=BertClassifier(),
         bert=True,
     )
-    x = [[0, 1], [2, 3]]
-    y = [["a"], ["b", "c"]]
+    x = ["Batman", "Joker"]
+    y = [["Action", "The Dark Night"], ["Action"]]
+    ground_truth = [["Action", "The Dark Night"], ["Action", "The Dark Night"]]
     clf.fit(x, y)
+    check_is_fitted(clf)
+    predictions = clf.predict(x)
+    assert_array_equal(ground_truth, predictions)