feat: Add support for passing weight to the classification loss functions (#260)

volker48 · web-flow · commit 4867cb86e955 · 2025-06-06T16:38:06.000+02:00
* add support for passing weight to the loss functions

* add a test for class weights and skip the loss-function weight when reloading the state dict
diff --git a/model2vec/train/classifier.py b/model2vec/train/classifier.py
@@ -137,6 +137,7 @@ def fit(
         device: str = "auto",
         X_val: list[str] | None = None,
         y_val: LabelType | None = None,
+        class_weight: torch.Tensor | None = None,
     ) -> StaticModelForClassification:
         """
         Fit a model.
@@ -164,6 +165,8 @@ def fit(
         :param device: The device to train on. If this is "auto", the device is chosen automatically.
         :param X_val: The texts to be used for validation.
         :param y_val: The labels to be used for validation.
+        :param class_weight: Per-class loss weights (`weight` for single-label, `pos_weight` for multilabel). If None,
+            all classes are weighted equally. Must have the same length as the number of classes.
         :return: The fitted model.
         :raises ValueError: If either X_val or y_val are provided, but not both.
         """
@@ -198,13 +201,17 @@ def fit(
             base_number = int(min(max(1, (len(train_texts) / 30) // 32), 16))
             batch_size = int(base_number * 32)
             logger.info("Batch size automatically set to %d.", batch_size)
+        
+        if class_weight is not None:
+            if len(class_weight) != len(self.classes_):
+                raise ValueError("class_weight must have the same length as the number of classes.")
 
         logger.info("Preparing train dataset.")
         train_dataset = self._prepare_dataset(train_texts, train_labels)
         logger.info("Preparing validation dataset.")
         val_dataset = self._prepare_dataset(validation_texts, validation_labels)
 
-        c = _ClassifierLightningModule(self, learning_rate=learning_rate)
+        c = _ClassifierLightningModule(self, learning_rate=learning_rate, class_weight=class_weight)
 
         n_train_batches = len(train_dataset) // batch_size
         callbacks: list[Callback] = []
@@ -242,6 +249,9 @@ def fit(
 
         state_dict = {}
         for weight_name, weight in best_model_weights["state_dict"].items():
+            if "loss_function" in weight_name:
+                # Skip the loss function class weight as it's not needed for predictions
+                continue
             state_dict[weight_name.removeprefix("model.")] = weight
 
         self.load_state_dict(state_dict)
@@ -373,12 +383,12 @@ def to_pipeline(self) -> StaticModelPipeline:
 
 
 class _ClassifierLightningModule(pl.LightningModule):
-    def __init__(self, model: StaticModelForClassification, learning_rate: float) -> None:
+    def __init__(self, model: StaticModelForClassification, learning_rate: float, class_weight: torch.Tensor | None = None) -> None:
         """Initialize the LightningModule."""
         super().__init__()
         self.model = model
         self.learning_rate = learning_rate
-        self.loss_function = nn.CrossEntropyLoss() if not model.multilabel else nn.BCEWithLogitsLoss()
+        self.loss_function = nn.CrossEntropyLoss(weight=class_weight) if not model.multilabel else nn.BCEWithLogitsLoss(pos_weight=class_weight)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Simple forward pass."""
diff --git a/tests/test_trainable.py b/tests/test_trainable.py
@@ -174,6 +174,23 @@ def test_y_val_none() -> None:
         model.fit(X, y, X_val=None, y_val=y_val)
     model.fit(X, y, X_val=None, y_val=None)
 
+def test_class_weight() -> None:
+    """Test that fit rejects a class_weight of the wrong length and accepts a valid one."""
+    tokenizer = AutoTokenizer.from_pretrained("tests/data/test_tokenizer").backend_tokenizer
+    torch.random.manual_seed(42)
+    vectors_torched = torch.randn(len(tokenizer.get_vocab()), 12)
+    model = StaticModelForClassification(vectors=vectors_torched, tokenizer=tokenizer, hidden_dim=12).to("cpu")
+
+    X = ["dog", "cat"]
+    y = ["0", "1"]
+
+    bad_class_weight = torch.tensor([1.0])
+    with pytest.raises(ValueError):
+        model.fit(X, y, class_weight=bad_class_weight)
+
+    class_weight = torch.tensor([1.0, 2.0])
+    model.fit(X, y, class_weight=class_weight)
+
 
 @pytest.mark.parametrize(
     "y_multi,y_val_multi,should_crash",