streamline classic ml

sfluegel05 · sfluegel05 · commit 3b233d6a8868 · 2025-09-16T17:18:33.000+02:00
diff --git a/chebai/callbacks/epoch_metrics.py b/chebai/callbacks/epoch_metrics.py
@@ -62,9 +62,7 @@ def update(self, preds: torch.Tensor, labels: torch.Tensor) -> None:
             labels (torch.Tensor): Ground truth labels.
         """
         tps = torch.sum(
-            torch.logical_and(
-                preds > self.threshold, labels.to(torch.bool)
-            ),
+            torch.logical_and(preds > self.threshold, labels.to(torch.bool)),
             dim=0,
         )
         self.true_positives += tps
diff --git a/chebai/models/classic_ml.py b/chebai/models/classic_ml.py
@@ -1,5 +1,6 @@
-from typing import Any, Dict
 import pickle as pkl
+from typing import Any, Dict
+
 import numpy as np
 import torch
 import tqdm
@@ -16,55 +17,44 @@ class LogisticRegression(ChebaiBaseNet):
 
     def __init__(self, out_dim: int, input_dim: int, **kwargs):
         super().__init__(out_dim=out_dim, input_dim=input_dim, **kwargs)
-        self.models = [SklearnLogisticRegression(solver="liblinear") for _ in range(out_dim)]
+        self.models = [  # one binary classifier per output column (fit on y[:, i])
+            SklearnLogisticRegression(solver="liblinear") for _ in range(out_dim)
+        ]
 
     def forward(self, x: Dict[str, Any], **kwargs) -> torch.Tensor:
         print(
             f"forward called with x[features].shape {x['features'].shape}, self.training {self.training}"
         )
         if self.training:
             self.fit_sklearn(x["features"], x["labels"])
-        try:
-            preds = [
-                torch.from_numpy(model.predict(x["features"]))
-                .to(
-                    x["features"].device
-                    if isinstance(x["features"], torch.Tensor)
-                    else "cpu"
-                )
-                .float()
-                for model in self.models
-            ]
-        except NotFittedError:
-            # Not fitted yet, return zeros
-            print(
-                f"returning default 0s with shape {(x['features'].shape[0], self.out_dim)}"
-            )
-            return torch.zeros(
-                (x["features"].shape[0], self.out_dim),
-                device=(
-                    x["features"].device
-                    if isinstance(x["features"], torch.Tensor)
-                    else "cpu"
-                ),
-            )
+        features = x["features"]
+        device = features.device if isinstance(features, torch.Tensor) else "cpu"
+        preds = []
+        for model in self.models:
+            try:
+                p = torch.from_numpy(model.predict(features)).float()
+            except (NotFittedError, AttributeError):
+                # Model not usable yet: 1-D zeros so torch.stack sees uniform shapes.
+                p = torch.zeros(features.shape[0])
+            preds.append(p.to(device))
         preds = torch.stack(preds, dim=1)
         print(f"preds shape {preds.shape}")
-        return preds
+        return preds  # always (n_samples, n_models); no squeeze, so one model keeps 2-D
 
     def fit_sklearn(self, X, y):
         """
         Fit the underlying sklearn model. X and y should be numpy arrays.
         """
         for i, model in tqdm.tqdm(enumerate(self.models), desc="Fitting models"):
             import os
+
             if os.path.exists(f"LR_CHEBI100_model_{i}.pkl"):
                 print(f"Loading model {i} from file")
                 self.models[i] = pkl.load(open(f"LR_CHEBI100_model_{i}.pkl", "rb"))
             else:
                 try:
                     model.fit(X, y[:, i])
-                except ValueError as e:
+                except ValueError:
                     self.models[i] = PlaceholderModel()
                 # dump
                 pkl.dump(model, open(f"LR_CHEBI100_model_{i}.pkl", "wb"))
@@ -80,4 +70,4 @@ def __init__(self, default_prediction=1):
         self.default_prediction = default_prediction
 
     def predict(self, preds):
-        return np.ones(preds.shape[0]) * self.default_prediction
+        return np.ones(preds.shape[0]) * self.default_prediction  # length preds.shape[0], every entry default_prediction