add cluster

xingzhongyu · xingzhongyu · commit ce9b5bc8b633 · 2025-04-20T10:10:11.000+08:00
diff --git a/dance/modules/base.py b/dance/modules/base.py
@@ -166,3 +166,64 @@ class BaseRegressionMethod(BaseMethod):
 class BaseClusteringMethod(BaseMethod):
 
     _DEFAULT_METRIC = "ari"
+
+    def score(self, x, y, *, score_func: Optional[Union[str, Mapping[Any, float]]] = None, return_pred: bool = False,
+              valid_idx=None, test_idx=None) -> Union[float, Mapping[str, float], Tuple[Any, Any]]:
+        """Score the predictions on ``x`` against ``y``, optionally per validation/test subset.
+
+        Parameters
+        ----------
+        x
+            Input passed to ``self.predict``.
+        y
+            Reference labels; indexed element-wise when ``valid_idx`` is given.
+        score_func
+            Metric passed to ``resolve_score_func``; ``self._DEFAULT_METRIC`` is used when it is falsy.
+        return_pred
+            If True, return ``(score, y_pred)`` instead of just the score.
+        valid_idx
+            Indices of the validation samples. If None, one score over all samples is returned.
+        test_idx
+            Indices of the test samples. Required when ``valid_idx`` is given, ignored otherwise.
+
+        Returns
+        -------
+        score
+            A single score if ``valid_idx`` is None, else ``{"valid_score": ..., "test_score": ...}``.
+            Wrapped as ``(score, y_pred)`` when ``return_pred`` is True.
+
+        Raises
+        ------
+        ValueError
+            If ``valid_idx`` is given without ``test_idx``.
+
+        """
+        if valid_idx is not None and test_idx is None:
+            # Fail early with a clear message instead of a TypeError after the prediction has run.
+            raise ValueError("test_idx must be provided together with valid_idx")
+
+        y_pred = self.predict(x)
+        func = resolve_score_func(score_func or self._DEFAULT_METRIC)
+        if valid_idx is None:
+            score = func(y, y_pred)
+        else:
+            score = {
+                "valid_score": func([y[i] for i in valid_idx], [y_pred[i] for i in valid_idx]),
+                "test_score": func([y[i] for i in test_idx], [y_pred[i] for i in test_idx]),
+            }
+        return (score, y_pred) if return_pred else score
+
+    def fit_score(self, x, y, *, score_func: Optional[Union[str, Mapping[Any, float]]] = None,
+                  return_pred: bool = False, valid_idx=None, test_idx=None,
+                  **fit_kwargs) -> Union[float, Mapping[str, float], Tuple[Any, Any]]:
+        """Fit the model on ``x`` and return its score against ``y``.
+
+        ``x`` and ``fit_kwargs`` are passed to ``self.fit``; ``x`` and the remaining arguments go to ``self.score``.
+
+        Note
+        ----
+        Only works for models whose fitting does not require labeled data, i.e. unsupervised methods.
+
+        """
+        self.fit(x, **fit_kwargs)
+        return self.score(x, y, score_func=score_func, return_pred=return_pred, valid_idx=valid_idx, test_idx=test_idx)
diff --git a/examples/tuning/cluster_graphsc/main.py b/examples/tuning/cluster_graphsc/main.py
@@ -2,12 +2,13 @@
 import os
 import pprint
 import sys
 from pathlib import Path
 
 import numpy as np
 import torch
-import wandb
+from sklearn.model_selection import train_test_split
 
+import wandb
 from dance import logger
 from dance.datasets.singlemodality import ClusteringDataset
 from dance.modules.single_modality.clustering.graphsc import GraphSC
@@ -74,7 +75,10 @@ def evaluate_pipeline(tune_mode=args.tune_mode, pipeline_planer=pipeline_planer)
         preprocessing_pipeline = pipeline_planer.generate(**kwargs)
         print(f"Pipeline config:\n{preprocessing_pipeline.to_yaml()}")
         preprocessing_pipeline(data)
-
+        # Split the sample indices for evaluation only: test_size=0.9 puts 10% of them in
+        # valid_idx and the remaining 90% in test_idx.
+        total_idx = range(data.shape[0])
+        valid_idx, test_idx = train_test_split(total_idx, test_size=0.9, random_state=args.seed)
         graph, y = data.get_train_data()
         n_clusters = len(np.unique(y))
 
@@ -91,8 +95,9 @@ def evaluate_pipeline(tune_mode=args.tune_mode, pipeline_planer=pipeline_planer)
                         num_workers=args.num_workers, device=args.device)
         model.fit(graph, epochs=args.epochs, lr=args.learning_rate, show_epoch_ari=args.show_epoch_ari,
                   eval_epoch=args.eval_epoch)
-        score = model.score(None, y)
-        wandb.log({"acc": score})
+        # With valid_idx/test_idx, score() returns a dict keyed by "valid_score" and "test_score".
+        scores = model.score(graph, y, valid_idx=valid_idx, test_idx=test_idx)
+        wandb.log({"ari": scores["valid_score"], "test_ari": scores["test_score"]})
         wandb.finish()
         del model
         torch.cuda.empty_cache()