Skip to content

Commit 90fe00c

Browse files
Merge pull request #57 from KevinMusgrave/dev
v0.0.70
2 parents 3c1acc1 + d88747c commit 90fe00c

33 files changed

+1347
-301
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
"torch",
3636
"torchvision",
3737
"torchmetrics",
38-
"pytorch-metric-learning >= 1.1.0",
38+
"pytorch-metric-learning >= 1.3.1.dev0",
3939
],
4040
extras_require={
4141
"ignite": extras_require_ignite,

src/pytorch_adapt/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.61"
1+
__version__ = "0.0.70"

src/pytorch_adapt/layers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from .mcc_loss import MCCLoss
1717
from .mcd_loss import GeneralMCDLoss, MCDLoss
1818
from .mean_dist_loss import MeanDistLoss
19-
from .mmd_loss import MMDLoss
19+
from .mmd_loss import MMDBatchedLoss, MMDLoss
2020
from .model_with_bridge import ModelWithBridge
2121
from .multiple_models import MultipleModels
2222
from .neighborhood_aggregation import NeighborhoodAggregation

src/pytorch_adapt/layers/confidence_weights.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@ def __init__(self, normalizer: Callable[[torch.Tensor], torch.Tensor] = None):
2525
super().__init__()
2626
self.normalizer = c_f.default(normalizer, NoNormalizer())
2727

28-
def forward(self, logits):
28+
def forward(self, preds):
2929
""""""
30-
return self.normalizer(torch.max(logits, dim=1)[0])
30+
return self.normalizer(torch.max(preds, dim=1)[0])

src/pytorch_adapt/layers/ist_loss.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,42 @@
77
from .entropy_loss import EntropyLoss
88

99

10+
def get_probs(mat, mask, y, dist_is_inverted):
    """Turn a pairwise similarity matrix into per-sample domain probabilities.

    Arguments:
        mat: (n, m) similarity/distance matrix with self-comparisons already
            removed (m == n - 1 in the caller).
        mask: (n, n) boolean mask that was used to remove the self-comparisons;
            reused here to align ``y`` with ``mat``'s columns.
        y: domain labels for the n samples (1 = target, 0 = source)
            — assumed float/0-1 valued; TODO confirm against callers.
        dist_is_inverted: True when larger values of ``mat`` mean "more similar".
    Returns:
        (n, 2) tensor: column 0 is each sample's source probability,
        column 1 its target probability.
    """
    if not dist_is_inverted:
        # Negate out of place so the caller's tensor is not mutated
        # (the original in-place `mat *= -1` clobbered the input).
        mat = -mat
    mat = F.softmax(mat, dim=1)
    n, m = mat.shape
    # Repeat the label row n times and apply the same self-comparison mask
    # so labels line up column-for-column with `mat`.
    y = y.repeat(n, 1)[mask].view(n, m)

    target_probs = torch.sum(mat * y, dim=1, keepdim=True)
    src_probs = torch.sum(mat * (1 - y), dim=1, keepdim=True)
    return torch.cat([src_probs, target_probs], dim=1)
20+
21+
22+
def get_loss(probs, ent_fn, div_fn, with_ent, with_div):
    """Combine the selected, negated loss terms.

    Arguments:
        probs: input passed to each loss callable.
        ent_fn: entropy loss callable.
        div_fn: diversity loss callable.
        with_ent: include the (negated) entropy term if True.
        with_div: include the (negated) diversity term if True.
    Returns:
        The negated sum of the selected terms, or 0 if neither is selected.
    """
    terms = []
    if with_ent:
        terms.append(ent_fn(probs))
    if with_div:
        terms.append(div_fn(probs))
    return -sum(terms) if terms else 0
29+
30+
1031
class ISTLoss(torch.nn.Module):
1132
"""
1233
Implementation of the I_st loss from
1334
[Information-Theoretical Learning of Discriminative Clusters for Unsupervised Domain Adaptation](https://icml.cc/2012/papers/566.pdf)
1435
"""
1536

16-
def __init__(self, distance=None, with_div=True):
37+
def __init__(self, distance=None, with_ent=True, with_div=True):
1738
super().__init__()
1839
self.distance = c_f.default(distance, CosineSimilarity, {})
40+
if not (with_ent or with_div):
41+
raise ValueError("At least one of with_ent or with_div must be True")
42+
self.with_ent = with_ent
1943
self.with_div = with_div
2044
self.ent_loss_fn = EntropyLoss(after_softmax=True)
21-
if self.with_div:
22-
self.div_loss_fn = DiversityLoss(after_softmax=True)
45+
self.div_loss_fn = DiversityLoss(after_softmax=True)
2346

2447
def forward(self, x, y):
2548
"""
@@ -35,23 +58,13 @@ def forward(self, x, y):
3558

3659
mat = self.distance(x)
3760
# remove self comparisons
38-
mask = torch.eye(n, dtype=torch.bool)
39-
mat = mat[~mask].view(n, n - 1)
40-
if not self.distance.is_inverted:
41-
mat *= -1
42-
mat = F.softmax(mat, dim=1)
43-
44-
y = y.repeat(n, 1)[~mask].view(n, n - 1)
45-
46-
target_probs = torch.sum(mat * y, dim=1, keepdims=True)
47-
src_probs = torch.sum(mat * (1 - y), dim=1, keepdims=True)
48-
probs = torch.cat([src_probs, target_probs], dim=1)
49-
50-
ent_loss = self.ent_loss_fn(probs)
61+
mask = ~torch.eye(n, dtype=torch.bool)
62+
mat = mat[mask].view(n, n - 1)
63+
probs = get_probs(mat, mask, y, self.distance.is_inverted)
5164

52-
if self.with_div:
53-
return -self.div_loss_fn(probs) - ent_loss
54-
return -ent_loss
65+
return get_loss(
66+
probs, self.ent_loss_fn, self.div_loss_fn, self.with_ent, self.with_div
67+
)
5568

5669
def extra_repr(self):
5770
""""""

src/pytorch_adapt/layers/mmd_loss.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,25 @@
11
from typing import List, Union
22

33
import torch
4-
from pytorch_metric_learning.distances import LpDistance
4+
from pytorch_metric_learning.distances import BatchedDistance, LpDistance
55
from pytorch_metric_learning.utils import common_functions as pml_cf
66

77
from ..utils import common_functions as c_f
88
from . import utils as l_u
99

1010

11+
def check_batch_sizes(s, t, mmd_type):
    """Raise if source/target shapes differ when using linear MMD.

    Linear-time MMD pairs samples across the two domains, so the batches
    must match in shape; quadratic MMD has no such requirement.

    Arguments:
        s: source features — a tensor, or a list/tuple of tensors.
        t: target features of the same kind as ``s``.
        mmd_type: "linear" or "quadratic".
    Raises:
        ValueError: if mmd_type is not "quadratic" and any shape differs.
    """
    if mmd_type == "quadratic":
        return
    if c_f.is_list_or_tuple(s):
        mismatch = any(s[i].shape != t[i].shape for i in range(len(s)))
    else:
        mismatch = s.shape != t.shape
    if mismatch:
        raise ValueError(
            "For mmd_type 'linear', source and target must have the same batch size."
        )
21+
22+
1123
class MMDLoss(torch.nn.Module):
1224
"""
1325
Implementation of
@@ -18,7 +30,11 @@ class MMDLoss(torch.nn.Module):
1830
"""
1931

2032
def __init__(
21-
self, kernel_scales: Union[float, torch.Tensor] = 1, mmd_type: str = "linear"
33+
self,
34+
kernel_scales: Union[float, torch.Tensor] = 1,
35+
mmd_type: str = "linear",
36+
dist_func=None,
37+
bandwidth=None,
2238
):
2339
"""
2440
Arguments:
@@ -28,7 +44,10 @@ def __init__(
2844
"""
2945
super().__init__()
3046
self.kernel_scales = kernel_scales
31-
self.dist_func = LpDistance(normalize_embeddings=False, p=2, power=2)
47+
self.dist_func = c_f.default(
48+
dist_func, LpDistance(normalize_embeddings=False, p=2, power=2)
49+
)
50+
self.bandwidth = bandwidth
3251
self.mmd_type = mmd_type
3352
if mmd_type == "linear":
3453
self.mmd_func = l_u.get_mmd_linear
@@ -50,7 +69,8 @@ def forward(
5069
Returns:
5170
MMD if the inputs are tensors, and Joint MMD (JMMD) if the inputs are lists of tensors.
5271
"""
53-
xx, yy, zz, scale = l_u.get_mmd_dist_mats(x, y, self.dist_func)
72+
check_batch_sizes(x, y, self.mmd_type)
73+
xx, yy, zz, scale = l_u.get_mmd_dist_mats(x, y, self.dist_func, self.bandwidth)
5474
if torch.is_tensor(self.kernel_scales):
5575
s = scale[0] if c_f.is_list_or_tuple(scale) else scale
5676
self.kernel_scales = pml_cf.to_device(self.kernel_scales, s, dtype=s.dtype)
@@ -66,3 +86,25 @@ def forward(
6686
def extra_repr(self):
6787
""""""
6888
return c_f.extra_repr(self, ["mmd_type", "kernel_scales"])
89+
90+
91+
class MMDBatchedLoss(MMDLoss):
    """Quadratic MMD computed chunk-by-chunk to limit peak memory.

    Wraps the distance function in a ``BatchedDistance`` so the full
    pairwise distance matrix is never materialized at once.
    """

    def __init__(self, batch_size=1024, **kwargs):
        """
        Arguments:
            batch_size: number of distance-matrix rows computed per chunk.
            **kwargs: forwarded to ``MMDLoss``; ``mmd_type`` must be "quadratic".
        """
        super().__init__(**kwargs)
        if self.mmd_type != "quadratic":
            raise ValueError("mmd_type must be 'quadratic'")
        self.mmd_func = l_u.get_mmd_quadratic_batched
        self.dist_func = BatchedDistance(self.dist_func, batch_size=batch_size)

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: features from one domain.
            y: features from the other domain.
        Returns:
            MMD
        """
        # Joint MMD over lists of features is not implemented for the
        # batched variant, unlike the parent class.
        if any(c_f.is_list_or_tuple(features) for features in (x, y)):
            raise TypeError("List of features not yet supported")
        check_batch_sizes(x, y, self.mmd_type)
        return self.mmd_func(x, y, self.dist_func, self.kernel_scales, self.bandwidth)

src/pytorch_adapt/layers/neighborhood_aggregation.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,16 @@ def get_pseudo_labels(self, normalized_features, idx):
7777
for di in range(dis.size(0)):
7878
dis[di, idx[di]] = torch.min(dis)
7979
_, indices = torch.topk(dis, k=self.k, dim=1)
80-
logits = torch.mean(self.pred_memory[indices], dim=1)
81-
pseudo_labels = torch.argmax(logits, dim=1)
82-
return pseudo_labels, logits
80+
preds = torch.mean(self.pred_memory[indices], dim=1)
81+
pseudo_labels = torch.argmax(preds, dim=1)
82+
return pseudo_labels, preds
8383

8484
def update_memory(self, normalized_features, logits, idx):
85-
logits = F.softmax(logits, dim=1)
85+
preds = F.softmax(logits, dim=1)
8686
p = 1.0 / self.T
87-
logits = (logits**p) / torch.sum(logits**p, dim=0)
87+
preds = (preds**p) / torch.sum(preds**p, dim=0)
8888
self.feat_memory[idx] = normalized_features
89-
self.pred_memory[idx] = logits
89+
self.pred_memory[idx] = preds
9090

9191
def extra_repr(self):
9292
""""""

src/pytorch_adapt/layers/utils.py

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import numpy as np
22
import torch
3+
from pytorch_metric_learning.utils import common_functions as pml_cf
34

45
from ..utils import common_functions as c_f
56

@@ -18,31 +19,36 @@ def get_kernel_scales(low=-8, high=8, num_kernels=33, base=2.0):
1819
return torch.from_numpy(np.logspace(low, high, num=num_kernels, base=base))
1920

2021

21-
def _mmd_dist_mats(x, y, dist_func):
22+
def _mmd_dist_mats(x, y, dist_func, bandwidth=None):
2223
xx = dist_func(x, x)
2324
yy = dist_func(y, y)
2425
zz = dist_func(x, y)
2526

2627
with torch.no_grad():
2728
# https://arxiv.org/pdf/1409.6041.pdf
2829
# https://arxiv.org/pdf/1707.07269.pdf
29-
scale = -1.0 / torch.median(xx)
30+
denom = (
31+
torch.median(xx)
32+
if bandwidth is None
33+
else torch.tensor([bandwidth], dtype=xx.dtype, device=xx.device)
34+
)
35+
scale = -1.0 / denom
3036

3137
return xx, yy, zz, scale
3238

3339

34-
def get_mmd_dist_mats(x, y, dist_func):
40+
def get_mmd_dist_mats(x, y, dist_func, bandwidth):
3541
if c_f.is_list_or_tuple(x):
3642
xx, yy, zz, scale = [], [], [], []
3743
for i in range(len(x)):
38-
_xx, _yy, _zz, _scale = _mmd_dist_mats(x[i], y[i], dist_func)
44+
_xx, _yy, _zz, _scale = _mmd_dist_mats(x[i], y[i], dist_func, bandwidth)
3945
xx.append(_xx)
4046
yy.append(_yy)
4147
zz.append(_zz)
4248
scale.append(_scale)
4349
return xx, yy, zz, scale
4450
else:
45-
return _mmd_dist_mats(x, y, dist_func)
51+
return _mmd_dist_mats(x, y, dist_func, bandwidth)
4652

4753

4854
def get_default_kernel_weights(scale):
@@ -124,3 +130,44 @@ def get_mmd_linear(xx, yy, zz, scale, weights=None):
124130

125131
loss = loss1 + loss2 - loss3 - loss4
126132
return torch.sum(loss) / float(B // 2)
133+
134+
135+
def _mmd_quadratic_batched(rsum, scale, weights, query_is_ref):
    """Create an ``iter_fn`` callback that accumulates quadratic-MMD kernel sums.

    Arguments:
        rsum: single-element list used as a mutable running-sum accumulator.
        scale: kernel scale(s) forwarded to ``_mmd_quadratic``.
        weights: kernel weights forwarded to ``_mmd_quadratic``.
        query_is_ref: True when query and reference sets are identical, in
            which case each chunk's self-comparisons are masked out.
    Returns:
        A callable ``(mat, start_idx, *_)`` for use as ``BatchedDistance.iter_fn``.
    """

    def accumulate(mat, start_idx, *_):
        if query_is_ref:
            mat = c_f.mask_out_self(mat, start_idx)
        rsum[0] += torch.sum(_mmd_quadratic(mat, scale, weights))

    return accumulate
142+
143+
144+
def get_median_of_medians(x, dist_func):
    """Approximate the median pairwise distance of ``x`` without the full matrix.

    Records the median of every distance-matrix chunk that ``dist_func``
    yields, then returns the median of those medians — an approximation
    of the global median that keeps memory bounded.

    Arguments:
        x: features whose self-distance matrix is computed in chunks.
        dist_func: a batched distance object whose ``iter_fn`` callback is
            invoked once per chunk.
    Returns:
        0-dim tensor: the median of the per-chunk medians.
    """
    chunk_medians = []

    def record(mat, *_):
        # Bandwidth estimation should not participate in autograd.
        with torch.no_grad():
            chunk_medians.append(torch.median(mat))

    dist_func.iter_fn = record
    dist_func(x, x)
    return torch.median(torch.stack(chunk_medians))
154+
155+
156+
def get_mmd_quadratic_batched(x, y, dist_func, kernel_scales, bandwidth, weights=None):
    """Compute quadratic MMD between x and y in memory-bounded batches.

    Arguments:
        x: features from one domain.
        y: features from the other domain.
        dist_func: batched distance object; its ``iter_fn`` attribute is
            (re)assigned here to accumulate kernel sums per chunk.
        kernel_scales: scalar or tensor of per-kernel scales.
        bandwidth: kernel bandwidth; if None, it is estimated from ``x``
            via the median-of-medians of chunked self-distances.
        weights: optional kernel weights; defaults are derived from ``scale``.
    Returns:
        0-dim tensor: MMD(x, y) = mean k(x,x) + mean k(y,y) - 2 * mean k(x,y).
    """
    if torch.is_tensor(kernel_scales):
        kernel_scales = pml_cf.to_device(kernel_scales, x, dtype=x.dtype)
    if bandwidth is None:
        bandwidth = get_median_of_medians(x, dist_func)
    scale = -kernel_scales / bandwidth
    weights = c_f.default(weights, get_default_kernel_weights(scale))

    sums = []
    # The three kernel-sum terms of quadratic MMD: (x,x), (y,y), (x,y).
    for s, t in [(x, x), (y, y), (x, y)]:
        # Single-element list acts as a mutable accumulator shared
        # with the iter_fn closure.
        rsum = [0]
        query_is_ref = s is t
        dist_func.iter_fn = _mmd_quadratic_batched(rsum, scale, weights, query_is_ref)
        dist_func(s, t)
        # Self-comparisons are masked out when query == reference, so the
        # normalizer is n*(n-1) instead of n*m.
        denom = (len(s) * (len(s) - 1)) if query_is_ref else (len(s) * len(t))
        sums.append(torch.sum(rsum[0]) / denom)

    return sums[0] + sums[1] - 2 * sums[2]

src/pytorch_adapt/utils/common_functions.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,3 +551,15 @@ def subset_of_dict(x, subset):
551551
if isinstance(subset, dict):
552552
return {k: subset_of_dict(x[k], v) for k, v in subset.items()}
553553
raise TypeError("subset argument must be dict or set")
554+
555+
556+
def mask_out_self(sim_mat, start_idx, return_mask=False):
    """Remove each row's "self" entry from a similarity matrix.

    Row ``i`` is assumed to correspond to column ``i + start_idx`` (useful
    when ``sim_mat`` is a horizontal batch of rows from a larger square
    matrix, e.g. as yielded by a batched distance computation).

    Arguments:
        sim_mat: (num_rows, num_cols) similarity matrix.
        start_idx: column offset of the first row's self-comparison.
        return_mask: if True, also return the boolean mask that was applied.
    Returns:
        (num_rows, num_cols - 1) matrix with the self entries removed,
        plus the (num_rows, num_cols) mask if ``return_mask`` is True.
    """
    num_rows, num_cols = sim_mat.shape
    # Create the mask and index tensors on sim_mat's device — building them
    # on the default (CPU) device crashes when sim_mat lives on the GPU.
    mask = torch.ones(num_rows, num_cols, dtype=torch.bool, device=sim_mat.device)
    rows = torch.arange(num_rows, device=sim_mat.device)
    cols = rows + start_idx
    mask[rows, cols] = False
    sim_mat = sim_mat[mask].view(num_rows, num_cols - 1)
    if return_mask:
        return sim_mat, mask
    return sim_mat
Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
from .accuracy_validator import AccuracyValidator
22
from .base_validator import BaseValidator
3+
from .class_cluster_validator import ClassClusterValidator
34
from .deep_embedded_validator import DeepEmbeddedValidator
45
from .diversity_validator import DiversityValidator
56
from .entropy_validator import EntropyValidator
67
from .error_validator import ErrorValidator
78
from .im_validator import IMValidator
8-
9-
# from .knn_validator import ClusterValidator, KNNValidator
9+
from .ist_validator import ISTValidator
10+
from .knn_validator import KNNValidator
11+
from .mmd_validator import MMDValidator
1012
from .multiple_validators import MultipleValidators
13+
from .per_class_validator import PerClassValidator
1114
from .score_history import ScoreHistories, ScoreHistory
12-
from .silhouette_score_validator import SilhouetteScoreValidator
1315
from .snd_validator import SNDValidator
16+
from .target_knn_validator import TargetKNNValidator

0 commit comments

Comments
 (0)