Skip to content

Commit 5f882cc

Browse files
Add DirectCLR (#781) (#1874)
* Move DirectCLR to loss-based implementation (#1874)
* Pass both views in a single forward pass for DirectCLR (#1874)
* Add example notebook for DirectCLR (#1874)
1 parent e201e00 commit 5f882cc

File tree

5 files changed

+422
-0
lines changed

5 files changed

+422
-0
lines changed
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "0",
6+
"metadata": {},
7+
"source": [
8+
"This example requires the following dependencies to be installed:\n",
9+
"pip install lightly"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"id": "1",
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"!pip install lightly"
20+
]
21+
},
22+
{
23+
"cell_type": "markdown",
24+
"id": "2",
25+
"metadata": {},
26+
"source": [
27+
"Note: The model and training settings do not follow the reference settings\n",
28+
"from the paper. The settings are chosen such that the example can easily be\n",
29+
"run on a small dataset with a single GPU."
30+
]
31+
},
32+
{
33+
"cell_type": "code",
34+
"execution_count": null,
35+
"id": "3",
36+
"metadata": {},
37+
"outputs": [],
38+
"source": [
39+
"import torch\n",
40+
"from torch.nn import Sequential\n",
41+
"from torch.optim import SGD\n",
42+
"from torch.utils.data import DataLoader\n",
43+
"from torchvision import models\n",
44+
"from torchvision.datasets import CIFAR10"
45+
]
46+
},
47+
{
48+
"cell_type": "code",
49+
"execution_count": null,
50+
"id": "4",
51+
"metadata": {},
52+
"outputs": [],
53+
"source": [
54+
"from lightly.loss import DirectCLRLoss\n",
55+
"from lightly.transforms.simclr_transform import SimCLRTransform"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": null,
61+
"id": "5",
62+
"metadata": {},
63+
"outputs": [],
64+
"source": [
65+
"resnet = models.resnet18()\n",
66+
"model = Sequential(*list(resnet.children())[:-1])"
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": null,
72+
"id": "6",
73+
"metadata": {},
74+
"outputs": [],
75+
"source": [
76+
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
77+
"model.to(device)"
78+
]
79+
},
80+
{
81+
"cell_type": "code",
82+
"execution_count": null,
83+
"id": "7",
84+
"metadata": {},
85+
"outputs": [],
86+
"source": [
87+
"transform = SimCLRTransform(input_size=32, gaussian_blur=0.0)\n",
88+
"dataset = CIFAR10(\"datasets/cifar10\", download=True, transform=transform)\n",
89+
"# or create a dataset from a folder containing images or videos:\n",
90+
"# dataset = LightlyDataset(\"path/to/folder\", transform=transform)"
91+
]
92+
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"id": "8",
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"dataloader = DataLoader(\n",
101+
" dataset,\n",
102+
" batch_size=256,\n",
103+
" shuffle=True,\n",
104+
" drop_last=True,\n",
105+
" num_workers=8,\n",
106+
")"
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": null,
112+
"id": "9",
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"criterion = DirectCLRLoss(loss_dim=32)\n",
117+
"optimizer = SGD(model.parameters(), lr=0.06)"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": null,
123+
"id": "10",
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"print(\"Starting Training\")\n",
128+
"for epoch in range(10):\n",
129+
" total_loss = 0\n",
130+
" for batch in dataloader:\n",
131+
" x0, x1 = batch[0]\n",
132+
" x = torch.cat([x0, x1]).to(device)\n",
133+
" z0, z1 = model(x).chunk(2, dim=0)\n",
134+
" loss = criterion(z0, z1)\n",
135+
" total_loss += loss.detach()\n",
136+
" loss.backward()\n",
137+
" optimizer.step()\n",
138+
" optimizer.zero_grad()\n",
139+
" avg_loss = total_loss / len(dataloader)\n",
140+
" print(f\"epoch: {epoch:>02}, loss: {avg_loss:.5f}\")"
141+
]
142+
}
143+
],
144+
"metadata": {
145+
"jupytext": {
146+
"cell_metadata_filter": "-all",
147+
"main_language": "python",
148+
"notebook_metadata_filter": "-all"
149+
}
150+
},
151+
"nbformat": 4,
152+
"nbformat_minor": 5
153+
}

examples/pytorch/directclr.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# This example requires the following dependencies to be installed:
2+
# pip install lightly
3+
4+
# Note: The model and training settings do not follow the reference settings
5+
# from the paper. The settings are chosen such that the example can easily be
6+
# run on a small dataset with a single GPU.
7+
8+
import torch
9+
from torch.nn import Sequential
10+
from torch.optim import SGD
11+
from torch.utils.data import DataLoader
12+
from torchvision import models
13+
from torchvision.datasets import CIFAR10
14+
15+
from lightly.loss import DirectCLRLoss
16+
from lightly.transforms.simclr_transform import SimCLRTransform
17+
18+
resnet = models.resnet18()
19+
model = Sequential(*list(resnet.children())[:-1])
20+
21+
device = "cuda" if torch.cuda.is_available() else "cpu"
22+
model.to(device)
23+
24+
transform = SimCLRTransform(input_size=32, gaussian_blur=0.0)
25+
dataset = CIFAR10("datasets/cifar10", download=True, transform=transform)
26+
# or create a dataset from a folder containing images or videos:
27+
# dataset = LightlyDataset("path/to/folder", transform=transform)
28+
29+
dataloader = DataLoader(
30+
dataset,
31+
batch_size=256,
32+
shuffle=True,
33+
drop_last=True,
34+
num_workers=8,
35+
)
36+
37+
criterion = DirectCLRLoss(loss_dim=32)
38+
optimizer = SGD(model.parameters(), lr=0.06)
39+
40+
print("Starting Training")
41+
for epoch in range(10):
42+
total_loss = 0
43+
for batch in dataloader:
44+
x0, x1 = batch[0]
45+
x = torch.cat([x0, x1]).to(device)
46+
z0, z1 = model(x).chunk(2, dim=0)
47+
loss = criterion(z0, z1)
48+
total_loss += loss.detach()
49+
loss.backward()
50+
optimizer.step()
51+
optimizer.zero_grad()
52+
avg_loss = total_loss / len(dataloader)
53+
print(f"epoch: {epoch:>02}, loss: {avg_loss:.5f}")

lightly/loss/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from lightly.loss.dcl_loss import DCLLoss, DCLWLoss
77
from lightly.loss.detcon_loss import DetConBLoss, DetConSLoss
88
from lightly.loss.dino_loss import DINOLoss
9+
from lightly.loss.directclr_loss import DirectCLRLoss
910
from lightly.loss.emp_ssl_loss import EMPSSLLoss
1011
from lightly.loss.ibot_loss import IBOTPatchLoss
1112
from lightly.loss.koleo_loss import KoLeoLoss

lightly/loss/directclr_loss.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
""" Contrastive Loss Functions """
2+
3+
# Copyright (c) 2020. Lightly AG and its affiliates.
4+
# All Rights Reserved
5+
6+
7+
from typing import Sequence, Union
8+
9+
from torch import Tensor
10+
11+
from lightly.loss.ntx_ent_loss import NTXentLoss
12+
13+
14+
class DirectCLRLoss(NTXentLoss):
    """NT-Xent based implementation of the DirectCLR loss.

    As described in the DirectCLR paper [0], this loss is applied directly to
    the backbone output, without a projection head: each representation is
    flattened and truncated to its first `loss_dim` entries before the
    standard NT-Xent loss is computed. All remaining parameters are inherited
    from NTXentLoss.

    - [0] DirectCLR, 2021, https://arxiv.org/abs/2110.09348

    Attributes:
        loss_dim:
            Number of leading entries of each encoding used for the loss.
        temperature:
            From NTXentLoss: scale logits by the inverse of the temperature.
        memory_bank_size:
            From NTXentLoss: size of the memory bank as (num_features, dim)
            tuple. num_features is the number of negative samples stored in
            the memory bank. If num_features is 0, the memory bank is
            disabled. Use 0 for SimCLR. For MoCo we typically use numbers
            like 4096 or 65536.
            Deprecated: If only a single integer is passed, it is interpreted
            as the number of features and the feature dimension is inferred
            from the first batch stored in the memory bank. Leaving out the
            feature dimension might lead to errors in distributed training.
        gather_distributed:
            From NTXentLoss: if True then negatives from all GPUs are
            gathered before the loss calculation. If a memory bank is used
            and gather_distributed is True, then tensors from all gpus are
            gathered before the memory bank is updated.

    Examples:
        >>> # initialize loss function
        >>> loss_fn = DirectCLRLoss()
        >>>
        >>> # generate two random transforms of images
        >>> t0 = transforms(images)
        >>> t1 = transforms(images)
        >>>
        >>> # feed through backbone without projection head
        >>> out0, out1 = model(t0), model(t1)
        >>>
        >>> # calculate loss
        >>> loss = loss_fn(out0, out1)

    """

    def __init__(
        self,
        loss_dim: int = 64,
        temperature: float = 0.5,
        memory_bank_size: Union[int, Sequence[int]] = 0,
        gather_distributed: bool = False,
    ):
        """Initializes the DirectCLRLoss module with the specified parameters.

        Args:
            loss_dim:
                Computes the loss only on the first `loss_dim` values of the
                encoding.
            temperature:
                Scale logits by the inverse of the temperature.
            memory_bank_size:
                Size of the memory bank.
            gather_distributed:
                If True, negatives from all GPUs are gathered before the loss
                calculation.
        """
        super().__init__(
            temperature=temperature,
            memory_bank_size=memory_bank_size,
            gather_distributed=gather_distributed,
        )
        self.loss_dim = loss_dim

    def forward(self, out0: Tensor, out1: Tensor) -> Tensor:
        """Forward pass through DirectCLR Loss.

        Intended to be applied directly to backbone encodings (no projection
        head). Each input is flattened per sample, truncated to the first
        `loss_dim` entries, and passed to the NT-Xent loss.

        Args:
            out0:
                Encodings of the first set of transformed images.
                Shape: (batch_size, embedding_size)
            out1:
                Encodings of the second set of transformed images.
                Shape: (batch_size, embedding_size)

        Returns:
            DirectCLR Loss value.
        """
        # Flatten any trailing dimensions (e.g. spatial dims from a conv
        # backbone) so slicing operates on a (batch, features) view.
        z0 = out0.flatten(start_dim=1)
        z1 = out1.flatten(start_dim=1)

        # Truncate to the leading loss_dim features and delegate to NT-Xent.
        loss: Tensor = super().forward(
            z0[:, : self.loss_dim],
            z1[:, : self.loss_dim],
        )
        return loss

0 commit comments

Comments
 (0)