diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv
index 506af50..bc819e9 100644
--- a/automation/notebooks-table-data.csv
+++ b/automation/notebooks-table-data.csv
@@ -13,4 +13,5 @@ MLP Mixer,architectures/mlp-mixer.ipynb,,https://arxiv.org/abs/2105.01601
 GloVe Word Embeddings, data_exploration/glove-word-embeddings.ipynb,https://github.com/stanfordnlp/GloVe,https://nlp.stanford.edu/pubs/glove.pdf
 Vision Transformer (ViT),architectures/vit.ipynb,,https://arxiv.org/pdf/2010.11929
 Multi-Head Attention, modules/multihead-self-attention.ipynb,,https://arxiv.org/abs/1706.03762
-ResNet,architectures/resnet.ipynb,,https://arxiv.org/abs/1512.03385
\ No newline at end of file
+ResNet,architectures/resnet.ipynb,,https://arxiv.org/abs/1512.03385
+DINO,architectures/dino.ipynb,,https://arxiv.org/abs/2104.14294
diff --git a/notebooks/architectures/dino.ipynb b/notebooks/architectures/dino.ipynb
new file mode 100644
index 0000000..0aaca3f
--- /dev/null
+++ b/notebooks/architectures/dino.ipynb
@@ -0,0 +1,440 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "source": [
+        ""
+      ],
+      "metadata": {
+        "id": "Iy-fAH0K-iJC"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "# **DINO: Emerging Properties in Self-Supervised Vision Transformers**"
+      ],
+      "metadata": {
+        "id": "hmK_SbYa_fSA"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The paper “Emerging Properties in Self-Supervised Vision Transformers” introduces DINO, a self-distillation approach to self-supervised learning with Vision Transformers (ViTs). In simple terms, the goal of the paper is to demonstrate that a model can learn useful representations of images without any labels, by training a student network to match the output of a momentum teacher."
+      ],
+      "metadata": {
+        "id": "25swm6eH-srf"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Why is DINO relevant?**\n",
+        "\n",
+        " - Self-supervision: The DINO method avoids reliance on large amounts of labeled data, which is useful in scenarios where labeling is costly or impractical.\n",
+        "\n",
+        " - Vision Transformers: It uses ViTs, a powerful architecture for computer vision tasks, and shows that these networks can be trained effectively without supervision.\n",
+        "\n",
+        " - Emergent Properties: A model trained with DINO learns to capture high-level spatial structure and relationships in images. Surprisingly, it produces highly interpretable attention maps and accurate object localizations without being explicitly trained to do so.\n"
+      ],
+      "metadata": {
+        "id": "cM7jkpbs_buN"
+      }
+    },
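+    {
+      "cell_type": "markdown",
+      "source": [
+        "The training recipe can be summarized in a few equations (notation follows the paper; the code below implements a simplified, single-crop version of it). A student network $g_{\\theta_s}$ is trained to match a teacher network $g_{\\theta_t}$ on two augmented views $x_1, x_2$ of the same image, with both outputs mapped to $K$-dimensional probability distributions:\n",
+        "\n",
+        "$$P_s(x) = \\mathrm{softmax}\\big(g_{\\theta_s}(x)/\\tau_s\\big), \\qquad P_t(x) = \\mathrm{softmax}\\big((g_{\\theta_t}(x) - C)/\\tau_t\\big)$$\n",
+        "\n",
+        "The student minimizes the cross-entropy $-\\sum_k P_t(x_1)_k \\log P_s(x_2)_k$ (plus the symmetric term). The teacher receives no gradients: its weights are an exponential moving average (EMA) of the student's, $\\theta_t \\leftarrow \\lambda\\,\\theta_t + (1-\\lambda)\\,\\theta_s$, and the center $C$ is an EMA of the teacher's outputs. Centering combined with sharpening (a teacher temperature $\\tau_t < \\tau_s$) is what prevents the representations from collapsing."
+      ],
+      "metadata": {
+        "id": "dinoRecipeMd01"
+      }
+    },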
+    {
+      "cell_type": "markdown",
+      "source": [
+        ""
+      ],
+      "metadata": {
+        "id": "1g-cR112_8Ff"
+      }
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "WJB9P2mT94Jy"
+      },
+      "outputs": [],
+      "source": [
+        "%%capture\n",
+        "#@title **Install required packages**\n",
+        "\n",
+        "!pip install torchinfo"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "#@title **Importing libraries**\n",
+        "\n",
+        "import torchinfo\n",
+        "import torch\n",
+        "import torch.nn as nn\n",
+        "import torch.nn.functional as F\n",
+        "import torchvision.transforms as transforms\n",
+        "from torchvision.models import vit_b_16\n",
+        "from sklearn.neighbors import KNeighborsClassifier\n",
+        "from sklearn.linear_model import LogisticRegression\n",
+        "from sklearn.preprocessing import StandardScaler\n",
+        "import numpy as np"
+      ],
+      "metadata": {
+        "id": "GWMebzLuAAuS"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Note: not every dependency exposes a __version__ attribute.\n",
+        "\n",
+        "print(torch.__version__)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "9rCTVIL4ACpD",
+        "outputId": "630e8f90-47ec-4e70-8173-aa394cd98d24"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "2.5.1+cu121\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "### DINO architecture code (ViT-Base/16 backbone)"
+      ],
+      "metadata": {
+        "id": "48zU4_8sAJRF"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "class DINOHead(nn.Module):\n",
+        "    \"\"\"\n",
+        "    DINO projection head for self-supervised learning\n",
+        "    \"\"\"\n",
+        "    def __init__(self, in_dim, out_dim, use_bn=True, norm_last_layer=True, nlayers=3, hidden_dim=2048):\n",
+        "        super().__init__()\n",
+        "        nlayers = max(nlayers, 1)\n",
+        "        if nlayers == 1:\n",
+        "            self.mlp = nn.Linear(in_dim, out_dim)\n",
+        "        else:\n",
+        "            layers = [nn.Linear(in_dim, hidden_dim)]\n",
+        "            if use_bn:\n",
+        "                layers.append(nn.BatchNorm1d(hidden_dim))\n",
+        "            layers.append(nn.GELU())\n",
+        "\n",
+        "            for _ in range(nlayers - 2):\n",
+        "                layers.append(nn.Linear(hidden_dim, hidden_dim))\n",
+        "                if use_bn:\n",
+        "                    layers.append(nn.BatchNorm1d(hidden_dim))\n",
+        "                layers.append(nn.GELU())\n",
+        "\n",
+        "            layers.append(nn.Linear(hidden_dim, out_dim))\n",
+        "            if norm_last_layer:\n",
+        "                layers.append(nn.BatchNorm1d(out_dim, affine=False))\n",
+        "\n",
+        "            # Keep the Sequential inside the else branch so the\n",
+        "            # single-layer case above is not overwritten\n",
+        "            self.mlp = nn.Sequential(*layers)\n",
+        "\n",
+        "    def forward(self, x):\n",
+        "        return self.mlp(x)"
+      ],
+      "metadata": {
+        "id": "g5VuCgFuAKpV"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
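+    {
+      "cell_type": "markdown",
+      "source": [
+        "A quick shape check of the projection head. The dimensions here are small and purely illustrative; in the model below the head maps 768-dimensional ViT features to a 65536-dimensional output:"
+      ],
+      "metadata": {
+        "id": "headCheckMd01"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Sanity check with small, illustrative dimensions\n",
+        "head = DINOHead(in_dim=768, out_dim=256)\n",
+        "fake_features = torch.randn(4, 768)  # a fake batch of backbone features\n",
+        "print(head(fake_features).shape)     # expected: torch.Size([4, 256])"
+      ],
+      "metadata": {
+        "id": "headCheckCode01"
+      },
+      "execution_count": null,
+      "outputs": []
+    },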
+    {
+      "cell_type": "code",
+      "source": [
+        "class DINO(nn.Module):\n",
+        "    \"\"\"\n",
+        "    DINO model with a ViT-Base/16 backbone.\n",
+        "    (The paper's default is ViT-Small/16, which torchvision does not ship,\n",
+        "    so we use vit_b_16 and its 768-dimensional features instead.)\n",
+        "    \"\"\"\n",
+        "    def __init__(self,\n",
+        "                 out_dim=65536,\n",
+        "                 use_bn_in_head=False,\n",
+        "                 norm_last_layer=True,\n",
+        "                 momentum=0.999,\n",
+        "                 temperature_student=0.1,\n",
+        "                 temperature_teacher=0.04,\n",
+        "                 center_momentum=0.9):\n",
+        "        super().__init__()\n",
+        "\n",
+        "        # Load a pre-trained ViT-Base/16 backbone\n",
+        "        self.backbone = vit_b_16(weights=\"IMAGENET1K_V1\")\n",
+        "\n",
+        "        # Remove the classification head\n",
+        "        self.backbone.heads = nn.Identity()\n",
+        "\n",
+        "        # Feature dimension of ViT-Base/16\n",
+        "        feature_dim = 768\n",
+        "\n",
+        "        # Create student and teacher heads\n",
+        "        self.student_head = DINOHead(\n",
+        "            feature_dim,\n",
+        "            out_dim,\n",
+        "            use_bn=use_bn_in_head,\n",
+        "            norm_last_layer=norm_last_layer\n",
+        "        )\n",
+        "\n",
+        "        self.teacher_head = DINOHead(\n",
+        "            feature_dim,\n",
+        "            out_dim,\n",
+        "            use_bn=use_bn_in_head,\n",
+        "            norm_last_layer=False\n",
+        "        )\n",
+        "\n",
+        "        # Freeze teacher head parameters (they are updated only via EMA)\n",
+        "        for param in self.teacher_head.parameters():\n",
+        "            param.requires_grad = False\n",
+        "\n",
+        "        # Initialize the teacher head as a copy of the student head\n",
+        "        with torch.no_grad():\n",
+        "            for student_params, teacher_params in zip(self.student_head.parameters(), self.teacher_head.parameters()):\n",
+        "                teacher_params.data.copy_(student_params.data)\n",
+        "\n",
+        "        # Momentum parameter for the EMA update of the teacher\n",
+        "        self.momentum = momentum\n",
+        "\n",
+        "        # Center (C), kept as a buffer since the optimizer never trains it\n",
+        "        self.register_buffer(\"center\", torch.zeros(out_dim))\n",
+        "\n",
+        "        # Temperatures (the teacher is sharpened with a lower temperature,\n",
+        "        # e.g. 0.04 as in the paper, versus 0.1 for the student)\n",
+        "        self.temperature_student = temperature_student\n",
+        "        self.temperature_teacher = temperature_teacher\n",
+        "\n",
+        "        # Center momentum (for the EMA update of the center)\n",
+        "        self.center_momentum = center_momentum\n",
+        "\n",
+        "    def update_teacher(self):\n",
+        "        \"\"\"\n",
+        "        Update the teacher head with an EMA (Exponential Moving Average) of the student head\n",
+        "        \"\"\"\n",
+        "        with torch.no_grad():\n",
+        "            for student_params, teacher_params in zip(self.student_head.parameters(), self.teacher_head.parameters()):\n",
+        "                teacher_params.data = self.momentum * teacher_params.data + (1. - self.momentum) * student_params.data\n",
+        "\n",
+        "    def forward(self, x1, x2):\n",
+        "        \"\"\"\n",
+        "        Forward pass with two augmented views of the same image\n",
+        "        \"\"\"\n",
+        "        # Extract features from both augmented views\n",
+        "        z1 = self.backbone(x1)\n",
+        "        z2 = self.backbone(x2)\n",
+        "\n",
+        "        # Project features through the student head\n",
+        "        p1 = self.student_head(z1)\n",
+        "        p2 = self.student_head(z2)\n",
+        "\n",
+        "        # Teacher projections on the same (detached) features, so no\n",
+        "        # gradients flow into the teacher. A full implementation would\n",
+        "        # also keep a separate EMA copy of the backbone for the teacher.\n",
+        "        with torch.no_grad():\n",
+        "            t1 = self.teacher_head(z1.detach())\n",
+        "            t2 = self.teacher_head(z2.detach())\n",
+        "\n",
+        "        return p1, p2, t1, t2\n",
+        "\n",
+        "    def update_center(self, t1, t2):\n",
+        "        \"\"\"\n",
+        "        Update the center of the teacher outputs using an EMA (Exponential Moving Average)\n",
+        "        \"\"\"\n",
+        "        with torch.no_grad():\n",
+        "            # Concatenate teacher outputs and compute the batch mean\n",
+        "            center_update = torch.cat([t1, t2]).mean(dim=0)\n",
+        "            self.center = self.center_momentum * self.center + (1. - self.center_momentum) * center_update"
+      ],
+      "metadata": {
+        "id": "dinoModelCell01"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
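+    {
+      "cell_type": "markdown",
+      "source": [
+        "A small smoke test of the model with an illustrative `out_dim` (the first run downloads the pretrained ViT-B/16 weights). The student projections should require gradients while the teacher projections should not:"
+      ],
+      "metadata": {
+        "id": "dinoSmokeMd01"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Run two fake \"views\" through the model and inspect the outputs\n",
+        "demo_model = DINO(out_dim=1024)\n",
+        "views = torch.randn(2, 3, 224, 224)\n",
+        "p1, p2, t1, t2 = demo_model(views, views)\n",
+        "print(p1.shape, t1.shape)                  # expected: torch.Size([2, 1024]) twice\n",
+        "print(p1.requires_grad, t1.requires_grad)  # expected: True False"
+      ],
+      "metadata": {
+        "id": "dinoSmokeCode01"
+      },
+      "execution_count": null,
+      "outputs": []
+    },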
+    {
+      "cell_type": "code",
+      "source": [
+        "def dino_loss(student_output, teacher_output, center, student_temp=0.1, teacher_temp=0.04):\n",
+        "    \"\"\"\n",
+        "    DINO loss: cross-entropy between the centered, sharpened teacher\n",
+        "    distribution and the student distribution\n",
+        "    \"\"\"\n",
+        "    # Student: log-softmax at the (higher) student temperature\n",
+        "    student_log_probs = F.log_softmax(student_output / student_temp, dim=-1)\n",
+        "\n",
+        "    # Teacher: centering (subtract C) and sharpening (low temperature)\n",
+        "    # together prevent collapse\n",
+        "    teacher_probs = F.softmax((teacher_output - center) / teacher_temp, dim=-1).detach()\n",
+        "\n",
+        "    # Cross-entropy, averaged over the batch\n",
+        "    loss = torch.sum(-teacher_probs * student_log_probs, dim=-1).mean()\n",
+        "    return loss\n",
+        "\n",
+        "\n",
+        "def prepare_dino_transforms():\n",
+        "    \"\"\"\n",
+        "    Data augmentations for DINO (a single-crop simplification of the\n",
+        "    paper's multi-crop strategy; apply it twice to obtain two views)\n",
+        "    \"\"\"\n",
+        "    train_transform = transforms.Compose([\n",
+        "        transforms.RandomResizedCrop(224),\n",
+        "        transforms.RandomHorizontalFlip(),\n",
+        "        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.2),\n",
+        "        transforms.RandomGrayscale(p=0.2),\n",
+        "        transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),\n",
+        "        transforms.ToTensor(),\n",
+        "        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])\n",
+        "    ])\n",
+        "    return train_transform\n",
+        "\n",
+        "\n",
+        "def train_dino(model, optimizer, train_loader, device):\n",
+        "    \"\"\"\n",
+        "    Training loop for DINO. The DataLoader is expected to yield two\n",
+        "    random augmentations of each image, i.e. batches of ((view1, view2), labels).\n",
+        "    \"\"\"\n",
+        "    model.train()\n",
+        "    for (view1, view2), _ in train_loader:\n",
+        "        x1, x2 = view1.to(device), view2.to(device)\n",
+        "\n",
+        "        # Forward pass\n",
+        "        p1, p2, t1, t2 = model(x1, x2)\n",
+        "\n",
+        "        # Symmetric loss: student on one view vs. teacher on the other\n",
+        "        loss1 = dino_loss(p1, t2, model.center, model.temperature_student, model.temperature_teacher)\n",
+        "        loss2 = dino_loss(p2, t1, model.center, model.temperature_student, model.temperature_teacher)\n",
+        "        loss = (loss1 + loss2) / 2\n",
+        "\n",
+        "        # Backpropagate (only the student receives gradients)\n",
+        "        optimizer.zero_grad()\n",
+        "        loss.backward()\n",
+        "        optimizer.step()\n",
+        "\n",
+        "        # EMA updates of the teacher head and of the center\n",
+        "        model.update_teacher()\n",
+        "        model.update_center(t1, t2)\n",
+        "\n",
+        "        # Optional: print the loss for monitoring\n",
+        "        print(f\"Loss: {loss.item()}\")"
+      ],
+      "metadata": {
+        "id": "dinoTrainCell01"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def extract_features(model, dataloader, device):\n",
+        "    \"\"\"\n",
+        "    Extract features using the DINO backbone\n",
+        "    \"\"\"\n",
+        "    model.eval()\n",
+        "    all_features = []\n",
+        "    all_labels = []\n",
+        "\n",
+        "    with torch.no_grad():\n",
+        "        for images, labels in dataloader:\n",
+        "            images = images.to(device)\n",
+        "            features = model.backbone(images)\n",
+        "            all_features.append(features.cpu().numpy())\n",
+        "            all_labels.append(labels.numpy())\n",
+        "\n",
+        "    return np.concatenate(all_features), np.concatenate(all_labels)\n",
+        "\n",
+        "\n",
+        "def knn_evaluation(train_features, train_labels, test_features, test_labels, k=5):\n",
+        "    \"\"\"\n",
+        "    K-Nearest Neighbors evaluation\n",
+        "    \"\"\"\n",
+        "    scaler = StandardScaler()\n",
+        "    train_features_scaled = scaler.fit_transform(train_features)\n",
+        "    test_features_scaled = scaler.transform(test_features)\n",
+        "\n",
+        "    knn = KNeighborsClassifier(n_neighbors=k)\n",
+        "    knn.fit(train_features_scaled, train_labels)\n",
+        "\n",
+        "    predictions = knn.predict(test_features_scaled)\n",
+        "    accuracy = np.mean(predictions == test_labels)\n",
+        "\n",
+        "    return accuracy\n",
+        "\n",
+        "\n",
+        "def linear_classifier_evaluation(train_features, train_labels, test_features, test_labels):\n",
+        "    \"\"\"\n",
+        "    Linear classifier (logistic regression) evaluation\n",
+        "    \"\"\"\n",
+        "    scaler = StandardScaler()\n",
+        "    train_features_scaled = scaler.fit_transform(train_features)\n",
+        "    test_features_scaled = scaler.transform(test_features)\n",
+        "\n",
+        "    linear_clf = LogisticRegression(max_iter=1000)\n",
+        "    linear_clf.fit(train_features_scaled, train_labels)\n",
+        "\n",
+        "    predictions = linear_clf.predict(test_features_scaled)\n",
+        "    accuracy = np.mean(predictions == test_labels)\n",
+        "\n",
+        "    return accuracy\n",
+        "\n",
+        "\n",
+        "def evaluate_representations(model, train_loader, test_loader, device, knn_k=5):\n",
+        "    \"\"\"\n",
+        "    Comprehensive evaluation of the learned representations\n",
+        "    \"\"\"\n",
+        "    # Extract features\n",
+        "    train_features, train_labels = extract_features(model, train_loader, device)\n",
+        "    test_features, test_labels = extract_features(model, test_loader, device)\n",
+        "\n",
+        "    # KNN evaluation\n",
+        "    knn_accuracy = knn_evaluation(train_features, train_labels,\n",
+        "                                  test_features, test_labels, k=knn_k)\n",
+        "\n",
+        "    # Linear classifier evaluation\n",
+        "    linear_accuracy = linear_classifier_evaluation(train_features, train_labels,\n",
+        "                                                   test_features, test_labels)\n",
+        "\n",
+        "    return {\n",
+        "        'knn_accuracy': knn_accuracy,\n",
+        "        'linear_classifier_accuracy': linear_accuracy\n",
+        "    }"
+      ],
+      "metadata": {
+        "id": "dinoEvalCell01"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Example usage\n",
+        "if __name__ == '__main__':\n",
+        "    # Setup\n",
+        "    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+        "    model = DINO().to(device)\n",
+        "    # Only the student parameters are trainable\n",
+        "    optimizer = torch.optim.Adam((p for p in model.parameters() if p.requires_grad), lr=1e-4)\n",
+        "\n",
+        "    # Note: this is a skeleton. A real run would require a dataset whose\n",
+        "    # DataLoader yields two augmented views per image (see train_dino).\n",
+        "    print(\"DINO model with ViT-Base/16 backbone initialized!\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "bGo7Etl0Zn8T",
+        "outputId": "ab9e220a-a8bd-45f7-d8f0-4bf0f5753558"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "DINO model with ViT-Base/16 backbone initialized!\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file