diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv index cd83274..f81e482 100644 --- a/automation/notebooks-table-data.csv +++ b/automation/notebooks-table-data.csv @@ -16,3 +16,4 @@ Multi-Head Attention, modules/multihead-self-attention.ipynb,,https://arxiv.org/ ResNet,architectures/resnet.ipynb,,https://arxiv.org/abs/1512.03385 Convolutional Block Attention, modules/convolutional-block-attention.ipynb,,https://arxiv.org/abs/1807.06521 Transformer, architectures/transformer.ipynb,,https://arxiv.org/abs/1706.03762 +ConvMixer,architectures/convmixer.ipynb,,https://arxiv.org/pdf/2201.09792 \ No newline at end of file diff --git a/notebooks/architectures/convmixer.ipynb b/notebooks/architectures/convmixer.ipynb new file mode 100644 index 0000000..cf523c4 --- /dev/null +++ b/notebooks/architectures/convmixer.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IJveajFZvdXK" + }, + "source": [ + "## ConvMixer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q5AuzFB2tA12" + }, + "outputs": [], + "source": [ + "#@title **Install required packages**\n", + "\n", + "%%capture\n", + "! pip install torchinfo" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "118P89a1osHb" + }, + "outputs": [], + "source": [ + "#@title **Importing libraries**\n", + "import torch # 2.5.1+cu121\n", + "import torch.nn as nn\n", + "import torchinfo #1.8.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "QchOSIbro1zf", + "outputId": "94003f5a-622f-4f54-e916-26fbbff86c17" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch version: 2.5.1+cu121\n", + "torchinfo version: 1.8.0\n" + ] + } + ], + "source": [ + "# Note: Not all dependencies have the __version__ method.\n", + "print(f\"torch version: {torch.__version__}\")\n", + "print(f\"torchinfo version: {torchinfo.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hrG5QYB4pRMu" + }, + "source": [ + "**ConvMixer architecture code**\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r2d2e2P0pdan" + }, + "outputs": [], + "source": [ + "class Residual(nn.Module):\n", + " def __init__(self, fn):\n", + " super().__init__()\n", + " self.fn = fn\n", + "\n", + " def forward(self, x):\n", + " return self.fn(x) + x\n", + "\n", + "def ConvMixer(dim, depth, kernel_size = 9, patch_size = 7, n_classes = 1000):\n", + " return nn.Sequential(\n", + " nn.Conv2d(3, dim, kernel_size = patch_size, stride = patch_size),\n", + " nn.GELU(),\n", + " nn.BatchNorm2d(dim),\n", + " *[nn.Sequential(\n", + " Residual(nn.Sequential(\n", + " nn.Conv2d(dim, dim, kernel_size, groups=dim, padding=\"same\"),\n", + " nn.GELU(),\n", + " nn.BatchNorm2d(dim)\n", + " )),\n", + " nn.Conv2d(dim,dim, kernel_size = 1),\n", + " nn.GELU(),\n", + " nn.BatchNorm2d(dim)\n", + " )for i in range(depth)],\n", + " nn.AdaptiveAvgPool2d((1,1)),\n", + " nn.Flatten(),\n", + " nn.Linear(dim, n_classes)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9YM5zfNoslTx", + "outputId": "1b5a1295-d3ac-49c9-d61d-ae986021ad00" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "=================================================================\n", + "Layer (type:depth-idx) Param #\n", + "=================================================================\n", + "Sequential --\n", + "├─Conv2d: 1-1 8,192\n", + "├─GELU: 1-2 --\n", + "├─BatchNorm2d: 1-3 4,096\n", + "├─Sequential: 1-4 --\n", + "│ └─Residual: 2-1 --\n", + "│ │ └─Sequential: 3-1 172,032\n", + "│ └─Conv2d: 2-2 4,196,352\n", + "│ └─GELU: 2-3 --\n", + "│ └─BatchNorm2d: 2-4 4,096\n", + "├─Sequential: 1-5 --\n", + "│ └─Residual: 2-5 --\n", + "│ │ └─Sequential: 3-2 172,032\n", + "│ └─Conv2d: 2-6 4,196,352\n", + "│ └─GELU: 2-7 --\n", + "│ └─BatchNorm2d: 2-8 4,096\n", + "├─Sequential: 1-6 --\n", + "│ └─Residual: 2-9 --\n", + "│ │ └─Sequential: 3-3 172,032\n", + "│ └─Conv2d: 2-10 4,196,352\n", + "│ └─GELU: 2-11 --\n", + "│ └─BatchNorm2d: 2-12 4,096\n", + "├─Sequential: 1-7 --\n", + "│ └─Residual: 2-13 --\n", + "│ │ └─Sequential: 3-4 172,032\n", + "│ └─Conv2d: 2-14 4,196,352\n", + "│ └─GELU: 2-15 --\n", + "│ └─BatchNorm2d: 2-16 4,096\n", + "├─Sequential: 1-8 --\n", + "│ └─Residual: 2-17 --\n", + "│ │ └─Sequential: 3-5 172,032\n", + "│ └─Conv2d: 2-18 4,196,352\n", + "│ └─GELU: 2-19 --\n", + "│ └─BatchNorm2d: 2-20 4,096\n", + "├─Sequential: 1-9 --\n", + "│ └─Residual: 2-21 --\n", + "│ │ └─Sequential: 3-6 172,032\n", + "│ └─Conv2d: 2-22 4,196,352\n", + "│ └─GELU: 2-23 --\n", + "│ └─BatchNorm2d: 2-24 4,096\n", + "├─Sequential: 1-10 --\n", + "│ └─Residual: 2-25 --\n", + "│ │ └─Sequential: 3-7 172,032\n", + "│ └─Conv2d: 2-26 4,196,352\n", + "│ └─GELU: 2-27 --\n", + "│ └─BatchNorm2d: 2-28 4,096\n", + "├─Sequential: 1-11 --\n", + "│ └─Residual: 2-29 --\n", + "│ │ └─Sequential: 3-8 172,032\n", + "│ └─Conv2d: 2-30 4,196,352\n", + "│ └─GELU: 2-31 --\n", + "│ └─BatchNorm2d: 2-32 4,096\n", + "├─AdaptiveAvgPool2d: 1-12 --\n", + "├─Flatten: 1-13 --\n", + "├─Linear: 1-14 2,049,000\n", + "=================================================================\n", + "Total params: 37,041,128\n", + "Trainable params: 37,041,128\n", + "Non-trainable params: 0\n", + "=================================================================" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model = ConvMixer(2048, 8, kernel_size=9, patch_size=1, n_classes=1000)\n", + "torchinfo.summary(model)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}