Commit 0a0ea5b

committed
- Fix several bugs:
- Move I/O to CPU before exporting to numpy and running ORT inference
- Legalize the torch module names
- Add bias to QMHSA's linear
- Add input and weight quantizers to QLinear and QConv
- Add CI, license linting, and Docker build workflows
- Add Apache license
- Add single-layer and model tests
- Add MobileNetV3 model test
- Clean up structure and set up naming convention
- Update README
1 parent 723f529 commit 0a0ea5b

52 files changed (+977 −946 lines)
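
The first fix in the message (moving I/O to the CPU before exporting to numpy and running ORT inference) is the usual PyTorch/ONNX pitfall: .numpy() fails on accelerator tensors, so both the inputs and the reference outputs need a .cpu() hop before they are handed to onnxruntime. A minimal sketch of that pattern, with a made-up model and file name rather than anything taken from this commit:

import numpy as np
import onnxruntime as ort
import torch
import torch.nn as nn

# Hypothetical model and file name, for illustration only.
model = nn.Linear(16, 8).eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

x = torch.randn(1, 16, device=device)
torch.onnx.export(model, (x,), "model.onnx")

# Move I/O to CPU *before* converting to numpy and running ORT inference.
xNp = x.detach().cpu().numpy()
refNp = model(x).detach().cpu().numpy()

session = ort.InferenceSession("model.onnx")
inputName = session.get_inputs()[0].name
ortOut = session.run(None, {inputName: xNp})[0]

np.testing.assert_allclose(refNp, ortOut, rtol=1e-4, atol=1e-5)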

.github/workflows/BuildDocker.yml

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+name: BuildDocker
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build-docker:
+    permissions: write-all
+    name: Deploy Docker image
+    runs-on: ubuntu-22.04
+    steps:
+      # Actually build the Docker container
+      - uses: actions/checkout@v3
+      - uses: docker/setup-buildx-action@v1
+      - name: GHCR Log-in
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: Container/Dockerfile
+          push: true
+          # JUNGVI: If you operate from a fork and want to build a new docker make sure to replace 'pulp-platform' by your uname.
+          tags: ghcr.io/pulp-platform/deepquant:main

.github/workflows/CI.yml

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+name: CI
+
+on:
+  # push:
+  # pull_request:
+  workflow_dispatch:
+
+jobs:
+
+  build-deepquant:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+
+  single-layer-tests:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+      - name: Run Tests
+        run: |
+          pytest -m SingleLayerTests
+
+  model-tests:
+    runs-on: ubuntu-latest
+    # container:
+    #   image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+      - name: Run Tests
+        run: |
+          pytest -m ModelTests
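
The two test jobs select tests by pytest marker. A hedged sketch of what a test carrying the SingleLayerTests marker could look like (the layer and assertion are illustrative, not taken from this commit; custom markers are normally also registered in pyproject.toml or pytest.ini to avoid warnings):

# Illustrative test selected by `pytest -m SingleLayerTests` above.
import pytest
import torch
from brevitas.nn import QuantLinear


@pytest.mark.SingleLayerTests
def test_quant_linear_forward_shape():
    # Hypothetical single layer under test; shapes are made up.
    layer = QuantLinear(16, 8, bias=True)
    out = layer(torch.randn(2, 16))
    assert out.shape == (2, 8)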

.github/workflows/Linting.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+name: Linting
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+
+  Linting:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Format Python Licenses
+        run: |
+          grep -Lr "SPDX-License-Identifier: Apache-2.0" --exclude-dir=".git" . | grep ".*\.py$" || [[ $? == 1 ]]
+        shell: bash

.gitignore

Lines changed: 8 additions & 0 deletions
@@ -21,3 +21,11 @@ dist/
 .DS_Store
 .idea/
 .vscode/
+
+*.gz
+*-ubyte
+*.pth
+*.onnx
+*.npz
+onnx/*
+Dataset/*

Container/Dockerfile

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+FROM python:3.11-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml .
+
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install toml-to-requirements && \
+    toml-to-req --toml-file pyproject.toml && \
+    pip install -r requirements.txt

src/DeepQuant/custom_forwards/activations.py renamed to DeepQuant/CustomForwards/Activations.py

Lines changed: 12 additions & 15 deletions
@@ -4,11 +4,7 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementations for Brevitas QuantActivation layers.
-"""
 
-import torch
 import torch.nn as nn
 from torch import Tensor
 from brevitas.nn.quant_layer import QuantNonLinearActLayer
@@ -22,15 +18,15 @@ class InnerForwardImplWrapperActivation(nn.Module):
     so that FX tracing can display it as a separate node.
     """
 
-    def __init__(self, act_impl: nn.Module) -> None:
+    def __init__(self, actImpl: nn.Module) -> None:
         """
         Args:
             act_impl: The original activation function module (e.g. an instance of nn.ReLU).
         """
         super().__init__()
-        self.act_impl = act_impl
+        self.actImpl = actImpl
 
-    def forward(self, quant_input: Tensor) -> Tensor:
+    def forward(self, quantInput: Tensor) -> Tensor:
         """
         Applies the wrapped activation function.
 
@@ -40,10 +36,10 @@ def forward(self, quant_input: Tensor) -> Tensor:
         Returns:
             Output tensor after applying the activation.
         """
-        return self.act_impl(quant_input)
+        return self.actImpl(quantInput)
 
 
-def quant_activation_forward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
+def quantActivationForward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
     """
     Unrolled forward pass for a Brevitas QuantActivation layer.
 
@@ -59,11 +55,12 @@ def quant_activation_forward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
     Returns:
         Output tensor after applying activation and output quantization.
     """
-    quant_input = self.input_quant(inp) if self.input_quant is not None else inp
+    quantInput = self.input_quant(inp) if self.input_quant is not None else inp
     # Use the wrapped activation if available; otherwise pass through.
-    if hasattr(self, "wrapped_act_impl"):
-        output = self.wrapped_act_impl(quant_input)
+    if hasattr(self, "wrappedActImpl"):
+        output = self.wrappedActImpl(quantInput)
     else:
-        output = quant_input
-    quant_output = self.act_quant(output) if self.act_quant is not None else output
-    return quant_output
+        output = quantInput
+    import IPython; IPython.embed()
+    quantOutput = self.act_quant(output) if self.act_quant is not None else output
+    return quantOutput
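
To see what this renamed Activations module is for: the wrapper exposes the inner activation as its own submodule, and quantActivationForward unrolls input_quant, activation, and act_quant so FX tracing shows each step as a separate node. A hedged sketch of attaching them to a Brevitas activation by hand (the package presumably wires this up in its own export/transformation pass; this only illustrates the mechanism):

import types

import torch
import torch.nn as nn
from brevitas.nn import QuantReLU

from DeepQuant.CustomForwards.Activations import (
    InnerForwardImplWrapperActivation,
    quantActivationForward,
)

act = QuantReLU()
# Expose the inner activation as a named submodule so FX sees a separate node.
act.wrappedActImpl = InnerForwardImplWrapperActivation(nn.ReLU())
# Rebind the unrolled forward onto this instance.
act.forward = types.MethodType(quantActivationForward, act)

# Note: as committed above, quantActivationForward still contains an
# 'import IPython; IPython.embed()' debug line, so the first call will drop
# into an interactive shell until that line is removed.
out = act(torch.randn(4, 8))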
Lines changed: 17 additions & 21 deletions
@@ -4,39 +4,35 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementations for Brevitas QuantLinear layers.
-"""
 
-import torch
 import torch.nn as nn
 from torch import Tensor
 from brevitas.nn.quant_layer import QuantWeightBiasInputOutputLayer
 
 
 class InnerForwardImplWrapperLinear(nn.Module):
     """
-    A small wrapper around the 'inner_forward_impl' of a Brevitas QuantLinear
+    A small wrapper around the 'innerForwardImpl' of a Brevitas QuantLinear
     (QuantWeightBiasInputOutputLayer).
 
-    We want to expose the logic within 'inner_forward_impl' as a standalone
+    We want to expose the logic within 'innerForwardImpl' as a standalone
     submodule, so that FX tracing can see it as a leaf.
     """
 
-    def __init__(self, inner_forward_impl: nn.Module) -> None:
+    def __init__(self, innerForwardImpl: nn.Module) -> None:
         """
         Args:
-            inner_forward_impl: The original function that processes
+            innerForwardImpl: The original function that processes
                 (quant_input, quant_weight, quant_bias).
         """
         super().__init__()
-        self.inner_forward_impl = inner_forward_impl
+        self.innerForwardImpl = innerForwardImpl
 
     def forward(
-        self, quant_input: Tensor, quant_weight: Tensor, quant_bias: Tensor
+        self, quantInput: Tensor, quantWeight: Tensor, quantBias: Tensor
     ) -> Tensor:
         """
-        Applies the wrapped inner_forward_impl.
+        Applies the wrapped innerForwardImpl.
 
         Args:
             quant_input: Input after input_quant.
@@ -46,18 +42,18 @@ def forward(
         Returns:
             A torch.Tensor with the linear operation applied.
         """
-        return self.inner_forward_impl(quant_input, quant_weight, quant_bias)
+        return self.innerForwardImpl(quantInput, quantWeight, quantBias)
 
 
-def quantWBIOL_forward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
+def quantWBIOLForward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
     """
     Unrolled forward pass for a Brevitas QuantLinear:
 
     Steps:
     1) self.input_quant
     2) self.weight_quant
     3) self.bias_quant (if bias is present)
-    4) inner_forward_impl (wrapped)
+    4) innerForwardImpl (wrapped)
     5) self.output_quant
 
     Args:
@@ -67,13 +63,13 @@ def quantWBIOL_forward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
     Returns:
         Output Tensor after the unrolled quantized linear steps.
     """
-    quant_input = self.input_quant(inp)
-    quant_weight = self.weight_quant(self.weight)
+    quantInput = self.input_quant(inp)
+    quantWeight = self.weight_quant(self.weight)
 
-    quant_bias = None
+    quantBias = None
     if self.bias is not None:
-        quant_bias = self.bias_quant(self.bias, quant_input, quant_weight)
+        quantBias = self.bias_quant(self.bias, quantInput, quantWeight)
 
-    output = self.wrapped_inner_forward_impl(quant_input, quant_weight, quant_bias)
-    quant_output = self.output_quant(output)
-    return quant_output
+    output = self.wrappedInnerForwardImpl(quantInput, quantWeight, quantBias)
+    quantOutput = self.output_quant(output)
+    return quantOutput

src/DeepQuant/custom_forwards/multiheadattention.py renamed to DeepQuant/CustomForwards/MultiHeadAttention.py

Lines changed: 29 additions & 32 deletions
@@ -4,9 +4,6 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementation for Brevitas QuantMultiheadAttention.
-"""
 
 import math
 import torch
@@ -15,7 +12,7 @@
 from brevitas.nn.quant_mha import QuantMultiheadAttention
 
 
-def unrolled_quant_mha_forward(
+def unrolledQuantMhaForward(
     self: QuantMultiheadAttention, query: Tensor, key: Tensor, value: Tensor
 ) -> Tensor:
     """
@@ -39,52 +36,52 @@ def unrolled_quant_mha_forward(
     after the unrolled MHA steps.
     """
     # 1) Q, K, V projections
-    q_out = self.q_proj(query)
-    k_out = self.k_proj(key)
-    v_out = self.v_proj(value)
+    qOut = self.q_proj(query)
+    kOut = self.k_proj(key)
+    vOut = self.v_proj(value)
 
     # 2) Multi-head reshape
-    seq_len, batch_size, embed_dim = q_out.shape
-    head_dim = embed_dim // self.num_heads
+    seqLen, batchSize, embedDim = qOut.shape
+    headDim = embedDim // self.num_heads
 
-    q_out = (
-        q_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    qOut = (
+        qOut.view(seqLen, batchSize, self.num_heads, headDim)
         .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
-    k_out = (
-        k_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    kOut = (
+        kOut.view(seqLen, batchSize, self.num_heads, headDim)
        .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
-    v_out = (
-        v_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    vOut = (
+        vOut.view(seqLen, batchSize, self.num_heads, headDim)
         .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
 
     # 3) Scale queries, then quantize
-    q_scaled = q_out / math.sqrt(head_dim)
-    q_scaled = self.q_scaled_quant(q_scaled)
+    qScaled = qOut / math.sqrt(headDim)
+    qScaled = self.q_scaled_quant(qScaled)
 
     # 4) Transpose + quantize K, compute attention weights
-    k_t = k_out.transpose(-2, -1)
+    k_t = kOut.transpose(-2, -1)
     k_t = self.k_transposed_quant(k_t)
 
-    attn_weights = torch.bmm(q_scaled, k_t)
-    attn_weights = self.softmax_input_quant(attn_weights)
-    attn_weights = F.softmax(attn_weights, dim=-1)
-    attn_weights = self.attn_output_weights_quant(attn_weights)
+    attnWeights = torch.bmm(qScaled, k_t)
+    attnWeights = self.softmax_input_quant(attnWeights)
+    attnWeights = F.softmax(attnWeights, dim=-1)
+    attnWeights = self.attn_output_weights_quant(attnWeights)
 
     # 5) Quantize V, multiply, reshape back, and final out projection
-    v_out = self.v_quant(v_out)
-    attn_output = torch.bmm(attn_weights, v_out)
+    vOut = self.v_quant(vOut)
+    attnOutput = torch.bmm(attnWeights, vOut)
 
-    attn_output = (
-        attn_output.view(batch_size, self.num_heads, seq_len, head_dim)
+    attnOutput = (
+        attnOutput.view(batchSize, self.num_heads, seqLen, headDim)
         .permute(2, 0, 1, 3)
-        .reshape(seq_len, batch_size, embed_dim)
+        .reshape(seqLen, batchSize, embedDim)
     )
 
-    attn_output = self.out_proj(attn_output)
-    return attn_output
+    attnOutput = self.out_proj(attnOutput)
+    return attnOutput
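
The reshapes in unrolledQuantMhaForward follow the standard (seqLen, batchSize, embedDim) to (batchSize * numHeads, seqLen, headDim) bookkeeping and back. A plain-PyTorch sketch of just that shape logic with toy sizes (quantizers and projections omitted, so this is not the DeepQuant code path, only an illustration of the tensor gymnastics):

import torch

seqLen, batchSize, embedDim, numHeads = 10, 2, 16, 4
headDim = embedDim // numHeads

q = torch.randn(seqLen, batchSize, embedDim)
qHeads = (
    q.view(seqLen, batchSize, numHeads, headDim)
    .permute(1, 2, 0, 3)                             # -> (batch, heads, seq, headDim)
    .reshape(batchSize * numHeads, seqLen, headDim)  # -> (batch*heads, seq, headDim)
)
# Self-attention on q alone, purely to exercise the shapes.
attn = torch.softmax(torch.bmm(qHeads / headDim**0.5, qHeads.transpose(-2, -1)), dim=-1)
out = torch.bmm(attn, qHeads)
out = (
    out.view(batchSize, numHeads, seqLen, headDim)
    .permute(2, 0, 1, 3)
    .reshape(seqLen, batchSize, embedDim)            # back to (seq, batch, embed)
)
print(out.shape)  # torch.Size([10, 2, 16])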

0 commit comments
