Commit 0a0ea5b

committed
- Fix several bugs:
- Move I/O to CPU before exporting to numpy and running ORT inference
- Legalize the torch module names
- Add bias to QMHSA's linear
- Add input and weight quantizers to QLinear and QConv
- Add CI, license linting, and Docker build workflows
- Add Apache license
- Add single-layer and model tests
- Add MobileNetV3 model test
- Clean up structure and set up naming convention
- Update README
1 parent 723f529 commit 0a0ea5b

52 files changed (+977 −946 lines)
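
The first fix in the message (moving I/O to the CPU before exporting to numpy and running ORT inference) is the usual PyTorch/ONNX pitfall: .numpy() fails on accelerator tensors, so both the inputs and the reference outputs need a .cpu() hop before they are handed to onnxruntime. A minimal sketch of that pattern, with a made-up model and file name rather than anything taken from this commit:

import numpy as np
import onnxruntime as ort
import torch
import torch.nn as nn

# Hypothetical model and file name, for illustration only.
model = nn.Linear(16, 8).eval()
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

x = torch.randn(1, 16, device=device)
torch.onnx.export(model, (x,), "model.onnx")

# Move I/O to CPU *before* converting to numpy and running ORT inference.
xNp = x.detach().cpu().numpy()
refNp = model(x).detach().cpu().numpy()

session = ort.InferenceSession("model.onnx")
inputName = session.get_inputs()[0].name
ortOut = session.run(None, {inputName: xNp})[0]

np.testing.assert_allclose(refNp, ortOut, rtol=1e-4, atol=1e-5)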

.github/workflows/BuildDocker.yml

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+name: BuildDocker
+
+on:
+  workflow_dispatch:
+
+jobs:
+  build-docker:
+    permissions: write-all
+    name: Deploy Docker image
+    runs-on: ubuntu-22.04
+    steps:
+      # Actually build the Docker container
+      - uses: actions/checkout@v3
+      - uses: docker/setup-buildx-action@v1
+      - name: GHCR Log-in
+        uses: docker/login-action@v1
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Build and push
+        uses: docker/build-push-action@v2
+        with:
+          context: .
+          file: Container/Dockerfile
+          push: true
+          # JUNGVI: If you operate from a fork and want to build a new docker make sure to replace 'pulp-platform' by your uname.
+          tags: ghcr.io/pulp-platform/deepquant:main

.github/workflows/CI.yml

Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+name: CI
+
+on:
+  # push:
+  # pull_request:
+  workflow_dispatch:
+
+jobs:
+
+  build-deepquant:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+
+  single-layer-tests:
+    runs-on: ubuntu-latest
+    container:
+      image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+      - name: Run Tests
+        run: |
+          pytest -m SingleLayerTests
+
+  model-tests:
+    runs-on: ubuntu-latest
+    # container:
+    #   image: ghcr.io/pulp-platform/deepquant:main
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Build DeepQuant
+        run: |
+          python -m pip install --upgrade pip
+          pip install -e .
+      - name: Run Tests
+        run: |
+          pytest -m ModelTests
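
The two test jobs select tests by pytest marker. A hedged sketch of what a test carrying the SingleLayerTests marker could look like (the layer and assertion are illustrative, not taken from this commit; custom markers are normally also registered in pyproject.toml or pytest.ini to avoid warnings):

# Illustrative test selected by `pytest -m SingleLayerTests` above.
import pytest
import torch
from brevitas.nn import QuantLinear


@pytest.mark.SingleLayerTests
def test_quant_linear_forward_shape():
    # Hypothetical single layer under test; shapes are made up.
    layer = QuantLinear(16, 8, bias=True)
    out = layer(torch.randn(2, 16))
    assert out.shape == (2, 8)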

.github/workflows/Linting.yml

Lines changed: 20 additions & 0 deletions
@@ -0,0 +1,20 @@
+name: Linting
+
+on:
+  push:
+  pull_request:
+  workflow_dispatch:
+
+jobs:
+
+  Linting:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Format Python Licenses
+        run: |
+          grep -Lr "SPDX-License-Identifier: Apache-2.0" --exclude-dir=".git" . | grep ".*\.py$" || [[ $? == 1 ]]
+        shell: bash

.gitignore

Lines changed: 8 additions & 0 deletions
@@ -21,3 +21,11 @@ dist/
 .DS_Store
 .idea/
 .vscode/
+
+*.gz
+*-ubyte
+*.pth
+*.onnx
+*.npz
+onnx/*
+Dataset/*

Container/Dockerfile

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+FROM python:3.11-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml .
+
+RUN pip install --upgrade pip setuptools wheel && \
+    pip install toml-to-requirements && \
+    toml-to-req --toml-file pyproject.toml && \
+    pip install -r requirements.txt

src/DeepQuant/custom_forwards/activations.py renamed to DeepQuant/CustomForwards/Activations.py

Lines changed: 12 additions & 15 deletions
@@ -4,11 +4,7 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementations for Brevitas QuantActivation layers.
-"""
 
-import torch
 import torch.nn as nn
 from torch import Tensor
 from brevitas.nn.quant_layer import QuantNonLinearActLayer
@@ -22,15 +18,15 @@ class InnerForwardImplWrapperActivation(nn.Module):
     so that FX tracing can display it as a separate node.
     """
 
-    def __init__(self, act_impl: nn.Module) -> None:
+    def __init__(self, actImpl: nn.Module) -> None:
         """
         Args:
             act_impl: The original activation function module (e.g. an instance of nn.ReLU).
         """
         super().__init__()
-        self.act_impl = act_impl
+        self.actImpl = actImpl
 
-    def forward(self, quant_input: Tensor) -> Tensor:
+    def forward(self, quantInput: Tensor) -> Tensor:
         """
         Applies the wrapped activation function.
 
@@ -40,10 +36,10 @@ def forward(self, quant_input: Tensor) -> Tensor:
         Returns:
             Output tensor after applying the activation.
         """
-        return self.act_impl(quant_input)
+        return self.actImpl(quantInput)
 
 
-def quant_activation_forward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
+def quantActivationForward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
     """
     Unrolled forward pass for a Brevitas QuantActivation layer.
 
@@ -59,11 +55,12 @@ def quant_activation_forward(self: QuantNonLinearActLayer, inp: Tensor) -> Tensor:
     Returns:
         Output tensor after applying activation and output quantization.
     """
-    quant_input = self.input_quant(inp) if self.input_quant is not None else inp
+    quantInput = self.input_quant(inp) if self.input_quant is not None else inp
     # Use the wrapped activation if available; otherwise pass through.
-    if hasattr(self, "wrapped_act_impl"):
-        output = self.wrapped_act_impl(quant_input)
+    if hasattr(self, "wrappedActImpl"):
+        output = self.wrappedActImpl(quantInput)
     else:
-        output = quant_input
-    quant_output = self.act_quant(output) if self.act_quant is not None else output
-    return quant_output
+        output = quantInput
+    import IPython; IPython.embed()
+    quantOutput = self.act_quant(output) if self.act_quant is not None else output
+    return quantOutput
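
To see what this renamed Activations module is for: the wrapper exposes the inner activation as its own submodule, and quantActivationForward unrolls input_quant, activation, and act_quant so FX tracing shows each step as a separate node. A hedged sketch of attaching them to a Brevitas activation by hand (the package presumably wires this up in its own export/transformation pass; this only illustrates the mechanism):

import types

import torch
import torch.nn as nn
from brevitas.nn import QuantReLU

from DeepQuant.CustomForwards.Activations import (
    InnerForwardImplWrapperActivation,
    quantActivationForward,
)

act = QuantReLU()
# Expose the inner activation as a named submodule so FX sees a separate node.
act.wrappedActImpl = InnerForwardImplWrapperActivation(nn.ReLU())
# Rebind the unrolled forward onto this instance.
act.forward = types.MethodType(quantActivationForward, act)

# Note: as committed above, quantActivationForward still contains an
# 'import IPython; IPython.embed()' debug line, so the first call will drop
# into an interactive shell until that line is removed.
out = act(torch.randn(4, 8))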
Lines changed: 17 additions & 21 deletions
@@ -4,39 +4,35 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementations for Brevitas QuantLinear layers.
-"""
 
-import torch
 import torch.nn as nn
 from torch import Tensor
 from brevitas.nn.quant_layer import QuantWeightBiasInputOutputLayer
 
 
 class InnerForwardImplWrapperLinear(nn.Module):
     """
-    A small wrapper around the 'inner_forward_impl' of a Brevitas QuantLinear
+    A small wrapper around the 'innerForwardImpl' of a Brevitas QuantLinear
     (QuantWeightBiasInputOutputLayer).
 
-    We want to expose the logic within 'inner_forward_impl' as a standalone
+    We want to expose the logic within 'innerForwardImpl' as a standalone
     submodule, so that FX tracing can see it as a leaf.
     """
 
-    def __init__(self, inner_forward_impl: nn.Module) -> None:
+    def __init__(self, innerForwardImpl: nn.Module) -> None:
         """
         Args:
-            inner_forward_impl: The original function that processes
+            innerForwardImpl: The original function that processes
                 (quant_input, quant_weight, quant_bias).
         """
         super().__init__()
-        self.inner_forward_impl = inner_forward_impl
+        self.innerForwardImpl = innerForwardImpl
 
     def forward(
-        self, quant_input: Tensor, quant_weight: Tensor, quant_bias: Tensor
+        self, quantInput: Tensor, quantWeight: Tensor, quantBias: Tensor
     ) -> Tensor:
         """
-        Applies the wrapped inner_forward_impl.
+        Applies the wrapped innerForwardImpl.
 
         Args:
             quant_input: Input after input_quant.
@@ -46,18 +42,18 @@ def forward(
         Returns:
             A torch.Tensor with the linear operation applied.
         """
-        return self.inner_forward_impl(quant_input, quant_weight, quant_bias)
+        return self.innerForwardImpl(quantInput, quantWeight, quantBias)
 
 
-def quantWBIOL_forward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
+def quantWBIOLForward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
     """
     Unrolled forward pass for a Brevitas QuantLinear:
 
     Steps:
     1) self.input_quant
     2) self.weight_quant
     3) self.bias_quant (if bias is present)
-    4) inner_forward_impl (wrapped)
+    4) innerForwardImpl (wrapped)
     5) self.output_quant
 
     Args:
@@ -67,13 +63,13 @@ def quantWBIOL_forward(self: QuantWeightBiasInputOutputLayer, inp: Tensor) -> Tensor:
     Returns:
         Output Tensor after the unrolled quantized linear steps.
     """
-    quant_input = self.input_quant(inp)
-    quant_weight = self.weight_quant(self.weight)
+    quantInput = self.input_quant(inp)
+    quantWeight = self.weight_quant(self.weight)
 
-    quant_bias = None
+    quantBias = None
     if self.bias is not None:
-        quant_bias = self.bias_quant(self.bias, quant_input, quant_weight)
+        quantBias = self.bias_quant(self.bias, quantInput, quantWeight)
 
-    output = self.wrapped_inner_forward_impl(quant_input, quant_weight, quant_bias)
-    quant_output = self.output_quant(output)
-    return quant_output
+    output = self.wrappedInnerForwardImpl(quantInput, quantWeight, quantBias)
+    quantOutput = self.output_quant(output)
+    return quantOutput

src/DeepQuant/custom_forwards/multiheadattention.py renamed to DeepQuant/CustomForwards/MultiHeadAttention.py

Lines changed: 29 additions & 32 deletions
@@ -4,9 +4,6 @@
 #
 # Federico Brancasi <[email protected]>
 
-"""
-Custom forward implementation for Brevitas QuantMultiheadAttention.
-"""
 
 import math
 import torch
@@ -15,7 +12,7 @@
 from brevitas.nn.quant_mha import QuantMultiheadAttention
 
 
-def unrolled_quant_mha_forward(
+def unrolledQuantMhaForward(
     self: QuantMultiheadAttention, query: Tensor, key: Tensor, value: Tensor
 ) -> Tensor:
     """
@@ -39,52 +36,52 @@ def unrolled_quant_mha_forward(
     after the unrolled MHA steps.
     """
     # 1) Q, K, V projections
-    q_out = self.q_proj(query)
-    k_out = self.k_proj(key)
-    v_out = self.v_proj(value)
+    qOut = self.q_proj(query)
+    kOut = self.k_proj(key)
+    vOut = self.v_proj(value)
 
     # 2) Multi-head reshape
-    seq_len, batch_size, embed_dim = q_out.shape
-    head_dim = embed_dim // self.num_heads
+    seqLen, batchSize, embedDim = qOut.shape
+    headDim = embedDim // self.num_heads
 
-    q_out = (
-        q_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    qOut = (
+        qOut.view(seqLen, batchSize, self.num_heads, headDim)
         .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
-    k_out = (
-        k_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    kOut = (
+        kOut.view(seqLen, batchSize, self.num_heads, headDim)
        .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
-    v_out = (
-        v_out.view(seq_len, batch_size, self.num_heads, head_dim)
+    vOut = (
+        vOut.view(seqLen, batchSize, self.num_heads, headDim)
         .permute(1, 2, 0, 3)
-        .reshape(batch_size * self.num_heads, seq_len, head_dim)
+        .reshape(batchSize * self.num_heads, seqLen, headDim)
     )
 
     # 3) Scale queries, then quantize
-    q_scaled = q_out / math.sqrt(head_dim)
-    q_scaled = self.q_scaled_quant(q_scaled)
+    qScaled = qOut / math.sqrt(headDim)
+    qScaled = self.q_scaled_quant(qScaled)
 
     # 4) Transpose + quantize K, compute attention weights
-    k_t = k_out.transpose(-2, -1)
+    k_t = kOut.transpose(-2, -1)
     k_t = self.k_transposed_quant(k_t)
 
-    attn_weights = torch.bmm(q_scaled, k_t)
-    attn_weights = self.softmax_input_quant(attn_weights)
-    attn_weights = F.softmax(attn_weights, dim=-1)
-    attn_weights = self.attn_output_weights_quant(attn_weights)
+    attnWeights = torch.bmm(qScaled, k_t)
+    attnWeights = self.softmax_input_quant(attnWeights)
+    attnWeights = F.softmax(attnWeights, dim=-1)
+    attnWeights = self.attn_output_weights_quant(attnWeights)
 
     # 5) Quantize V, multiply, reshape back, and final out projection
-    v_out = self.v_quant(v_out)
-    attn_output = torch.bmm(attn_weights, v_out)
+    vOut = self.v_quant(vOut)
+    attnOutput = torch.bmm(attnWeights, vOut)
 
-    attn_output = (
-        attn_output.view(batch_size, self.num_heads, seq_len, head_dim)
+    attnOutput = (
+        attnOutput.view(batchSize, self.num_heads, seqLen, headDim)
         .permute(2, 0, 1, 3)
-        .reshape(seq_len, batch_size, embed_dim)
+        .reshape(seqLen, batchSize, embedDim)
     )
 
-    attn_output = self.out_proj(attn_output)
-    return attn_output
+    attnOutput = self.out_proj(attnOutput)
+    return attnOutput
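
The reshapes in unrolledQuantMhaForward follow the standard (seqLen, batchSize, embedDim) to (batchSize * numHeads, seqLen, headDim) bookkeeping and back. A plain-PyTorch sketch of just that shape logic with toy sizes (quantizers and projections omitted, so this is not the DeepQuant code path, only an illustration of the tensor gymnastics):

import torch

seqLen, batchSize, embedDim, numHeads = 10, 2, 16, 4
headDim = embedDim // numHeads

q = torch.randn(seqLen, batchSize, embedDim)
qHeads = (
    q.view(seqLen, batchSize, numHeads, headDim)
    .permute(1, 2, 0, 3)                             # -> (batch, heads, seq, headDim)
    .reshape(batchSize * numHeads, seqLen, headDim)  # -> (batch*heads, seq, headDim)
)
# Self-attention on q alone, purely to exercise the shapes.
attn = torch.softmax(torch.bmm(qHeads / headDim**0.5, qHeads.transpose(-2, -1)), dim=-1)
out = torch.bmm(attn, qHeads)
out = (
    out.view(batchSize, numHeads, seqLen, headDim)
    .permute(2, 0, 1, 3)
    .reshape(seqLen, batchSize, embedDim)            # back to (seq, batch, embed)
)
print(out.shape)  # torch.Size([10, 2, 16])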

0 commit comments
