Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/quickcheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Fast smoke-test workflow: runs only the model quickcheck suite on each PR.
name: Quickcheck

on:
  pull_request:
  workflow_dispatch:

# Cancel any in-flight run for the same PR (or ref, for manual dispatch)
# so only the latest commit is tested.
concurrency:
  group: quickcheck-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  quickcheck:
    runs-on: ubuntu-latest
    timeout-minutes: 90
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"
          cache: "pip"

      - name: Install Dependencies
        # pytest-xdist supplies the `-n auto` flag used in the test step.
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e .[test]
          python -m pip install pytest-xdist

      - name: Run Quickcheck
        run: python -m pytest -q tests/unit_test/models/test_model_quickcheck.py -n auto
20 changes: 20 additions & 0 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Full unit-test workflow: runs the whole tests/unit_test suite on each PR.
name: Unit Tests

on:
  pull_request:

jobs:
  unit-tests:
    runs-on: ubuntu-latest
    # Bound runaway runs the same way the quickcheck workflow does.
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install package and test dependencies
        # pytest-xdist provides the `-n auto` option used below; the sibling
        # quickcheck workflow installs it explicitly (i.e. it is not part of
        # the `[test]` extra), so it must be installed here as well.
        run: |
          pip install -e ".[test]"
          pip install pytest-xdist

      - name: Run unit tests
        run: pytest tests/unit_test/ -n auto -v
2 changes: 2 additions & 0 deletions QEfficient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from QEfficient.compile.compile_helper import compile
from QEfficient.diffusers.pipelines.flux.pipeline_flux import QEffFluxPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan import QEffWanPipeline
from QEfficient.diffusers.pipelines.wan.pipeline_wan_i2v import QEffWanImageToVideoPipeline
from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
from QEfficient.peft import QEffAutoPeftModelForCausalLM
Expand Down Expand Up @@ -59,6 +60,7 @@
"QEFFCommonLoader",
"QEffFluxPipeline",
"QEffWanPipeline",
"QEffWanImageToVideoPipeline",
]


Expand Down
28 changes: 17 additions & 11 deletions QEfficient/base/modeling_qeff.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
import onnx
import torch

from QEfficient.base.onnx_transforms import BaseOnnxTransform, OnnxTransformPipeline
from QEfficient.base.onnx_transforms import (
BaseOnnxTransform,
FP16ClipTransform,
OnnxTransformPipeline,
SplitTensorsTransform,
)
from QEfficient.base.pytorch_transforms import PytorchTransform
from QEfficient.compile.qnn_compiler import compile as qnn_compile
from QEfficient.generation.cloud_infer import QAICInferenceSession
Expand Down Expand Up @@ -49,9 +54,8 @@ class QEFFBaseModel(ABC):
_pytorch_transforms: List[PytorchTransform]
_onnx_transforms = [BaseOnnxTransform]

@classmethod
def _transform_names(cls) -> List[str]:
return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
def _transform_names(self) -> List[str]:
return [x.__name__ for x in self._pytorch_transforms + self._onnx_transforms]

def __init__(self, model: torch.nn.Module, **kwargs) -> None:
super().__init__()
Expand Down Expand Up @@ -242,9 +246,7 @@ def _export(
# check if the model is in meta state or weights are offloaded
self._model_offloaded_check()

# Export directly into export_dir so any external data files are retained.
export_dir.mkdir(parents=True, exist_ok=True)
tmp_onnx_path = onnx_path

# Create input_names from example_inputs
input_names = []
Expand Down Expand Up @@ -274,7 +276,7 @@ def _export(
torch.onnx.export(
self.model,
(example_inputs,),
str(tmp_onnx_path),
str(onnx_path),
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
Expand All @@ -283,11 +285,13 @@ def _export(
)
logger.info("PyTorch export successful")
_ = self._offload_model_weights(offload_pt_weights)
model = onnx.load(tmp_onnx_path, load_external_data=False)
model = onnx.load(onnx_path, load_external_data=False)

# Clear temporary references
needs_external_tensor_data = any(
transform in self._onnx_transforms for transform in (FP16ClipTransform, SplitTensorsTransform)
)
transform_kwargs = {
"onnx_base_dir": str(export_dir),
"onnx_base_dir": str(export_dir) if needs_external_tensor_data else None,
"model_name": self.model_name,
}
if onnx_transform_kwargs is not None:
Expand All @@ -302,7 +306,9 @@ def _export(
)
logger.info("ONNX transforms applied")

onnx.save(model, onnx_path)
onnx_path_tmp = onnx_path.with_suffix(onnx_path.suffix + ".tmp")
onnx.save(model, onnx_path_tmp)
onnx_path_tmp.replace(onnx_path)
del model
gc.collect()
logger.info("Transformed ONNX saved")
Expand Down
19 changes: 14 additions & 5 deletions QEfficient/base/onnx_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import logging
import os
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Any, Dict, List, Optional, Tuple, Type

Expand Down Expand Up @@ -106,16 +105,27 @@ class CustomOpTransform(BaseOnnxTransform):
@classmethod
def apply(cls, model: ModelProto) -> bool:
op_applied = False

# Register with PyTorch ONNX exporter (for export time)
for op_name, (func_class, _) in cls._custom_ops.items():
if hasattr(func_class, "symbolic"):
torch.onnx.register_custom_op_symbolic(f"::{op_name}", func_class.symbolic, ONNX_EXPORT_OPSET)

used_op_types = {node.op_type for node in model.graph.node}
for function_proto in model.functions:
used_op_types.update(node.op_type for node in function_proto.node)

# Add function prototypes to model
existing = {f.name for f in model.functions}
for _, onnxscript_func in cls._custom_ops.values():

for func_name, onnxscript_func in cls._custom_ops.values():
proto = onnxscript_func.to_function_proto()
if proto.name not in used_op_types:
continue
if proto.name not in existing:
model.functions.append(proto)
op_applied = True

return op_applied


Expand Down Expand Up @@ -202,8 +212,6 @@ class OnnxTransformPipeline(BaseOnnxTransform):
"""Pipeline to apply multiple ONNX transformations in sequence."""

def __init__(self, transforms: List[Type[BaseOnnxTransform]]):
if not transforms:
warnings.warn("Transform list is empty. No transformations will be applied.")
self.transforms = transforms

def apply(
Expand All @@ -228,7 +236,8 @@ def apply(
do_split = SplitTensorsTransform in requested
fp16_min, fp16_max = np.finfo(np.float16).min, np.finfo(np.float16).max
file_num_tracker = {"num": 0, "size": 0}
external_data_helper.load_external_data_for_model(model, onnx_base_dir)
if onnx_base_dir is not None:
external_data_helper.load_external_data_for_model(model, onnx_base_dir)

if do_fp16 or do_split:
for tensor in external_data_helper._get_all_tensors(model):
Expand Down
51 changes: 48 additions & 3 deletions QEfficient/diffusers/models/autoencoders/autoencoder_kl_wan.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@
#
# -----------------------------------------------------------------------------

from typing import Optional

import torch
from diffusers.models.autoencoders.autoencoder_kl_wan import (
AutoencoderKLWan,
WanDecoder3d,
WanEncoder3d,
WanResample,
Expand All @@ -16,8 +19,6 @@

CACHE_T = 2

modes = []

# Used max(0, x.shape[2] - CACHE_T) instead of CACHE_T because x.shape[2] is either 1 or 4,
# and CACHE_T = 2. This ensures the value never goes negative

Expand Down Expand Up @@ -58,7 +59,6 @@ def forward(self, x, feat_cache=None, feat_idx=[0]):
x = x.reshape(b, c, t * 2, h, w)
t = x.shape[2]
x = x.permute(0, 2, 1, 3, 4).reshape(b * t, c, h, w)
modes.append(self.mode)
x = self.resample(x)
x = x.view(b, t, x.size(1), x.size(2), x.size(3)).permute(0, 2, 1, 3, 4)

Expand Down Expand Up @@ -198,3 +198,48 @@ def forward(self, x, feat_cache=None, feat_idx=[0], first_chunk=False):
else:
x = self.conv_out(x)
return x


class QEffAutoencoderKLWan(AutoencoderKLWan):
    """Wan VAE specialized for export: tensor-returning ``encode`` plus a
    dual-mode ``forward`` that dispatches to encoding or decoding based on
    which input is provided.
    """

    def encode(self, x: torch.Tensor) -> torch.Tensor:
        r"""
        Encode a batch of images into latents.

        Args:
            x (`torch.Tensor`): Input batch of images.
        """
        # When slicing is enabled, encode one batch element at a time
        # (lower peak memory) and concatenate the per-slice latents.
        if self.use_slicing and x.shape[0] > 1:
            return torch.cat([self._encode(part) for part in x.split(1)])
        return self._encode(x)

    def forward(
        self,
        image: Optional[torch.Tensor] = None,
        latent_sample: Optional[torch.Tensor] = None,
        return_dict: bool = True,
    ) -> torch.Tensor:
        r"""
        Forward pass through the VAE autoencoder with dual-mode functionality.

        Dispatches on the provided input:
        - `image` given: encode (image -> latent space)
        - otherwise: decode `latent_sample` (latent space -> image)

        Args:
            image (`torch.Tensor`, *optional*): Input image tensor to encode into latent space.
            latent_sample (`torch.Tensor`, *optional*): Input latent tensor to decode back to
                image space. Used only when `image` is None.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether to return a dictionary with structured output or a raw tensor.
                Only applies to decoding operations.

        Returns:
            `torch.Tensor`:
                - If encoding: latent representation of the input image.
                - If decoding: reconstructed image/video from the latent representation.
        """
        if image is None:
            return self.decode(latent_sample, return_dict)
        return self.encode(image)
3 changes: 3 additions & 0 deletions QEfficient/diffusers/models/pytorch_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# -----------------------------------------------------------------------------

from diffusers.models.autoencoders.autoencoder_kl_wan import (
AutoencoderKLWan,
WanDecoder3d,
WanEncoder3d,
WanResample,
Expand All @@ -25,6 +26,7 @@
from QEfficient.base.pytorch_transforms import ModuleMappingTransform
from QEfficient.customop.rms_norm import CustomRMSNormAIC
from QEfficient.diffusers.models.autoencoders.autoencoder_kl_wan import (
QEffAutoencoderKLWan,
QEffWanDecoder3d,
QEffWanEncoder3d,
QEffWanResample,
Expand Down Expand Up @@ -66,6 +68,7 @@ class AttentionTransform(ModuleMappingTransform):
WanAttnProcessor: QEffWanAttnProcessor,
WanAttention: QEffWanAttention,
WanTransformer3DModel: QEffWanTransformer3DModel,
AutoencoderKLWan: QEffAutoencoderKLWan,
WanDecoder3d: QEffWanDecoder3d,
WanEncoder3d: QEffWanEncoder3d,
WanResidualBlock: QEffWanResidualBlock,
Expand Down
6 changes: 6 additions & 0 deletions QEfficient/diffusers/pipelines/configs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# -----------------------------------------------------------------------------
#
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: BSD-3-Clause
#
# -----------------------------------------------------------------------------

Large diffs are not rendered by default.

Loading
Loading