quic-abhamidi
diff --git a/.github/workflows/quickcheck.yml b/.github/workflows/quickcheck.yml
Lines changed: 32 additions & 0 deletions
Lines changed: 32 additions & 0 deletions
diff --git a/QEfficient/base/modeling_qeff.py b/QEfficient/base/modeling_qeff.py
Lines changed: 17 additions & 11 deletions
Lines changed: 17 additions & 11 deletions
diff --git a/QEfficient/base/onnx_transforms.py b/QEfficient/base/onnx_transforms.py
Lines changed: 14 additions & 5 deletions
Lines changed: 14 additions & 5 deletions
diff --git a/scripts/Jenkinsfile b/scripts/Jenkinsfile
Lines changed: 8 additions & 8 deletions
Lines changed: 8 additions & 8 deletions
@@ -0,0 +1,32 @@
+name: Quickcheck
+
+on:
+  pull_request:  # run on pull request events
+  workflow_dispatch:  # also allow manually triggered runs
+
+concurrency:  # one group per PR number, falling back to the ref when there is no PR
+  group: quickcheck-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+
+jobs:
+  quickcheck:
+    runs-on: ubuntu-latest
+    timeout-minutes: 90  # upper bound for the whole job
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"  # quoted so YAML does not parse it as the float 3.1
+          cache: "pip"
+
+      - name: Install Dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install -e .[test]
+          python -m pip install pytest-xdist
+
+      - name: Run Quickcheck  # "-n auto" is provided by pytest-xdist, installed in the previous step
+        run: python -m pytest -q tests/unit_test/models/test_model_quickcheck.py -n auto
@@ -18,7 +18,12 @@
 import onnx
 import torch
 
-from QEfficient.base.onnx_transforms import BaseOnnxTransform, OnnxTransformPipeline
+from QEfficient.base.onnx_transforms import (
+    BaseOnnxTransform,
+    FP16ClipTransform,
+    OnnxTransformPipeline,
+    SplitTensorsTransform,
+)
 from QEfficient.base.pytorch_transforms import PytorchTransform
 from QEfficient.compile.qnn_compiler import compile as qnn_compile
 from QEfficient.generation.cloud_infer import QAICInferenceSession
@@ -49,9 +54,8 @@ class QEFFBaseModel(ABC):
     _pytorch_transforms: List[PytorchTransform]
     _onnx_transforms = [BaseOnnxTransform]
 
-    @classmethod
-    def _transform_names(cls) -> List[str]:
-        return [x.__name__ for x in cls._pytorch_transforms + cls._onnx_transforms]
+    def _transform_names(self) -> List[str]:  # PyTorch transform names first, then ONNX transform names
+        return [x.__name__ for x in self._pytorch_transforms + self._onnx_transforms]
 
     def __init__(self, model: torch.nn.Module, **kwargs) -> None:
         super().__init__()
@@ -242,9 +246,7 @@ def _export(
         # check if the model is in meta state or weights are offloaded
         self._model_offloaded_check()
 
-        # Export directly into export_dir so any external data files are retained.
         export_dir.mkdir(parents=True, exist_ok=True)
-        tmp_onnx_path = onnx_path
 
         # Create input_names from example_inputs
         input_names = []
@@ -274,7 +276,7 @@ def _export(
             torch.onnx.export(
                 self.model,
                 (example_inputs,),
-                str(tmp_onnx_path),
+                str(onnx_path),
                 input_names=input_names,
                 output_names=output_names,
                 dynamic_axes=dynamic_axes,
@@ -283,11 +285,13 @@ def _export(
             )
             logger.info("PyTorch export successful")
             _ = self._offload_model_weights(offload_pt_weights)
-            model = onnx.load(tmp_onnx_path, load_external_data=False)
+            model = onnx.load(onnx_path, load_external_data=False)
 
-            # Clear temporary references
+            needs_external_tensor_data = any(
+                transform in self._onnx_transforms for transform in (FP16ClipTransform, SplitTensorsTransform)
+            )
             transform_kwargs = {
-                "onnx_base_dir": str(export_dir),
+                "onnx_base_dir": str(export_dir) if needs_external_tensor_data else None,
                 "model_name": self.model_name,
             }
             if onnx_transform_kwargs is not None:
@@ -302,7 +306,9 @@ def _export(
             )
             logger.info("ONNX transforms applied")
 
-            onnx.save(model, onnx_path)
+            onnx_path_tmp = onnx_path.with_suffix(onnx_path.suffix + ".tmp")
+            onnx.save(model, onnx_path_tmp)
+            onnx_path_tmp.replace(onnx_path)
             del model
             gc.collect()
             logger.info("Transformed ONNX saved")
 
@@ -7,7 +7,6 @@
 
 import logging
 import os
-import warnings
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Dict, List, Optional, Tuple, Type
 
@@ -106,16 +105,27 @@ class CustomOpTransform(BaseOnnxTransform):
     @classmethod
     def apply(cls, model: ModelProto) -> bool:
         op_applied = False
+
+        # Register each op's symbolic with the PyTorch ONNX exporter (export-time hook)
         for op_name, (func_class, _) in cls._custom_ops.items():
             if hasattr(func_class, "symbolic"):
                 torch.onnx.register_custom_op_symbolic(f"::{op_name}", func_class.symbolic, ONNX_EXPORT_OPSET)
 
+        used_op_types = {node.op_type for node in model.graph.node}
+        for function_proto in model.functions:
+            used_op_types.update(node.op_type for node in function_proto.node)
+
+        # Attach protos only for custom ops that some node references and the model lacks
         existing = {f.name for f in model.functions}
-        for _, onnxscript_func in cls._custom_ops.values():
+        # Each registry value is a (func_class, onnxscript_func) pair; only the latter is used here
+        for _, onnxscript_func in cls._custom_ops.values():
             proto = onnxscript_func.to_function_proto()
+            if proto.name not in used_op_types:  # no graph or function node uses this op
+                continue
             if proto.name not in existing:
                 model.functions.append(proto)
                 op_applied = True
+
         return op_applied
 
 
@@ -202,8 +212,6 @@ class OnnxTransformPipeline(BaseOnnxTransform):
     """Pipeline to apply multiple ONNX transformations in sequence."""
 
     def __init__(self, transforms: List[Type[BaseOnnxTransform]]):
-        if not transforms:
-            warnings.warn("Transform list is empty. No transformations will be applied.")
         self.transforms = transforms
 
     def apply(
@@ -228,7 +236,8 @@ def apply(
         do_split = SplitTensorsTransform in requested
         fp16_min, fp16_max = np.finfo(np.float16).min, np.finfo(np.float16).max
         file_num_tracker = {"num": 0, "size": 0}
-        external_data_helper.load_external_data_for_model(model, onnx_base_dir)
+        if onnx_base_dir is not None:
+            external_data_helper.load_external_data_for_model(model, onnx_base_dir)
 
         if do_fp16 or do_split:
             for tensor in external_data_helper._get_all_tensors(model):
 
@@ -41,7 +41,7 @@ pipeline {
                            mkdir -p $PWD/Non_cli_qaic &&
                            export TOKENIZERS_PARALLELISM=false &&
                            export QEFF_HOME=$PWD/Non_cli_qaic &&
-                           pytest tests -m '(not cli) and (not on_qaic) and (not finetune)' --ignore tests/vllm --ignore tests/transformers/models/image_text_to_text -n 4 --junitxml=tests/tests_log1.xml --durations=10 &&
+                           pytest tests -m '(not cli) and (not on_qaic) and (not finetune)' --ignore tests/vllm --ignore tests/transformers/models/image_text_to_text --ignore tests/unit_test -n 4 --junitxml=tests/tests_log1.xml --durations=10 &&
                            junitparser merge tests/tests_log1.xml tests/tests_log.xml &&
                            deactivate"
                            '''
@@ -50,15 +50,15 @@ pipeline {
                }
                stage('QAIC LLM Tests') {
                    steps {
-                       timeout(time: 120, unit: 'MINUTES') {
+                       timeout(time: 180, unit: 'MINUTES') {
                            sh '''
                            sudo docker exec ${BUILD_TAG} bash -c "
                            cd /efficient-transformers &&
                            . preflight_qeff/bin/activate &&
                            mkdir -p $PWD/Non_qaic_llm &&
                            export TOKENIZERS_PARALLELISM=false &&
                            export QEFF_HOME=$PWD/Non_qaic_llm &&
-                           pytest tests -m '(not cli) and (on_qaic) and (llm_model) and (not nightly) and (not multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --junitxml=tests/tests_log2.xml --durations=10 &&
+                           pytest tests -m '(not cli) and (on_qaic) and (llm_model) and (not nightly) and (not multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log2.xml --durations=10 &&
                            junitparser merge tests/tests_log2.xml tests/tests_log.xml &&
                            deactivate"
                            '''
@@ -75,7 +75,7 @@ pipeline {
                            mkdir -p $PWD/Non_qaic_feature &&
                            export TOKENIZERS_PARALLELISM=false &&
                            export QEFF_HOME=$PWD/Non_qaic_feature &&
-                           pytest tests -m '(not cli) and (on_qaic) and (feature) and (not nightly) and (not multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --junitxml=tests/tests_log2_feature.xml --durations=10 &&
+                           pytest tests -m '(not cli) and (on_qaic) and (feature) and (not nightly) and (not multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log2_feature.xml --durations=10 &&
                            junitparser merge tests/tests_log2_feature.xml tests/tests_log.xml &&
                            deactivate"
                            '''
@@ -94,7 +94,7 @@ pipeline {
                            mkdir -p $PWD/Non_cli_qaic_multimodal &&
                            export TOKENIZERS_PARALLELISM=false &&
                            export QEFF_HOME=$PWD/Non_cli_qaic_multimodal &&
-                           pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --junitxml=tests/tests_log6.xml --durations=10 &&
+                           pytest tests -m '(not cli) and (on_qaic) and (multimodal) and (not qnn) and (not finetune) and (not diffusion_models)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log6.xml --durations=10 &&
                            junitparser merge tests/tests_log6.xml tests/tests_log.xml &&
                            deactivate"
                            '''
@@ -112,7 +112,7 @@ pipeline {
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/Non_cli_qaic_diffusion &&
                     export HF_HUB_CACHE=/huggingface_hub &&
-                    pytest tests -m '(not cli) and (on_qaic) and (diffusion_models) and (not wan) and (not qnn) and (not finetune)' --ignore tests/vllm --junitxml=tests/tests_log_diffusion.xml --durations=10 &&
+                    pytest tests -m '(not cli) and (on_qaic) and (diffusion_models) and (not wan) and (not qnn) and (not finetune)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log_diffusion.xml --durations=10 &&
                     junitparser merge tests/tests_log_diffusion.xml tests/tests_log.xml &&
                     deactivate"
                     '''
@@ -131,7 +131,7 @@ pipeline {
                            mkdir -p $PWD/cli &&
                            export TOKENIZERS_PARALLELISM=false &&
                            export QEFF_HOME=$PWD/cli &&
-                           pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm --junitxml=tests/tests_log3.xml --durations=10 &&
+                           pytest tests -m '(cli and not qnn) and (not finetune)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log3.xml --durations=10 &&
                            junitparser merge tests/tests_log3.xml tests/tests_log.xml &&
                            deactivate"
                            '''
@@ -209,7 +209,7 @@ pipeline {
                     mkdir -p $PWD/cli_qaic_finetuning &&
                     export TOKENIZERS_PARALLELISM=false &&
                     export QEFF_HOME=$PWD/cli_qaic_finetuning &&
-                    pytest tests -m '(cli) and (on_qaic) and (not qnn) and (not multimodal) and (finetune)' --ignore tests/vllm --junitxml=tests/tests_log_finetune.xml --durations=10 &&
+                    pytest tests -m '(cli) and (on_qaic) and (not qnn) and (not multimodal) and (finetune)' --ignore tests/vllm --ignore tests/unit_test --junitxml=tests/tests_log_finetune.xml --durations=10 &&
                     junitparser merge tests/tests_log_finetune.xml tests/tests_log.xml &&
                     deactivate"
                     '''