Skip to content
Draft
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
12388c4
Add QairtGenAIBuilder pass
qti-kromero Oct 22, 2025
8561f8c
Merge branch 'microsoft:main' into dev/qti-kromero/qairt-genai-builder
qti-kromero Nov 26, 2025
b68c7c2
updates
qti-kromero Nov 26, 2025
865b6dd
Merge remote-tracking branch 'origin/dev/qti-kromero/qairt-genai-buil…
qti-kromero Nov 26, 2025
8c8658b
QairtEncapsulation Updates
qti-kromero Dec 17, 2025
74e069a
updates
qti-kromero Dec 23, 2025
fafb6c8
Merge remote-tracking branch 'origin/main' into dev/qti-kromero/qairt…
qti-kromero Jan 8, 2026
070e8c3
Merge remote-tracking branch 'origin/main' into dev/qti-kromero/qairt…
qti-kromero Jan 8, 2026
d094194
Fix issue w/ get_size_on_disk
qti-kromero Jan 9, 2026
926e446
update
qti-kromero Jan 22, 2026
aa62e5c
Rename zip export format to lm-executor
qti-kromero Jan 22, 2026
7660b50
Add more general imports
qti-kromero Feb 4, 2026
4fbcbaa
Update for datatypes of lm executor node
qti-kromero Feb 6, 2026
7034e31
First pass at gen ai config
qti-kromero Feb 11, 2026
4165cbb
Add gen_ai_config.json output to directory
qti-kromero Feb 12, 2026
447b45b
new preparation pass
qti-kromero Feb 19, 2026
a33355d
Merge branch 'main' of github.com:CodeLinaro/Olive into dev/qti-krome…
qti-kromero Feb 19, 2026
cac0ffc
update for piping responses
qti-kromero Feb 19, 2026
6109fdd
updates from offline feedback
qti-kromero Feb 24, 2026
90eeb93
fix deadlock issue
qti-kromero Feb 25, 2026
5392025
updates for cache/output directory
qti-kromero Feb 25, 2026
7525126
add output file handling
qti-kromero Feb 25, 2026
2185cce
rework for regex of output files
qti-kromero Feb 26, 2026
3948876
fix preparation path
qti-kromero Feb 26, 2026
e07ce57
tokenizer_config updates
qti-kromero Feb 27, 2026
d9de7bc
updates for genai config
qti-kromero Feb 27, 2026
cebf41c
saving
qti-kromero Mar 2, 2026
b97f9e2
add DLC handling to output
qti-kromero Mar 2, 2026
c69455a
remove extra_args
qti-kromero Mar 2, 2026
0ab211b
update for new structure
qti-kromero Mar 3, 2026
315c1c8
update for new output from QairtPrep Pass
qti-kromero Mar 4, 2026
e3964bb
add to factory
qti-kromero Mar 4, 2026
efbcb50
update
qti-kromero Mar 4, 2026
237cc9c
add updates
qti-kromero Mar 4, 2026
6be099e
minor updates
qti-kromero Mar 10, 2026
65d3a18
updates
qti-kromero Mar 10, 2026
6ba760a
generation_config changes
qti-kromero Mar 11, 2026
1f2f34f
fix tokenizer path
qti-kromero Mar 11, 2026
ddf4cfb
fix config/tokenizer issue
qti-kromero Mar 11, 2026
212e992
Adascale improvements
qti-kromero Mar 11, 2026
95275e3
extended udma support
qti-kromero Mar 12, 2026
98fdfbf
fix udma
qti-kromero Mar 12, 2026
f014044
udma fix
qti-kromero Mar 12, 2026
0470c79
edits
qti-kromero Mar 14, 2026
d7ac199
Merge remote-tracking branch 'origin/main' into dev/qti-kromero/qairt…
qti-kromero Mar 14, 2026
84eb7b2
Refactor error message
qti-kromero Mar 14, 2026
7adfff7
debugging
qti-kromero Mar 16, 2026
5badb8e
vtcm/hvx thread hardcode
qti-kromero Mar 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions olive/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Framework(StrEnumBase):

ONNX = "ONNX"
PYTORCH = "PyTorch"
QAIRT = "QAIRT"
QNN = "QNN"
TENSORFLOW = "TensorFlow"
OPENVINO = "OpenVINO"
Expand All @@ -30,6 +31,8 @@ class ModelFileFormat(StrEnumBase):
PYTORCH_SLICE_GPT_MODEL = "PyTorch.SliceGPT"
TENSORFLOW_PROTOBUF = "TensorFlow.Protobuf"
TENSORFLOW_SAVED_MODEL = "TensorFlow.SavedModel"
QAIRT = "QAIRT"
QAIRT_PREPARED = "QAIRT.Prepared"
QNN_CPP = "QNN.CPP"
QNN_LIB = "QNN.LIB"
QNN_SERIALIZED_BIN = "QNN.SERIALIZED.BIN"
Expand Down
3 changes: 3 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
from olive.model.handler.qairt import QairtModelHandler, QairtPreparedModelHandler
from olive.model.handler.qnn import QNNModelHandler
from olive.model.handler.tensorflow import TensorFlowModelHandler

Expand All @@ -23,5 +24,7 @@
"OpenVINOModelHandler",
"PyTorchModelHandler",
"QNNModelHandler",
"QairtModelHandler",
"QairtPreparedModelHandler",
"TensorFlowModelHandler",
]
105 changes: 105 additions & 0 deletions olive/model/handler/qairt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------

import logging
from typing import Any, Callable, Optional, Union

from olive.constants import Framework, ModelFileFormat
from olive.hardware.accelerator import Device
from olive.model.config import IoConfig
from olive.model.config.registry import model_handler_registry
from olive.model.handler.base import OliveModelHandler

logger = logging.getLogger(__name__)


@model_handler_registry("QairtPreparedModel")
class QairtPreparedModelHandler(OliveModelHandler):
    """Handler for a QAIRT prepared (context-binary/graph-prepared) model artifact.

    This handler is a metadata carrier for a model already prepared by the QAIRT
    preparation pass: it records the model path, IO config, and file format so the
    artifact can flow through the Olive pass pipeline. It does not support loading
    the model or running inference — all session-related methods raise
    NotImplementedError.
    """

    # Keys serialized into the model's JSON config representation.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Union[dict[str, Any], IoConfig, str, Callable] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT_PREPARED,
    ):
        """Initialize the handler.

        :param model_path: Path to the prepared QAIRT model artifact.
        :param model_attributes: Optional free-form attributes attached to the model.
        :param io_config: IO configuration (dict, IoConfig, string reference, or callable).
        :param model_file_format: File format; defaults to ModelFileFormat.QAIRT_PREPARED.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk.

        NOTE(review): always returns 0 — presumably to opt this artifact out of
        cache size accounting; confirm this is intentional rather than a stub.
        """
        return 0

    def load_model(self, rank: int = None, cache_model: bool = True):
        """Unsupported: a prepared QAIRT artifact cannot be loaded in-process."""
        raise NotImplementedError("QairtPreparedModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Union[dict[str, Any], None] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Union[int, None] = None,
    ):
        """Unsupported: no inference session can be created for this artifact."""
        raise NotImplementedError("QairtPreparedModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Unsupported: no inference session exists to run."""
        # Fixed copy-paste bug: message previously (incorrectly) said "prepare_session".
        raise NotImplementedError("QairtPreparedModelHandler does not support run_session")


@model_handler_registry("QairtModel")
class QairtModelHandler(OliveModelHandler):
    """Handler for a QAIRT model artifact.

    Metadata carrier for a model in QAIRT format: records the model path, IO
    config, and file format so the artifact can flow through the Olive pass
    pipeline. Loading and inference are not supported — all session-related
    methods raise NotImplementedError.
    """

    # Keys serialized into the model's JSON config representation.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Union[dict[str, Any], IoConfig, str, Callable] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT,
    ):
        """Initialize the handler.

        :param model_path: Path to the QAIRT model artifact.
        :param model_attributes: Optional free-form attributes attached to the model.
        :param io_config: IO configuration (dict, IoConfig, string reference, or callable).
        :param model_file_format: File format; defaults to ModelFileFormat.QAIRT.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk.

        NOTE(review): always returns 0 — presumably to opt this artifact out of
        cache size accounting; confirm this is intentional rather than a stub.
        """
        return 0

    def load_model(self, rank: int = None, cache_model: bool = True):
        """Unsupported: a QAIRT artifact cannot be loaded in-process."""
        raise NotImplementedError("QairtModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Union[dict[str, Any], None] = None,
        device: Device = Device.CPU,
        execution_providers: Union[str, list[str]] = None,
        rank: Union[int, None] = None,
    ):
        """Unsupported: no inference session can be created for this artifact."""
        raise NotImplementedError("QairtModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Union[dict[str, Any], list[Any], tuple[Any, ...]] = None,
        **kwargs: dict[str, Any],
    ) -> Any:
        """Unsupported: no inference session exists to run."""
        # Fixed copy-paste bug: message previously (incorrectly) said "prepare_session".
        raise NotImplementedError("QairtModelHandler does not support run_session")
31 changes: 30 additions & 1 deletion olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -465,13 +465,41 @@
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
},
"QairtEncapsulation": {
"module_path": "olive.passes.qairt.encapsulation.QairtEncapsulation",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtGenAIBuilder": {
"module_path": "olive.passes.qairt.gen_ai_builder.QairtGenAIBuilder",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtMHA2SHA": {
"module_path": "olive.passes.onnx.qairt.mha2sha.QairtMHA2SHA",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtPreparation": {
"module_path": "olive.passes.qairt.preparation.QairtPreparation",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QLoRA": {
"module_path": "olive.passes.pytorch.lora.QLoRA",
Expand Down Expand Up @@ -653,6 +681,7 @@
"onnxruntime-openvino"
],
"optimum": [ "optimum" ],
"qairt": [ "qairt-dev" ],
"qnn": [ "onnxruntime-qnn" ],
"tf": [ "tensorflow==1.15.0" ],
"torch-tensorrt": [ "torch-tensorrt" ],
Expand Down
13 changes: 13 additions & 0 deletions olive/passes/qairt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# -------------------------------------------------------------------------

Check warning

Code scanning / lintrunner

RUFF/format Warning

Run lintrunner -a to apply this patch.

Check warning

Code scanning / lintrunner

RUFF-FORMAT/format Warning

Run lintrunner -a to apply this patch.

Check warning

Code scanning / lintrunner

EDITORCONFIG-CHECKER/editorconfig Warning

Final newline expected
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: MIT
# --------------------------------------------------------------------------

try:
import qairt

Check warning

Code scanning / lintrunner

RUFF/F401 Warning

qairt imported but unused; consider using importlib.util.find_spec to test for availability.
See https://docs.astral.sh/ruff/rules/unused-import
import qairt.gen_ai_api as qairt_genai

Check warning

Code scanning / lintrunner

RUFF/F401 Warning

qairt.gen_ai_api imported but unused; consider using importlib.util.find_spec to test for availability.
See https://docs.astral.sh/ruff/rules/unused-import
except ImportError as exc:
raise ImportError(
"Failed to import QAIRT GenAIBuilder API - please install olive-ai[qairt] to use QAIRT passes."
"If already installed, please run `qairt-vm -i` for help troubleshooting issues."
) from exc

Check warning

Code scanning / lintrunner

RUFF/W292 Warning

Loading
Loading