Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions olive/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Framework(StrEnumBase):

ONNX = "ONNX"
PYTORCH = "PyTorch"
QAIRT = "QAIRT"
QNN = "QNN"
TENSORFLOW = "TensorFlow"
OPENVINO = "OpenVINO"
Expand All @@ -30,6 +31,8 @@ class ModelFileFormat(StrEnumBase):
PYTORCH_SLICE_GPT_MODEL = "PyTorch.SliceGPT"
TENSORFLOW_PROTOBUF = "TensorFlow.Protobuf"
TENSORFLOW_SAVED_MODEL = "TensorFlow.SavedModel"
QAIRT = "QAIRT"
QAIRT_PREPARED = "QAIRT.Prepared"
QNN_CPP = "QNN.CPP"
QNN_LIB = "QNN.LIB"
QNN_SERIALIZED_BIN = "QNN.SERIALIZED.BIN"
Expand Down
3 changes: 3 additions & 0 deletions olive/model/handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler
from olive.model.handler.openvino import OpenVINOModelHandler
from olive.model.handler.pytorch import PyTorchModelHandler
from olive.model.handler.qairt import QairtModelHandler, QairtPreparedModelHandler
from olive.model.handler.qnn import QNNModelHandler
from olive.model.handler.tensorflow import TensorFlowModelHandler

Expand All @@ -23,5 +24,7 @@
"OpenVINOModelHandler",
"PyTorchModelHandler",
"QNNModelHandler",
"QairtModelHandler",
"QairtPreparedModelHandler",
"TensorFlowModelHandler",
]
105 changes: 105 additions & 0 deletions olive/model/handler/qairt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# -------------------------------------------------------------------------
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: MIT
# --------------------------------------------------------------------------

import logging
from typing import Any, Callable, Optional, Union

from olive.constants import Framework, ModelFileFormat
from olive.hardware.accelerator import Device
from olive.model.config import IoConfig
from olive.model.config.registry import model_handler_registry
from olive.model.handler.base import OliveModelHandler

logger = logging.getLogger(__name__)


@model_handler_registry("QairtPreparedModel")
class QairtPreparedModelHandler(OliveModelHandler):
    """Handler for a prepared QAIRT model artifact (ModelFileFormat.QAIRT_PREPARED).

    This handler only carries model metadata (path, io_config, attributes) through
    the Olive workflow; it cannot be loaded or executed directly, so load_model,
    prepare_session, and run_session all raise NotImplementedError.
    """

    # Keys serialized into the model's JSON config representation.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Optional[Union[dict[str, Any], IoConfig, str, Callable]] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT_PREPARED,
    ):
        """Initialize the handler.

        :param model_path: Path to the prepared QAIRT model artifact.
        :param model_attributes: Optional free-form attributes attached to the model.
        :param io_config: Input/output configuration (dict, IoConfig, path, or callable).
        :param model_file_format: File format; defaults to ModelFileFormat.QAIRT_PREPARED.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        # NOTE(review): always reports 0 — presumably a deliberate placeholder for
        # prepared artifacts; confirm this is intended rather than a stub.
        return 0

    def load_model(self, rank: Optional[int] = None, cache_model: bool = True):
        """Not supported for prepared QAIRT models."""
        raise NotImplementedError("QairtPreparedModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Optional[dict[str, Any]] = None,
        device: Device = Device.CPU,
        execution_providers: Optional[Union[str, list[str]]] = None,
        rank: Optional[int] = None,
    ):
        """Not supported for prepared QAIRT models."""
        raise NotImplementedError("QairtPreparedModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Optional[Union[dict[str, Any], list[Any], tuple[Any, ...]]] = None,
        **kwargs: Any,
    ) -> Any:
        """Not supported for prepared QAIRT models."""
        # Fixed: error message previously named the wrong method ("prepare_session").
        raise NotImplementedError("QairtPreparedModelHandler does not support run_session")


@model_handler_registry("QairtModel")
class QairtModelHandler(OliveModelHandler):
    """Handler for a QAIRT model artifact (ModelFileFormat.QAIRT).

    This handler only carries model metadata (path, io_config, attributes) through
    the Olive workflow; it cannot be loaded or executed directly, so load_model,
    prepare_session, and run_session all raise NotImplementedError.
    """

    # Keys serialized into the model's JSON config representation.
    json_config_keys: tuple[str, ...] = ("io_config", "model_file_format")

    def __init__(
        self,
        model_path: str,
        model_attributes: Optional[dict[str, Any]] = None,
        io_config: Optional[Union[dict[str, Any], IoConfig, str, Callable]] = None,
        model_file_format: ModelFileFormat = ModelFileFormat.QAIRT,
    ):
        """Initialize the handler.

        :param model_path: Path to the QAIRT model artifact.
        :param model_attributes: Optional free-form attributes attached to the model.
        :param io_config: Input/output configuration (dict, IoConfig, path, or callable).
        :param model_file_format: File format; defaults to ModelFileFormat.QAIRT.
        """
        super().__init__(
            framework=Framework.QAIRT,
            model_file_format=model_file_format,
            model_path=model_path,
            model_attributes=model_attributes,
            io_config=io_config,
        )

    @property
    def size_on_disk(self) -> int:
        """Compute size of the model on disk."""
        # NOTE(review): always reports 0 — presumably a deliberate placeholder for
        # QAIRT artifacts; confirm this is intended rather than a stub.
        return 0

    def load_model(self, rank: Optional[int] = None, cache_model: bool = True):
        """Not supported for QAIRT models."""
        raise NotImplementedError("QairtModelHandler does not support load_model")

    def prepare_session(
        self,
        inference_settings: Optional[dict[str, Any]] = None,
        device: Device = Device.CPU,
        execution_providers: Optional[Union[str, list[str]]] = None,
        rank: Optional[int] = None,
    ):
        """Not supported for QAIRT models."""
        raise NotImplementedError("QairtModelHandler does not support prepare_session")

    def run_session(
        self,
        session: Any = None,
        inputs: Optional[Union[dict[str, Any], list[Any], tuple[Any, ...]]] = None,
        **kwargs: Any,
    ) -> Any:
        """Not supported for QAIRT models."""
        # Fixed: error message previously named the wrong method ("prepare_session").
        raise NotImplementedError("QairtModelHandler does not support run_session")
31 changes: 30 additions & 1 deletion olive/olive_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -473,13 +473,41 @@
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
},
"QairtEncapsulation": {
"module_path": "olive.passes.qairt.encapsulation.QairtEncapsulation",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtGenAIBuilder": {
"module_path": "olive.passes.qairt.gen_ai_builder.QairtGenAIBuilder",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtMHA2SHA": {
"module_path": "olive.passes.onnx.qairt.mha2sha.QairtMHA2SHA",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ]
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QairtPreparation": {
"module_path": "olive.passes.qairt.preparation.QairtPreparation",
"supported_providers": [ "QNNExecutionProvider" ],
"supported_accelerators": [ "npu" ],
"supported_precisions": [ "*" ],
"supported_algorithms": [ ],
"supported_quantization_encodings": [ ],
"extra_dependencies": [ "qairt-dev" ]
},
"QLoRA": {
"module_path": "olive.passes.pytorch.lora.QLoRA",
Expand Down Expand Up @@ -661,6 +689,7 @@
"onnxruntime-openvino"
],
"optimum": [ "optimum" ],
"qairt": [ "qairt-dev" ],
"qnn": [ "onnxruntime-qnn" ],
"tf": [ "tensorflow==1.15.0" ],
"torch-tensorrt": [ "torch-tensorrt" ],
Expand Down
4 changes: 4 additions & 0 deletions olive/passes/qairt/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -------------------------------------------------------------------------
# Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
# SPDX-License-Identifier: MIT
# --------------------------------------------------------------------------
Loading
Loading