Skip to content
Merged
Show file tree
Hide file tree
Changes from 101 commits
Commits
Show all changes
107 commits
Select commit Hold shift + click to select a range
e489d6c
Runtime support for openvino quantized models
cavusmustafa Jun 21, 2025
f0d901f
openvino export_llama_lib support
cavusmustafa Jun 21, 2025
24f2d93
nncf pattern checker in openvino partitioner
cavusmustafa Jun 22, 2025
7dd8d0f
nncf compression init
anzr299 Jun 23, 2025
ce0e809
Merge pull request #5 from anzr299/origin/nncf_compression
cavusmustafa Jun 23, 2025
1716834
openvino backend llama nncf support
cavusmustafa Jun 23, 2025
198190e
openvino quantizer init
anzr299 Jul 7, 2025
9a1dff2
Merge pull request #7 from anzr299/origin/nncf_compression
cavusmustafa Jul 7, 2025
3d88a4e
Moved all openvino llama example changes into export_llama_lib
cavusmustafa Jul 8, 2025
e81f60d
Removed openvino utils.py since it is not needed anymore
cavusmustafa Jul 8, 2025
457a868
Update nncf_observers.py
anzr299 Jul 9, 2025
050448e
Merge pull request #8 from anzr299/patch-1
cavusmustafa Jul 10, 2025
d1e9330
Add export llama runner build option into openvino build script
cavusmustafa Jul 15, 2025
cedab9d
Update README.md
cavusmustafa Jul 15, 2025
1010323
Merge branch 'main' into openvino_llama_support
cavusmustafa Jul 15, 2025
cf0d3b7
Merge branch 'main' into openvino_llama_support
suryasidd Aug 19, 2025
e54f4c7
Added CMAKE EXPORT Changes
suryasidd Aug 19, 2025
c12a4ba
code formating updates
cavusmustafa Aug 21, 2025
bf65943
code formating changes
cavusmustafa Aug 21, 2025
30a1a25
openvino quantizer refactored
anzr299 Aug 26, 2025
4cc7694
fixes
anzr299 Aug 26, 2025
5da40a5
support all_layers, backup mode in OVQuantizer
anzr299 Aug 27, 2025
9e65a7e
clean up and use new nncf method for obtaining compression parameters
anzr299 Aug 27, 2025
53e0f4c
review changes & update method names according to wc algo
anzr299 Sep 1, 2025
bf95930
review changes
anzr299 Sep 1, 2025
2d4bec7
review changes
anzr299 Sep 1, 2025
0a2e361
Update export_llama_lib.py
anzr299 Sep 3, 2025
4c86a9c
enable group_size parameter for nncf compression
cavusmustafa Sep 3, 2025
46ed3f6
Update README.md
cavusmustafa Sep 3, 2025
0a1256e
Update README.md
cavusmustafa Sep 3, 2025
f2151e3
Update README.md
cavusmustafa Sep 4, 2025
dfc8eab
openvino backend build script updates
cavusmustafa Sep 4, 2025
2ac8a8c
Update README.md
cavusmustafa Sep 4, 2025
35444ae
Update README.md
cavusmustafa Sep 4, 2025
1cfbf0b
Merge branch 'main' into openvino_llama_support
cavusmustafa Sep 4, 2025
5b8b633
formatting fix
cavusmustafa Sep 5, 2025
f4a1423
formatting fix
cavusmustafa Sep 5, 2025
44f0883
formatting fix
cavusmustafa Sep 5, 2025
5f657d3
formatting fix
cavusmustafa Sep 5, 2025
eafcc33
formatting fix
cavusmustafa Sep 5, 2025
1763b99
formatting fix
cavusmustafa Sep 5, 2025
4863826
formatting fix
cavusmustafa Sep 5, 2025
e24072f
formatting fix
cavusmustafa Sep 5, 2025
b9bb5f0
formatting fix
cavusmustafa Sep 5, 2025
291dcd9
formatting fix
cavusmustafa Sep 5, 2025
c8ea777
use new transformations
anzr299 Sep 6, 2025
a6b605f
add comment for manual MP allocation
anzr299 Sep 6, 2025
9614fc4
remove nncf_compression from export llama lib
anzr299 Sep 6, 2025
45007cf
change pt2e quantize flag to use openvino_4wo instead of openvino_8da…
anzr299 Sep 6, 2025
9d49414
follow up to last commit
anzr299 Sep 6, 2025
d6727cf
update quantizer lib with openvino_4wo
anzr299 Sep 6, 2025
4a0a781
split qspec function into 2 parts; 1 for WC and other for PTQ qspecs
anzr299 Sep 6, 2025
f6a1ee3
micro fix
anzr299 Sep 8, 2025
d285fcc
udpate mixed precision layers for higher accuracy. Change INT4 mode t…
anzr299 Sep 8, 2025
4e66df1
Apply suggestions from code review
anzr299 Sep 8, 2025
e850e41
Review changes
anzr299 Sep 8, 2025
204043f
review changes in quantizer
anzr299 Sep 8, 2025
ae6b089
revert extra args changes
anzr299 Sep 8, 2025
a6f036c
Merge branch 'openvino_llama_support' of https://github.com/anzr299/e…
anzr299 Sep 9, 2025
2de5693
precommit fixes
anzr299 Sep 9, 2025
0e10f28
revert _calculate_qparams back to calculate_qparams
anzr299 Sep 9, 2025
05f5a92
remove manual ignored nodes
anzr299 Sep 10, 2025
fbe0e21
add ratio to quantizer initialization
anzr299 Sep 10, 2025
6bff1cd
Update export_llama_lib.py
anzr299 Sep 11, 2025
d744ae9
Update quantizer_lib.py
anzr299 Sep 11, 2025
21c43fe
Merge pull request #9 from anzr299/an/ovquantizer
suryasidd Sep 11, 2025
b874204
Updated NNCF commit id
suryasidd Sep 11, 2025
08280ed
Merge branch 'main' into openvino_llama_support
suryasidd Sep 11, 2025
35f1d84
Update README.md
cavusmustafa Sep 11, 2025
41ac36a
openvino llama export configuration - initial
cavusmustafa Sep 11, 2025
4426541
Update README.md
cavusmustafa Sep 11, 2025
6b936c5
Update README.md
cavusmustafa Sep 11, 2025
08461ec
updated ov llama config file
cavusmustafa Sep 11, 2025
be85af8
Update README.md
cavusmustafa Sep 11, 2025
bba4a01
Update README.md
cavusmustafa Sep 11, 2025
1421921
Update README.md with quantization paragraph
anzr299 Sep 12, 2025
cf0e71c
Merge pull request #10 from anzr299/patch-3
cavusmustafa Sep 15, 2025
f050eea
formatting fix
cavusmustafa Sep 15, 2025
4bfdca9
Update README.md
cavusmustafa Sep 15, 2025
16aba1b
Update non_cpu_backends.md for OpenVINO instructions
cavusmustafa Sep 16, 2025
155529f
Update llama instructions link for OpenVINO backend
cavusmustafa Sep 16, 2025
5875aa8
Remove OpenVINO from non_cpu_backends.md
cavusmustafa Sep 16, 2025
2630fd6
Update llama instructions for OpenVINO backend
cavusmustafa Sep 16, 2025
6d0cbc5
Removed the comma which was added by mistake
cavusmustafa Sep 16, 2025
3fbefec
Added NPU in choices
suryasidd Sep 16, 2025
c97bd09
Merge branch 'main' into openvino_llama_support
suryasidd Sep 16, 2025
12e51c7
Fixed ref links
suryasidd Sep 16, 2025
d3d3ae0
Merge branch 'main' into openvino_llama_support
suryasidd Sep 17, 2025
72331f5
Added Remove clone ops transformation to OpenVINO backend
suryasidd Sep 17, 2025
8016165
Fixed variable names
suryasidd Sep 17, 2025
f0d9fc7
Added extended support list for openvino backend
cavusmustafa Sep 17, 2025
9b41c28
formating fix
cavusmustafa Sep 17, 2025
e751726
formatting fix
cavusmustafa Sep 17, 2025
1736571
Merge pull request #11 from cavusmustafa/remove_clone_ops
cavusmustafa Sep 17, 2025
8106204
Added DimorderOpsRevertPass to Openvino backend
suryasidd Sep 30, 2025
04ca3f3
Merge remote-tracking branch 'cavus/main' into openvino_llama_support
suryasidd Sep 30, 2025
62f74a8
Merge branch 'main' into openvino_llama_support
suryasidd Sep 30, 2025
eaf0e17
Fixed linter issues
suryasidd Oct 1, 2025
15f5e23
Merge branch 'main' into openvino_llama_support
suryasidd Oct 1, 2025
3b358d5
Merge branch 'main' into openvino_llama_support
suryasidd Oct 2, 2025
8efba17
Merge branch 'main' into openvino_llama_support
suryasidd Oct 8, 2025
229bbd2
Use defualt runner for OpenVINO backend as well
suryasidd Oct 8, 2025
0525d9c
Merge pull request #12 from suryasidd/runner_changes
cavusmustafa Oct 8, 2025
24f67b6
Merge branch 'main' into openvino_llama_support
suryasidd Oct 8, 2025
82bc4c5
Merge branch 'main' into openvino_llama_support
suryasidd Oct 13, 2025
1428d81
Changed quantization scheme
suryasidd Oct 13, 2025
caba225
Merge branch 'main' into openvino_llama_support
suryasidd Oct 13, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 44 additions & 26 deletions backends/openvino/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ For more information on the supported hardware, please refer to [OpenVINO System
executorch
├── backends
│ └── openvino
│ ├── quantizer
│ ├── observers
│ └── nncf_observers.py
│ ├── __init__.py
│ └── quantizer.py
│ ├── runtime
│ ├── OpenvinoBackend.cpp
│ └── OpenvinoBackend.h
Expand All @@ -42,11 +47,23 @@ executorch

Before you begin, ensure you have openvino installed and configured on your system.

### Build OpenVINO from Source
### Use OpenVINO from Release Packages

1. Download the OpenVINO release package from [here](https://docs.openvino.ai/2025/get-started/install-openvino.html). Make sure to select your configuration and click on **OpenVINO Archives** under the distribution section to download the appropriate archive for your platform.

2. Extract the release package from the archive and set the environment variables.

```bash
tar -zxf openvino_toolkit_<your_release_configuration>.tgz
cd openvino_toolkit_<your_release_configuration>
source setupvars.sh
```

### (Optional) Build OpenVINO from Source

```bash
git clone https://github.com/openvinotoolkit/openvino.git
cd openvino && git checkout b16b776ac119dafda51f69a80f1e6b7376d02c3b
cd openvino
git submodule update --init --recursive
sudo ./install_build_dependencies.sh
mkdir build && cd build
Expand All @@ -59,44 +76,45 @@ cd <your_preferred_install_location>
source setupvars.sh
```

### Use OpenVINO from Release Packages

1. Download the OpenVINO release package from [here](https://docs.openvino.ai/2025/get-started/install-openvino.html). Make sure to select your configuration and click on **OpenVINO Archives** under the distribution section to download the appropriate archive for your platform.

2. Extract the release package from the archive and set the environment variables.

```bash
tar -zxf openvino_toolkit_<your_release_configuration>.tgz
cd openvino_toolkit_<your_release_configuration>
source setupvars.sh
```

For more information about OpenVINO build, refer to the [OpenVINO Build Instructions](https://github.com/openvinotoolkit/openvino/blob/master/docs/dev/build_linux.md).

### Setup

Follow the steps below to setup your build environment:

1. **Setup ExecuTorch Environment**: Refer to the [Environment Setup](https://pytorch.org/executorch/main/getting-started-setup#environment-setup) guide for detailed instructions on setting up the ExecuTorch environment.

2. **Setup OpenVINO Backend Environment**
- Install the dependent libs. Ensure that you are inside `executorch/backends/openvino/` directory
1. **Create a Virtual Environment**
- Create a virtual environment and activate it by executing the commands below.
```bash
pip install -r requirements.txt
python -m venv env
source env/bin/activate
```
Note: To achieve optimal performance with NNCF quantization, you should install the latest development version of NNCF (version 2.16.0.dev0+191b53d9 or higher).
3. Navigate to `scripts/` directory.

4. **Build OpenVINO Backend C++ Libraries and Executor Runner**: Once the prerequisites are in place, run the `openvino_build.sh` script to start the build process. By default, OpenVINO backend will be built under `cmake-out/backends/openvino/` as `libopenvino_backend.a`

2. **Clone ExecuTorch Repository from GitHub**
- Clone the ExecuTorch repository by executing the command below.
```bash
./openvino_build.sh
git clone --recurse-submodules https://github.com/pytorch/executorch.git
```
**Build OpenVINO Backend Python Package with Pybindings**: To build and install the OpenVINO backend Python package with Python bindings, run the `openvino_build.sh` script with the `--enable_python` argument. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. This option will also enable python bindings required to execute OpenVINO backend tests and `aot_optimize_and_infer.py` script inside `executorch/examples/openvino` folder.

3. **Build ExecuTorch with OpenVINO Backend**
- Ensure that you are inside the `executorch/backends/openvino/scripts` directory. The following command builds and installs ExecuTorch with the OpenVINO backend and also compiles the C++ runtime libraries and binaries into `<executorch_root>/cmake-out` for quick inference testing.
```bash
./openvino_build.sh
```
- Optionally, the `openvino_build.sh` script can be used to build the Python package or the C++ libraries/binaries separately.

**Build OpenVINO Backend Python Package with Pybindings**: To build and install the OpenVINO backend Python package with Python bindings, run the `openvino_build.sh` script with the `--enable_python` argument as shown in the below command. This will compile and install the ExecuTorch Python package with the OpenVINO backend into your Python environment. This option will also enable python bindings required to execute OpenVINO backend tests and `aot_optimize_and_infer.py` script inside `executorch/examples/openvino` folder.
```bash
./openvino_build.sh --enable_python
```
**Build C++ Runtime Libraries for OpenVINO Backend**: Run the `openvino_build.sh` script with the `--cpp_runtime` flag to build the C++ runtime libraries as shown in the command below. The compiled library files and binaries can be found in the `<executorch_root>/cmake-out` directory. The binary located at `<executorch_root>/cmake-out/backends/openvino/openvino_executor_runner` can be used to run inference with vision models.
```bash
./openvino_build.sh --cpp_runtime
```
**Build C++ Llama Runner**: First, ensure the C++ runtime libraries are built by following the earlier instructions. Then, run the `openvino_build.sh` script with the `--llama_runner` flag to compile the Llama runner as shown in the command below, which enables executing inference with models exported using `export_llama`. The resulting binary is located at: `<executorch_root>/cmake-out/examples/models/llama/llama_main`
```bash
./openvino_build.sh --llama_runner
```

For more information about ExecuTorch environment setup, refer to the [Environment Setup](https://pytorch.org/executorch/main/getting-started-setup#environment-setup) guide.

### Run

Expand Down
86 changes: 85 additions & 1 deletion backends/openvino/partitioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
from torch.fx.passes.operator_support import OperatorSupportBase


class PatternNode:
    """
    One node of an operator pattern used when matching subgraphs.

    ``op_types`` maps an op key (``"get_attr"``, ``"placeholder"`` or
    ``"call_function:<target name>"``) to either ``None`` or a list holding one
    pattern per expected input of that op.
    """

    # Class-level table; code that uses the class object itself as a pattern
    # reads and writes this shared dict.
    op_types: dict[str, Optional[list]] = dict()

    def __init__(self) -> None:
        # Every instance starts with its own empty table that shadows the
        # class-level one.
        self.op_types = dict()


class OpenvinoOperatorsSupport(OperatorSupportBase):
extended_support_dict = {
"torch.ops.dim_order_ops._clone_dim_order.default": None,
Expand All @@ -36,6 +43,7 @@ def __init__(
self,
op_types_to_skip: Optional[set] = None,
op_names_to_skip: Optional[set] = None,
enabled_ops_by_name: Optional[set] = None,
) -> None:
"""
Initializes the OpenvinoOperatorsSupport class.
Expand All @@ -47,9 +55,12 @@ def __init__(
op_types_to_skip = set()
if op_names_to_skip is None:
op_names_to_skip = set()
if enabled_ops_by_name is None:
enabled_ops_by_name = set()

self._op_types_to_skip = op_types_to_skip
self._op_names_to_skip = op_names_to_skip
self._enabled_ops_by_name = enabled_ops_by_name

def is_node_supported(self, _, node: torch.fx.Node) -> bool:
"""
Expand All @@ -66,6 +77,10 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
op_type = node.target.__name__
else:
op_type = str(node.target)

if node.name in self._enabled_ops_by_name:
return True

supported_ops = (
OperatorSupport(options)._support_dict | self.extended_support_dict
)
Expand Down Expand Up @@ -105,6 +120,7 @@ def __init__(
self.delegation_spec = DelegationSpec(OpenvinoBackend.__name__, compile_spec)
self._op_types_to_skip = op_types_to_skip
self._op_names_to_skip = op_names_to_skip
self._enabled_ops_by_name: set = set()

def ops_to_not_decompose(
self,
Expand All @@ -123,19 +139,87 @@ def ops_to_not_decompose(
torch.ops.aten.upsample_bilinear2d.vec,
torch.ops.aten.upsample_nearest2d.default,
torch.ops.aten.upsample_nearest2d.vec,
torch.ops.aten.stack.default,
]
return (ops_not_decompose, None)

def check_pattern(
    self, node: torch.fx.Node, pattern: type[PatternNode], enabled_ops: list
) -> bool:
    """
    Recursively checks whether the subgraph ending at ``node`` matches ``pattern``.

    A ``call_function`` node matches when the key
    ``"call_function:<target name>"`` is present in ``pattern.op_types`` and,
    unless the entry is ``None``, every input node matches the pattern listed
    at the same position. ``get_attr`` and ``placeholder`` nodes match when
    their op kind is a key of ``pattern.op_types``. Any other node kind never
    matches.

    :param node: The graph node to test.
    :param pattern: The pattern to test ``node`` against.
    :param enabled_ops: Output list. On a successful match the matched
        ``call_function`` nodes are appended, inputs before the node that
        consumes them. On a failed match the list is left exactly as it was
        passed in.
    :return: True if the node and all of its checked inputs match the pattern.
    """
    if node.op in ("get_attr", "placeholder"):
        # Leaf inputs are only checked, never recorded in enabled_ops.
        return node.op in pattern.op_types
    if node.op != "call_function":
        return False

    op_key = "call_function:" + str(node.target.__name__)  # type: ignore[union-attr]
    if op_key not in pattern.op_types:
        return False

    pattern_input_ops = pattern.op_types[op_key]
    if pattern_input_ops is None:
        # None means the inputs of this op are not constrained.
        enabled_ops.append(node)
        return True

    pt_input_nodes = node.all_input_nodes
    if len(pt_input_nodes) != len(pattern_input_ops):
        return False

    # Remember where this subtree starts so that a mismatch on a later input
    # can discard the nodes recorded for earlier, already matched inputs.
    checkpoint = len(enabled_ops)
    for input_node, input_pattern in zip(pt_input_nodes, pattern_input_ops):
        if not self.check_pattern(input_node, input_pattern, enabled_ops):
            del enabled_ops[checkpoint:]
            return False
    enabled_ops.append(node)
    return True

def capture_nncf_patterns(self, graph_module: torch.fx.GraphModule):
    """
    Records the names of all nodes that belong to a matched stack pattern.

    The pattern is ``aten.stack.default`` whose two inputs are, in order,
    ``aten.bitwise_and.Scalar`` and ``aten.bitwise_right_shift.Tensor_Scalar``,
    each fed by a single ``get_attr`` or ``placeholder`` node.
    NOTE(review): presumably this is the weight-unpacking subgraph emitted by
    NNCF weight compression - confirm against the NNCF decompression code.

    The names of the matched ``call_function`` nodes are added to
    ``self._enabled_ops_by_name``.

    :param graph_module: The graph module whose nodes are scanned.
    """

    # Each pattern is its own PatternNode subclass so that it owns a private
    # ``op_types`` table. Binding several names to PatternNode itself would
    # make every pattern share one class-level dict, so any listed op would
    # be accepted at any position and PatternNode would stay mutated.
    class ConstPattern(PatternNode):
        op_types = {"get_attr": None, "placeholder": None}

    class BitwiseRightShiftPattern(PatternNode):
        op_types = {
            "call_function:aten.bitwise_right_shift.Tensor_Scalar": [ConstPattern]
        }

    class BitwiseAndPattern(PatternNode):
        op_types = {"call_function:aten.bitwise_and.Scalar": [ConstPattern]}

    class StackPattern(PatternNode):
        op_types = {
            "call_function:aten.stack.default": [
                BitwiseAndPattern,
                BitwiseRightShiftPattern,
            ]
        }

    for node in graph_module.graph.nodes:
        if str(node.op) != "call_function":
            continue
        if str(node.target.__name__) != "aten.stack.default":
            continue
        # A fresh list per candidate keeps a failed match from leaking nodes
        # into the enabled set.
        enabled_ops: list = []
        if self.check_pattern(node, StackPattern, enabled_ops):
            self._enabled_ops_by_name.update(
                pattern_op.name for pattern_op in enabled_ops
            )

def partition(self, exported_program: ExportedProgram) -> PartitionResult:
"""
Partitions an exported program into supported and unsupported segments.

:param exported_program: The exported program.
:return: A PartitionResult containing the partitioned graph and delegation tags.
"""
self.capture_nncf_patterns(exported_program.graph_module)
partitioner = CapabilityBasedPartitioner(
exported_program.graph_module,
OpenvinoOperatorsSupport(self._op_types_to_skip, self._op_names_to_skip),
OpenvinoOperatorsSupport(
self._op_types_to_skip,
self._op_names_to_skip,
self._enabled_ops_by_name,
),
allows_single_node_partition=True,
)
partition_list = partitioner.propose_partitions()
Expand Down
4 changes: 2 additions & 2 deletions backends/openvino/quantizer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .quantizer import OpenVINOQuantizer, quantize_model
from .quantizer import OpenVINOQuantizer, QuantizationMode, quantize_model

__all__ = ["OpenVINOQuantizer", "quantize_model"]
__all__ = ["OpenVINOQuantizer", "quantize_model", "QuantizationMode"]
Loading