microsoft · skuros · Dec 8, 2025 · Dec 8, 2025
@@ -0,0 +1,12 @@
+# sam2.1-hiera-small Model Optimization
+
+This repository demonstrates the optimization of the [sam2.1-hiera-small](https://huggingface.co/facebook/sam2.1-hiera-small) model using **post-training quantization (PTQ)** techniques. The optimization process is divided into these workflows:
+
+- OpenVINO for Intel® NPU
+   + This process uses OpenVINO-specific passes like `OpenVINOOptimumConversion`, `OpenVINOIoUpdate` and `OpenVINOEncapsulation`
+
+## Intel® Workflows
+
+These workflows perform quantization with Optimum Intel®. They run the following optimization pipeline:
+
+- *HuggingFace Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model*
@@ -0,0 +1,12 @@
+keywords:
+    aitk
+arch: convnext
+recipes:
+    - file: "sam2.1_hiera_small_ov_config.json"
+      devices:
+        - npu
+      ep: OpenVINOExecutionProvider
+aitk:
+    modelInfo:
+        id: "huggingface/facebook/sam2.1-hiera-small"
+        version: 1
@@ -0,0 +1,12 @@
+{
+    "workflows": [
+        {
+            "file": "sam2.1_hiera_small_ov_config.json",
+            "templateName": "sam2.1_hiera_small_ov_config"
+        }
+    ],
+    "modelInfo": {
+        "id": "huggingface/facebook/sam2.1-hiera-small",
+        "version": 1
+    }
+}
@@ -0,0 +1,4 @@
+# This file will be installed together with AITK runtime requirements
+# For the full requirements, see AITK
+transformers>=4.56.2
+optimum-intel @ git+https://github.com/skuros/optimum-intel@skuros/enable-ov-sam2
@@ -0,0 +1,32 @@
+{
+    "input_model": { "type": "HfModel", "model_path": "facebook/sam2.1-hiera-small" },
+    "systems": {
+        "local_system": {
+            "type": "LocalSystem",
+            "accelerators": [ { "device":"npu","execution_providers": [ "OpenVINOExecutionProvider" ] } ]
+        }
+    },
+    "passes": {
+        "optimum_convert": {
+            "type": "OpenVINOOptimumConversion",
+            "extra_args": { "device": "npu", "task": "feature-extraction" },
+            "ov_quant_config": { "weight_format": "int8", "group_size": -1, "ratio": 1 }
+        },
+        "io_update": { "type": "OpenVINOIoUpdate", "static": false, "reuse_cache": true },
+        "encapsulation": {
+            "type": "OpenVINOEncapsulation",
+            "target_device": "npu",
+            "keep_ov_dynamic_dims": true,
+            "ov_version": "2025.1",
+            "reuse_cache": true,
+            "onnx_file_name": "sam2.1_hiera_small.onnx",
+            "output_dir": "model/sam2.1_hiera_small"
+        }
+    },
+    "search_strategy": false,
+    "target": "local_system",
+    "cache_dir": "cache",
+    "evaluate_input_model": false,
+    "output_dir": "model/sam2.1_hiera_small",
+    "clean_cache": false
+}
@@ -0,0 +1,45 @@
+{
+    "name": "Convert to Intel NPU",
+    "isLLM": false,
+    "isIntel": true,
+    "intelRuntimeValues": [
+        "npu"
+    ],
+    "debugInfo": {
+        "autoGenerated": true,
+        "useOpenVINOOptimumConversion": "optimum_convert"
+    },
+    "addCpu": false,
+    "runtime": {
+        "autoGenerated": true,
+        "name": "Evaluate on",
+        "type": "enum",
+        "displayNames": [
+            "Intel NPU"
+        ],
+        "path": "systems.local_system.accelerators.0.device",
+        "values": [
+            "npu"
+        ],
+        "readOnly": false
+    },
+    "sections": [
+        {
+            "autoGenerated": true,
+            "name": "Convert",
+            "phase": "Conversion",
+            "parameters": [],
+            "toggle": {
+                "autoGenerated": true,
+                "name": "Convert to ONNX format",
+                "type": "bool",
+                "path": "passes.optimum_convert",
+                "actions": [
+                    [],
+                    []
+                ],
+                "readOnly": true
+            }
+        }
+    ]
+}