diff --git a/notebooks/funasr-nano/cmd_helper.py b/notebooks/funasr-nano/cmd_helper.py
new file mode 100644
index 00000000000..3dd2d928181
--- /dev/null
+++ b/notebooks/funasr-nano/cmd_helper.py
@@ -0,0 +1,63 @@
+import logging
+import subprocess  # nosec - disable B404:import-subprocess check
+import sys
+import os
+from pathlib import Path
+import platform
+
+
+def clone_repo(repo_url: str, revision: str = None, add_to_sys_path: bool = True) -> Path:
+    """Clone ``repo_url`` into the current directory (unless already present) and return the checkout's relative path."""
+    # Strip only a trailing "/" and a trailing ".git" so that names such as "user.github.io" survive intact.
+    repo_path = Path(repo_url.rstrip("/").split("/")[-1].removesuffix(".git"))
+    if not repo_path.exists():
+        try:
+            subprocess.run(["git", "clone", repo_url], check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+        except Exception as exc:
+            # OSError (e.g. git is not installed) has no ``stderr``; fall back to the exception text before re-raising.
+            print(f"Failed to clone the repository: {getattr(exc, 'stderr', None) or exc}")
+            raise
+        if revision:  # the revision is only checked out right after a fresh clone
+            subprocess.run(["git", "checkout", revision], cwd=str(repo_path), check=True)
+    if add_to_sys_path and str(repo_path.resolve()) not in sys.path:  # make the repo importable exactly once
+        sys.path.insert(0, str(repo_path.resolve()))
+    return repo_path
+
+
+def optimum_cli(model_id, output_dir, show_command=True, additional_args: dict[str, str] = None, debug_logs=False):
+    """Run ``optimum-cli export openvino`` for ``model_id``, writing the exported model to ``output_dir``.
+
+    ``additional_args`` maps option names to values (a falsy value emits a bare ``--name`` flag);
+    ``show_command`` renders the command via IPython; ``debug_logs`` sets ``TRANSFORMERS_VERBOSITY=debug``
+    for the duration of the call. Raises ``subprocess.CalledProcessError`` (after logging its output) on failure.
+    """
+    # model_id and output_dir stay single argv entries, so paths containing spaces are passed through intact.
+    command = ["optimum-cli", "export", "openvino", "--model", str(model_id), str(output_dir)]
+    for arg, value in (additional_args or {}).items():
+        command.append(f"--{arg}")
+        if value:
+            command.extend(str(value).split())  # multi-token values keep expanding to several arguments
+    if show_command:
+        from IPython.display import Markdown, display
+
+        display(Markdown("**Export command:**"))
+        display(Markdown(f"`{' '.join(command)}`"))
+    previous_verbosity = os.environ.get("TRANSFORMERS_VERBOSITY")
+    if debug_logs:
+        os.environ["TRANSFORMERS_VERBOSITY"] = "debug"
+    try:
+        subprocess.run(command, shell=(platform.system() == "Windows"), check=True, capture_output=True)
+    except subprocess.CalledProcessError as exc:
+        logger = logging.getLogger()
+        logger.error(f"Command failed with exit code {exc.returncode}")
+        for label, stream in (("STDOUT", exc.stdout), ("STDERR", exc.stderr)):
+            if stream:
+                logger.error(f"{label}:\n{stream.decode(errors='replace')}")
+        raise
+    finally:
+        if debug_logs:
+            # Put the variable back exactly as found; drop it if it was unset so "debug" does not leak.
+            if previous_verbosity is None:
+                os.environ.pop("TRANSFORMERS_VERBOSITY", None)
+            else:
+                os.environ["TRANSFORMERS_VERBOSITY"] = previous_verbosity
diff --git a/notebooks/funasr-nano/funasr_fixed.ipynb b/notebooks/funasr-nano/funasr_fixed.ipynb
new file mode 100644
index 00000000000..47bd6342352
--- /dev/null
+++ b/notebooks/funasr-nano/funasr_fixed.ipynb
@@ -0,0 +1,1278 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# End-to-End Speech Recognition with FunASR Nano and OpenVINO\n",
+    "\n",
+    "[FunASR Nano](https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512) is a large end-to-end speech recognition model released by Tongyi Lab. It is trained on tens of millions of hours of real speech data and supports low-latency, real-time transcription across 31 languages. It excels in vertical domains such as education and finance, accurately recognizing professional terminology and regional accents.\n",
+    "\n",
+    "**Model architecture** -- FunASR Nano (~800M parameters) is a **multimodal audio-language model** with four components:\n",
+    "\n",
+    "| Component | Role |\n",
+    "|---|---|\n",
+    "| **Audio Frontend (WavFrontend)** | Extracts Fbank features from raw waveform (mel-frequency filterbank) |\n",
+    "| **Audio Encoder** | Converts audio features into audio embeddings |\n",
+    "| **Text Embeddings** | Standard token embeddings for the LLM vocabulary |\n",
+    "| **Language Model (Qwen3-0.6B)** | Generates transcription from merged audio + text embeddings |\n",
+    "\n",
+    "The pipeline works as: **Audio -> Frontend -> Encoder -> Embeddings merge with text prompt -> LLM -> Transcribed text**\n",
+    "\n",
+    "In this tutorial we demonstrate how to convert, run, and optimize FunASR Nano using **OpenVINO** and discuss **OpenVINO GenAI** integration.\n",
+    "\n",
+    "#### Table of contents:\n",
+    "\n",
+    "- [1. Environment Setup](#1.-Environment-Setup)\n",
+    "- [2. Hugging Face Authentication](#2.-Hugging-Face-Authentication)\n",
+    "- [3. Model Download and Analysis](#3.-Model-Download-and-Analysis)\n",
+    "- [4. Conversion to OpenVINO IR](#4.-Conversion-to-OpenVINO-IR)\n",
+    "- [5. OpenVINO Runtime Inference](#5.-OpenVINO-Runtime-Inference)\n",
+    "- [6. Multi-Device Inference (CPU / GPU / NPU)](#6.-Multi-Device-Inference-(CPU-/-GPU-/-NPU))\n",
+    "  - [6.1 CPU Inference](#6.1-CPU-Inference)\n",
+    "  - [6.2 GPU Inference](#6.2-GPU-Inference)\n",
+    "  - [6.3 NPU Inference](#6.3-NPU-Inference)\n",
+    "- [7. OpenVINO GenAI Integration](#7.-OpenVINO-GenAI-Integration)\n",
+    "- [8. Interactive Demo](#8.-Interactive-Demo)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Environment Setup\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "Install all required dependencies: OpenVINO, OpenVINO GenAI, PyTorch, FunASR, and audio processing libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "notebook_utils.py already exists\n",
+      "cmd_helper.py already exists\n",
+      "pip_helper.py already exists\n",
+      "ov_funasr_helper.py already exists\n",
+      "gradio_helper.py already exists\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fetch utility modules from openvino_notebooks repository\n",
+    "import requests\n",
+    "from pathlib import Path\n",
+    "\n",
+    "utils = {\n",
+    "    # General OpenVINO notebook utilities\n",
+    "    \"notebook_utils.py\": \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/notebook_utils.py\",\n",
+    "    \"cmd_helper.py\":     \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/cmd_helper.py\",\n",
+    "    \"pip_helper.py\":     \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/utils/pip_helper.py\",\n",
+    "    # FunASR-specific helpers\n",
+    "    \"ov_funasr_helper.py\": \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/funasr-nano/ov_funasr_helper.py\",\n",
+    "    \"gradio_helper.py\":    \"https://raw.githubusercontent.com/openvinotoolkit/openvino_notebooks/latest/notebooks/funasr-nano/gradio_helper.py\",\n",
+    "}\n",
+    "\n",
+    "for filename, url in utils.items():\n",
+    "    if not Path(filename).exists():\n",
+    "        r = requests.get(url=url)\n",
+    "        r.raise_for_status()\n",
+    "        Path(filename).write_text(r.text)\n",
+    "        print(f\"Downloaded {filename}\")\n",
+    "    else:\n",
+    "        print(f\"{filename} already exists\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cmd_helper import clone_repo\n",
+    "from pip_helper import pip_install\n",
+    "import platform\n",
+    "\n",
+    "# Uninstall potentially conflicting packages before clean install\n",
+    "!pip uninstall -y -q torch torchaudio optimum-intel optimum\n",
+    "\n",
+    "pip_install(\n",
+    "    \"-q\",\n",
+    "    \"--extra-index-url\",\n",
+    "    \"https://download.pytorch.org/whl/cpu\",\n",
+    "    \"torch\",\n",
+    "    \"nncf\",\n",
+    "    \"torchaudio\",\n",
+    "    \"openvino==2025.3.0\",        # optimum-intel 1.26.x requires <2025.4\n",
+    "    \"openvino-genai==2025.3.0.0\",\n",
+    "    \"optimum==2.1.0\",\n",
+    "    \"optimum-intel==1.26.1\",     # last version compatible with openvino 2025.3\n",
+    "    \"transformers>=4.51,<4.56\",  # 4.51+ for Qwen3; <4.56 for optimum-intel 1.26\n",
+    "    \"funasr>=1.2.7\",\n",
+    "    \"gradio\",\n",
+    "    \"huggingface_hub\",\n",
+    "    \"librosa\",\n",
+    ")\n",
+    "\n",
+    "# Clone the Fun-ASR repository (contains model.py needed for model loading)\n",
+    "repo_dir = Path(\"Fun-ASR\")\n",
+    "revision = \"efe63c122929bcca095fedc537c3081c5c4ee062\"\n",
+    "clone_repo(\"https://github.com/FunAudioLLM/Fun-ASR.git\", revision)\n",
+    "\n",
+    "if platform.system() == \"Darwin\":\n",
+    "    pip_install(\"numpy<2.0\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Hugging Face Authentication\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "FunASR Nano is a **public model** — no authentication token is required to download it. However, if you work with gated models in the future, you can authenticate using one of these methods:\n",
+    "\n",
+    "- Set an environment variable: `export HF_TOKEN=your_token_here`\n",
+    "- Or run: `huggingface-cli login`\n",
+    "\n",
+    "The cell below will use the token from the environment if available."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No HF_TOKEN found — proceeding without authentication (OK for public models)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import os\n",
+    "from huggingface_hub import login\n",
+    "\n",
+    "token = os.getenv(\"HF_TOKEN\")\n",
+    "if token:\n",
+    "    login(token=token, add_to_git_credential=False)\n",
+    "    print(\"Logged into Hugging Face using HF_TOKEN\")\n",
+    "else:\n",
+    "    print(\"No HF_TOKEN found — proceeding without authentication (OK for public models)\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Model Download and Analysis\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "FunASR Nano is available in two variants:\n",
+    "\n",
+    "| Model | Languages | Training Data |\n",
+    "|---|---|---|\n",
+    "| **Fun-ASR-Nano-2512** | Chinese, English, Japanese + 7 dialects & 26 accents | Tens of millions of hours |\n",
+    "| **Fun-ASR-MLT-Nano-2512** | 31 languages (incl. East/Southeast Asian, European) | Hundreds of thousands of hours |\n",
+    "\n",
+    "Both models share the same architecture (~800M parameters) based on **Qwen3-0.6B** as the language model backbone.\n",
+    "\n",
+    "### Model architecture detail\n",
+    "\n",
+    "The model processes audio through a multi-stage pipeline:\n",
+    "\n",
+    "1. **WavFrontend**: Extracts 80-dimensional mel-filterbank features at 10ms frame shift\n",
+    "2. **Audio Encoder**: Convolutional + Transformer layers that downsample and encode audio features\n",
+    "3. **Audio Adaptor**: Projects encoder output to the LLM hidden dimension\n",
+    "4. **Embedding Merge**: Audio embeddings replace the placeholder tokens in the text prompt and are then concatenated with the text embeddings\n",
+    "5. **Qwen3-0.6B LLM**: Autoregressive decoder that generates the transcription\n",
+    "\n",
+    "**Model inputs:**\n",
+    "- Raw audio waveform (WAV, MP3, etc.) at any sample rate (resampled internally to 16kHz)\n",
+    "- Text prompt template with `<|startofspeech|>...<|endofspeech|>` markers\n",
+    "\n",
+    "**Model outputs:**\n",
+    "- Transcribed text string"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "500b1db8193c4d9799062200347cb54b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dropdown(description='Model:', options=('FunAudioLLM/Fun-ASR-Nano-2512', 'FunAudioLLM/Fun-ASR-MLT-Nano-2512'),…"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import ipywidgets as widgets\n",
+    "from pathlib import Path\n",
+    "\n",
+    "model_ids = [\"FunAudioLLM/Fun-ASR-Nano-2512\", \"FunAudioLLM/Fun-ASR-MLT-Nano-2512\"]\n",
+    "\n",
+    "model_selector = widgets.Dropdown(\n",
+    "    options=model_ids,\n",
+    "    default=model_ids[0],\n",
+    "    description=\"Model:\",\n",
+    ")\n",
+    "\n",
+    "model_selector"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "88d624813c4a43a1b3965cda0db041fc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Fetching 21 files:   0%|          | 0/21 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model downloaded to: Fun-ASR-Nano-2512\n",
+      "\n",
+      "Model directory contents:\n",
+      "  .cache/\n",
+      "  .gitattributes                               0.00 MB\n",
+      "  Qwen3-0.6B/\n",
+      "  README.md                                    0.01 MB\n",
+      "  README_zh.md                                 0.01 MB\n",
+      "  config.yaml                                  0.00 MB\n",
+      "  configuration.json                           0.00 MB\n",
+      "  example/\n",
+      "  images/\n",
+      "  model.pt                                  1879.83 MB\n",
+      "  multilingual.tiktoken                        0.87 MB\n"
+     ]
+    }
+   ],
+   "source": [
+    "from huggingface_hub import snapshot_download\n",
+    "\n",
+    "model_name = model_selector.value.split(\"/\")[-1]\n",
+    "model_dir = Path(model_name)\n",
+    "snapshot_download(repo_id=model_selector.value, local_dir=model_dir)\n",
+    "\n",
+    "print(f\"Model downloaded to: {model_dir}\")\n",
+    "print(f\"\\nModel directory contents:\")\n",
+    "for p in sorted(model_dir.iterdir()):\n",
+    "    if p.is_file():\n",
+    "        size_mb = p.stat().st_size / (1024 * 1024)\n",
+    "        print(f\"  {p.name:40s} {size_mb:8.2f} MB\")\n",
+    "    else:\n",
+    "        print(f\"  {p.name}/\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading Model from https://www.modelscope.cn to directory: /home/pkrzemin/.cache/modelscope/hub/models/FunAudioLLM/Fun-ASR-Nano-2512\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:trust_remote_code: True\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Loading remote code successfully: model\n",
+      "============================================================\n",
+      "FunASR Nano — Model Architecture Analysis\n",
+      "============================================================\n",
+      "\n",
+      "[Audio Frontend]\n",
+      "  Type:         WavFrontend\n",
+      "  Sample rate:  16000 Hz\n",
+      "  Frame shift:  10 ms\n",
+      "  Frame length: 25 ms\n",
+      "  Mel bins:     80\n",
+      "  LFR M/N:      7/6\n",
+      "\n",
+      "[Audio Encoder]\n",
+      "  Type: SenseVoiceEncoderSmall\n",
+      "  Parameters: 221.1M\n",
+      "\n",
+      "[Audio Adaptor]\n",
+      "  Type: Transformer\n",
+      "  Parameters: 12.6M\n",
+      "\n",
+      "[Language Model]\n",
+      "  Type:          Qwen3ForCausalLM\n",
+      "  Config class:  ['Qwen3ForCausalLM']\n",
+      "  Hidden size:   1024\n",
+      "  Layers:        28\n",
+      "  Attention heads: 16\n",
+      "  KV heads:      8\n",
+      "  Vocab size:    151936\n",
+      "\n",
+      "[Total model parameters: 829.8M]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Inspect the PyTorch model structure\n",
+    "import sys, json\n",
+    "sys.path.insert(0, str(Path(\"Fun-ASR\")))  # model.py lives in the cloned Fun-ASR repo\n",
+    "\n",
+    "from model import FunASRNano\n",
+    "\n",
+    "pt_model, kwargs = FunASRNano.from_pretrained(model=model_selector.value, device=\"cpu\")\n",
+    "pt_model.eval()\n",
+    "\n",
+    "print(\"=\" * 60)\n",
+    "print(\"FunASR Nano — Model Architecture Analysis\")\n",
+    "print(\"=\" * 60)\n",
+    "\n",
+    "# Frontend info\n",
+    "frontend = kwargs.get(\"frontend\")\n",
+    "if frontend:\n",
+    "    print(f\"\\n[Audio Frontend]\")\n",
+    "    print(f\"  Type:         WavFrontend\")\n",
+    "    print(f\"  Sample rate:  {frontend.fs} Hz\")\n",
+    "    print(f\"  Frame shift:  {frontend.frame_shift} ms\")\n",
+    "    print(f\"  Frame length: {frontend.frame_length} ms\")\n",
+    "    print(f\"  Mel bins:     {frontend.n_mels}\")\n",
+    "    print(f\"  LFR M/N:      {frontend.lfr_m}/{frontend.lfr_n}\")\n",
+    "\n",
+    "# Audio encoder\n",
+    "print(f\"\\n[Audio Encoder]\")\n",
+    "print(f\"  Type: {pt_model.audio_encoder.__class__.__name__}\")\n",
+    "total_enc_params = sum(p.numel() for p in pt_model.audio_encoder.parameters())\n",
+    "print(f\"  Parameters: {total_enc_params / 1e6:.1f}M\")\n",
+    "\n",
+    "# Audio adaptor\n",
+    "if hasattr(pt_model, 'audio_adaptor'):\n",
+    "    print(f\"\\n[Audio Adaptor]\")\n",
+    "    print(f\"  Type: {pt_model.audio_adaptor.__class__.__name__}\")\n",
+    "    total_adp_params = sum(p.numel() for p in pt_model.audio_adaptor.parameters())\n",
+    "    print(f\"  Parameters: {total_adp_params / 1e6:.1f}M\")\n",
+    "\n",
+    "# LLM\n",
+    "print(f\"\\n[Language Model]\")\n",
+    "print(f\"  Type:          {pt_model.llm.__class__.__name__}\")\n",
+    "print(f\"  Config class:  {pt_model.llm.config.architectures}\")\n",
+    "print(f\"  Hidden size:   {pt_model.llm.config.hidden_size}\")\n",
+    "print(f\"  Layers:        {pt_model.llm.config.num_hidden_layers}\")\n",
+    "print(f\"  Attention heads: {pt_model.llm.config.num_attention_heads}\")\n",
+    "print(f\"  KV heads:      {pt_model.llm.config.num_key_value_heads}\")\n",
+    "print(f\"  Vocab size:    {pt_model.llm.config.vocab_size}\")\n",
+    "\n",
+    "total_params = sum(p.numel() for p in pt_model.parameters())\n",
+    "print(f\"\\n[Total model parameters: {total_params / 1e6:.1f}M]\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model I/O shapes analysis\n",
+    "\n",
+    "Let's inspect the tensor shapes at each stage of the pipeline using a sample audio file."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Sample audio: Fun-ASR-Nano-2512/example/en.mp3\n",
+      "\n",
+      "--- Tensor shapes through the pipeline ---\n",
+      "[Frontend output]\n",
+      "  speech (Fbank features): torch.Size([1, 120, 560])  (batch, time_frames, feat_dim)\n",
+      "  speech_lengths:          tensor([120], dtype=torch.int32)\n",
+      "\n",
+      "[Encoder output]\n",
+      "  encoder_out:      torch.Size([1, 120, 512])  (batch, time_frames, hidden_dim)\n",
+      "  encoder_out_lens: tensor([120], dtype=torch.int32)\n",
+      "\n",
+      "[Adaptor output]\n",
+      "  adapted_out:      torch.Size([1, 120, 1024])  (batch, time_frames, llm_hidden_dim)\n",
+      "  adapted_out_lens: tensor([120], dtype=torch.int32)\n",
+      "\n",
+      "[Text embeddings]\n",
+      "  input_ids:    torch.Size([1, 2])\n",
+      "  text_embeds:  torch.Size([1, 2, 1024])  (batch, seq_len, llm_hidden_dim)\n",
+      "\n",
+      "[LLM output]\n",
+      "  Generates tokens autoregressively -> decoded to text string\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "from funasr.utils.load_utils import extract_fbank, load_audio_text_image_video\n",
+    "\n",
+    "# Load a sample audio file\n",
+    "wav_path = str(model_dir / \"example\" / \"en.mp3\")\n",
+    "print(f\"Sample audio: {wav_path}\")\n",
+    "\n",
+    "data_src = load_audio_text_image_video(wav_path, fs=frontend.fs)\n",
+    "speech, speech_lengths = extract_fbank(\n",
+    "    data_src, data_type=\"sound\", frontend=frontend, is_final=True\n",
+    ")  # speech: [B, T, D]\n",
+    "\n",
+    "print(f\"\\n--- Tensor shapes through the pipeline ---\")\n",
+    "print(f\"[Frontend output]\")\n",
+    "print(f\"  speech (Fbank features): {speech.shape}  (batch, time_frames, feat_dim)\")\n",
+    "print(f\"  speech_lengths:          {speech_lengths}\")\n",
+    "\n",
+    "# Run through encoder (expects [B, T, D])\n",
+    "with torch.no_grad():\n",
+    "    encoder_out, encoder_out_lens = pt_model.audio_encoder(speech, speech_lengths)\n",
+    "\n",
+    "print(f\"\\n[Encoder output]\")\n",
+    "print(f\"  encoder_out:      {encoder_out.shape}  (batch, time_frames, hidden_dim)\")\n",
+    "print(f\"  encoder_out_lens: {encoder_out_lens}\")\n",
+    "\n",
+    "# Run through adaptor\n",
+    "if hasattr(pt_model, 'audio_adaptor'):\n",
+    "    with torch.no_grad():\n",
+    "        adapted_out, adapted_lens = pt_model.audio_adaptor(encoder_out, encoder_out_lens)\n",
+    "    print(f\"\\n[Adaptor output]\")\n",
+    "    print(f\"  adapted_out:      {adapted_out.shape}  (batch, time_frames, llm_hidden_dim)\")\n",
+    "    print(f\"  adapted_out_lens: {adapted_lens}\")\n",
+    "\n",
+    "# Token embeddings\n",
+    "tokenizer = kwargs[\"tokenizer\"]\n",
+    "sample_text = \"Hello world\"\n",
+    "tokens = tokenizer.encode(sample_text, return_tensors=\"pt\")\n",
+    "with torch.no_grad():\n",
+    "    text_embeds = pt_model.llm.model.get_input_embeddings()(tokens)\n",
+    "print(f\"\\n[Text embeddings]\")\n",
+    "print(f\"  input_ids:    {tokens.shape}\")\n",
+    "print(f\"  text_embeds:  {text_embeds.shape}  (batch, seq_len, llm_hidden_dim)\")\n",
+    "\n",
+    "print(f\"\\n[LLM output]\")\n",
+    "print(f\"  Generates tokens autoregressively -> decoded to text string\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PyTorch model freed from memory\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Free PyTorch model memory before OpenVINO conversion\n",
+    "import gc\n",
+    "\n",
+    "del pt_model\n",
+    "gc.collect()\n",
+    "print(\"PyTorch model freed from memory\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Conversion to OpenVINO IR\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "FunASR Nano is a **multi-component model** that cannot be exported as a single OpenVINO IR. Instead, we convert each component separately:\n",
+    "\n",
+    "| Component | File | Conversion method |\n",
+    "|---|---|---|\n",
+    "| Text Embeddings | `openvino_text_embeddings_model.xml` | `ov.convert_model` from PyTorch embedding layer |\n",
+    "| Audio Encoder + Adaptor | `openvino_encoder_model.xml` | `ov.convert_model` with wrapped forward |\n",
+    "| Language Model (Qwen3) | `openvino_model.xml` | `ov.convert_model` → make stateful (KV-cache) |\n",
+    "\n",
+    "The conversion also saves the tokenizer and frontend configuration so the pipeline is self-contained.\n",
+    "\n",
+    "The helper `convert_funasr()` from the OpenVINO Notebooks repository handles the full conversion pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "ov_funasr_helper.py already patched\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Apply Python 3.9 compatibility patch to ov_funasr_helper.py\n",
+    "# The helper uses X | Y union type syntax (PEP 604) which requires Python 3.10+.\n",
+    "# Adding `from __future__ import annotations` makes annotations lazily evaluated,\n",
+    "# fixing the runtime TypeError on Python 3.9.\n",
+    "helper_path = Path(\"ov_funasr_helper.py\")\n",
+    "content = helper_path.read_text()\n",
+    "if not content.startswith(\"from __future__ import annotations\"):\n",
+    "    helper_path.write_text(\"from __future__ import annotations\\n\\n\" + content)\n",
+    "    print(\"Applied Python 3.9 compatibility patch to ov_funasr_helper.py\")\n",
+    "else:\n",
+    "    print(\"ov_funasr_helper.py already patched\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[OK] Fun-ASR-Nano-2512 model already converted. You can find results in Fun-ASR-Nano-2512-ov\n",
+      "\n",
+      "OpenVINO IR files in Fun-ASR-Nano-2512-ov:\n",
+      "  openvino_encoder_model.xml                    weights: 445.8 MB\n",
+      "  openvino_model.xml                            weights: 1136.9 MB\n",
+      "  openvino_text_embeddings_model.xml            weights: 296.8 MB\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/pkrzemin/tasks/benchmark/venv/lib/python3.9/site-packages/openvino/runtime/__init__.py:10: DeprecationWarning: The `openvino.runtime` module is deprecated and will be removed in the 2026.0 release. Please replace `openvino.runtime` with `openvino`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from ov_funasr_helper import convert_funasr\n",
+    "\n",
+    "ov_model_dir = Path(model_name + \"-ov\")\n",
+    "convert_funasr(str(model_dir), ov_model_dir)\n",
+    "\n",
+    "# Show resulting IR files\n",
+    "print(f\"\\nOpenVINO IR files in {ov_model_dir}:\")\n",
+    "for p in sorted(ov_model_dir.glob(\"*.xml\")):\n",
+    "    bin_path = p.with_suffix(\".bin\")\n",
+    "    bin_size = bin_path.stat().st_size / (1024 * 1024) if bin_path.exists() else 0\n",
+    "    print(f\"  {p.name:45s} weights: {bin_size:.1f} MB\")"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Inspect OpenVINO IR models\n",
+    "\n",
+    "Let's verify the converted IR files and examine their input/output specifications."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "==================================================\n",
+      "[Text Embeddings] — openvino_text_embeddings_model.xml\n",
+      "==================================================\n",
+      "  Inputs (1):\n",
+      "    input                               shape=[?,?]  dtype=<Type: 'int32_t'>\n",
+      "  Outputs (1):\n",
+      "    (unnamed)                           shape=[?,?,1024]  dtype=<Type: 'float32'>\n",
+      "\n",
+      "==================================================\n",
+      "[Audio Encoder] — openvino_encoder_model.xml\n",
+      "==================================================\n",
+      "  Inputs (2):\n",
+      "    speech                              shape=[?,?,?]  dtype=<Type: 'float32'>\n",
+      "    speech_lengths                      shape=[?]  dtype=<Type: 'int32_t'>\n",
+      "  Outputs (2):\n",
+      "    (unnamed)                           shape=[?,?,1024]  dtype=<Type: 'float32'>\n",
+      "    lengths.1                           shape=[?]  dtype=<Type: 'int32_t'>\n",
+      "\n",
+      "==================================================\n",
+      "[Language Model] — openvino_model.xml\n",
+      "==================================================\n",
+      "  Inputs (4):\n",
+      "    attention_mask                      shape=[?,?]  dtype=<Type: 'int64_t'>\n",
+      "    position_ids                        shape=[?,?]  dtype=<Type: 'int64_t'>\n",
+      "    inputs_embeds                       shape=[?,?,1024]  dtype=<Type: 'float32'>\n",
+      "    beam_idx                            shape=[?]  dtype=<Type: 'int32_t'>\n",
+      "  Outputs (1):\n",
+      "    logits                              shape=[?,?,151936]  dtype=<Type: 'float32'>\n",
+      "  Stateful: Yes (56 state variables — KV-cache hidden inside model)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import openvino as ov\n",
+    "\n",
+    "core = ov.Core()\n",
+    "\n",
+    "ir_files = {\n",
+    "    \"Text Embeddings\": ov_model_dir / \"openvino_text_embeddings_model.xml\",\n",
+    "    \"Audio Encoder\":   ov_model_dir / \"openvino_encoder_model.xml\",\n",
+    "    \"Language Model\":  ov_model_dir / \"openvino_model.xml\",\n",
+    "}\n",
+    "\n",
+    "def tensor_name(t):\n",
+    "    try:\n",
+    "        return t.get_any_name()\n",
+    "    except RuntimeError:\n",
+    "        return \"(unnamed)\"\n",
+    "\n",
+    "for name, xml_path in ir_files.items():\n",
+    "    model = core.read_model(xml_path)\n",
+    "    print(f\"\\n{'=' * 50}\")\n",
+    "    print(f\"[{name}] — {xml_path.name}\")\n",
+    "    print(f\"{'=' * 50}\")\n",
+    "    print(f\"  Inputs ({len(model.inputs)}):\")\n",
+    "    for inp in model.inputs:\n",
+    "        print(f\"    {tensor_name(inp):35s} shape={inp.get_partial_shape()}  dtype={inp.get_element_type()}\")\n",
+    "    print(f\"  Outputs ({len(model.outputs)}):\")\n",
+    "    for out in model.outputs[:5]:  # show first 5 to avoid flooding\n",
+    "        print(f\"    {tensor_name(out):35s} shape={out.get_partial_shape()}  dtype={out.get_element_type()}\")\n",
+    "    if len(model.outputs) > 5:\n",
+    "        print(f\"    ... and {len(model.outputs) - 5} more outputs (KV-cache)\")\n",
+    "    if len(model.get_sinks()) > 0:\n",
+    "        print(f\"  Stateful: Yes ({len(model.get_sinks())} state variables — KV-cache hidden inside model)\")\n",
+    "    del model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. OpenVINO Runtime Inference\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "Now we load the converted models and run inference using the `OVFunASRNano` wrapper class. This class:\n",
+    "- Loads all three IR components (text embeddings, encoder, LLM)\n",
+    "- Orchestrates the full pipeline: audio preprocessing -> encoding -> embedding merge -> LLM generation\n",
+    "- Uses `OVModelForCausalLMWithEmbed` to support `inputs_embeds` input for the LLM (needed for multimodal fusion)\n",
+    "\n",
+    "### Select Inference Device"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9e45878a2b434aaba08b7da58d23a514",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Dropdown(description='Device:', options=('CPU',), value='CPU')"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from notebook_utils import device_widget\n",
+    "\n",
+    "device = device_widget(\"CPU\", exclude=[\"AUTO\"])\n",
+    "\n",
+    "device"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Device-specific LLM configuration\n",
+    "llm_ov_config = {\n",
+    "    \"CPU\": {},\n",
+    "    \"GPU\": {\"ACTIVATIONS_SCALE_FACTOR\": \"8.0\"},\n",
+    "    \"NPU\": {\n",
+    "        \"ACTIVATIONS_SCALE_FACTOR\": \"8.0\",\n",
+    "        \"NPU_USE_NPUW\": \"YES\",\n",
+    "        \"NPUW_LLM\": \"YES\",\n",
+    "        \"NPUW_ONLINE_PIPELINE\": \"NONE\",\n",
+    "        \"MAX_PROMPT_LEN\": 1024,\n",
+    "        \"NPUW_LLM_MIN_RESPONSE_LEN\": 512,\n",
+    "    },\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[OK] Tokenizer loaded from Fun-ASR-Nano-2512-ov\n",
+      "[OK] Frontend and inference config loaded from Fun-ASR-Nano-2512-ov/frontend_config.json\n"
+     ]
+    }
+   ],
+   "source": [
+    "from ov_funasr_helper import OVFunASRNano\n",
+    "\n",
+    "ov_model = OVFunASRNano(ov_model_dir, device=device.value, llm_ov_config=llm_ov_config[device.value])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Run Speech Recognition\n",
+    "\n",
+    "Let's transcribe a sample English audio file included with the model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Audio file: Fun-ASR-Nano-2512/example/en.mp3\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Transcription: The tribal chieftain called for the boy, and presented him with fifty pieces of gold.\n",
+      "\n",
+      "Inference time: 1.50s\n",
+      "Audio duration:  7.20s\n",
+      "Real-time factor: 0.21x\n"
+     ]
+    }
+   ],
+   "source": [
+    "import time\n",
+    "\n",
+    "# Transcribe English sample\n",
+    "wav_path_en = str(model_dir / \"example\" / \"en.mp3\")\n",
+    "print(f\"Audio file: {wav_path_en}\\n\")\n",
+    "\n",
+    "start = time.perf_counter()\n",
+    "res, meta_data = ov_model.inference(data_in=[wav_path_en])\n",
+    "elapsed = time.perf_counter() - start\n",
+    "\n",
+    "text = res[0][\"text\"]\n",
+    "print(f\"Transcription: {text}\")\n",
+    "print(f\"\\nInference time: {elapsed:.2f}s\")\n",
+    "if \"batch_data_time\" in meta_data:\n",
+    "    print(f\"Audio duration:  {meta_data['batch_data_time']:.2f}s\")\n",
+    "    print(f\"Real-time factor: {elapsed / meta_data['batch_data_time']:.2f}x\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chinese transcription: 开饭时间早上九点至下午五点。\n",
+      "Inference time: 0.82s\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Transcribe Chinese sample (if available)\n",
+    "wav_path_zh = str(model_dir / \"example\" / \"zh.mp3\")\n",
+    "if Path(wav_path_zh).exists():\n",
+    "    start = time.perf_counter()\n",
+    "    res_zh, meta_zh = ov_model.inference(data_in=[wav_path_zh])\n",
+    "    elapsed = time.perf_counter() - start\n",
+    "    print(f\"Chinese transcription: {res_zh[0]['text']}\")\n",
+    "    print(f\"Inference time: {elapsed:.2f}s\")\n",
+    "else:\n",
+    "    print(\"Chinese sample not available in this model variant\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Multi-Device Inference (CPU / GPU / NPU)\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "OpenVINO supports multiple hardware backends. The following subsections validate inference on CPU and provide ready-to-run code for GPU and NPU devices."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Available OpenVINO devices: ['CPU']\n",
+      "\n",
+      "=======================================================\n",
+      "6.1  CPU Inference\n",
+      "=======================================================\n",
+      "[OK] Tokenizer loaded from Fun-ASR-Nano-2512-ov\n",
+      "[OK] Frontend and inference config loaded from Fun-ASR-Nano-2512-ov/frontend_config.json\n",
+      "  Result: The tribal chieftain called for the boy and presented him with fifty pieces of gold.\n",
+      "  Time:   1.58s\n"
+     ]
+    }
+   ],
+   "source": [
+    "import openvino as ov\n",
+    "import time\n",
+    "\n",
+    "core = ov.Core()\n",
+    "available_devices = core.available_devices\n",
+    "print(f\"Available OpenVINO devices: {available_devices}\")\n",
+    "\n",
+    "wav_test = str(model_dir / \"example\" / \"en.mp3\")\n",
+    "\n",
+    "# --- CPU inference (always available) ----------------------------------------\n",
+    "print(\"\\n\" + \"=\" * 55)\n",
+    "print(\"6.1  CPU Inference\")\n",
+    "print(\"=\" * 55)\n",
+    "\n",
+    "ov_cpu = OVFunASRNano(ov_model_dir, device=\"CPU\", llm_ov_config={})\n",
+    "start = time.perf_counter()\n",
+    "res_cpu, _ = ov_cpu.inference(data_in=[wav_test])\n",
+    "cpu_time = time.perf_counter() - start\n",
+    "print(f\"  Result: {res_cpu[0]['text']}\")\n",
+    "print(f\"  Time:   {cpu_time:.2f}s\")\n",
+    "del ov_cpu"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6.2  GPU Inference\n",
+    "\n",
+    "Intel Xe / Arc / Iris Xe GPUs are supported via the `GPU` OpenVINO plugin.\n",
+    "Run the cell below on a machine with an Intel GPU to compare throughput against CPU.\n",
+    "\n",
+    "Key configuration knob:\n",
+    "- `ACTIVATIONS_SCALE_FACTOR` (`\"8.0\"`) - scales activations to reduce numeric range overflow on GPU, improving accuracy for quantized attention layers.\n",
+    "\n",
+    "> **Tip**: The first run may be slower due to kernel compilation. Use `CACHE_DIR` to persist compiled kernels across sessions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No GPU device found. Available devices: ['CPU']\n",
+      "To run GPU inference, use a machine with an Intel GPU (Arc, Iris Xe, UHD).\n"
+     ]
+    }
+   ],
+   "source": [
+    "gpu_llm_config = {\n",
+    "    \"ACTIVATIONS_SCALE_FACTOR\": \"8.0\",\n",
+    "    \"CACHE_DIR\": \".ovms_cache_gpu\",\n",
+    "}\n",
+    "\n",
+    "if \"GPU\" in core.available_devices:\n",
+    "    print(\"=\" * 55)\n",
+    "    print(\"GPU Inference\")\n",
+    "    print(\"=\" * 55)\n",
+    "    try:\n",
+    "        ov_gpu = OVFunASRNano(\n",
+    "            model_dir=ov_model_dir,\n",
+    "            device=\"GPU\",\n",
+    "            llm_ov_config=gpu_llm_config,\n",
+    "        )\n",
+    "        wav_test_en = str(model_dir / \"example\" / \"en.mp3\")\n",
+    "        wav_test_zh = str(model_dir / \"example\" / \"zh.mp3\")\n",
+    "\n",
+    "        start = time.perf_counter()\n",
+    "        res_gpu_en, _ = ov_gpu.inference(data_in=[wav_test_en])\n",
+    "        gpu_time_en = time.perf_counter() - start\n",
+    "        print(f\"  [EN] Result: {res_gpu_en[0]['text']}\")\n",
+    "        print(f\"       Time:   {gpu_time_en:.2f}s\")\n",
+    "\n",
+    "        start = time.perf_counter()\n",
+    "        res_gpu_zh, _ = ov_gpu.inference(data_in=[wav_test_zh])\n",
+    "        gpu_time_zh = time.perf_counter() - start\n",
+    "        print(f\"  [ZH] Result: {res_gpu_zh[0]['text']}\")\n",
+    "        print(f\"       Time:   {gpu_time_zh:.2f}s\")\n",
+    "\n",
+    "        del ov_gpu\n",
+    "    except Exception as e:\n",
+    "        print(f\"GPU inference failed: {e}\")\n",
+    "else:\n",
+    "    print(f\"No GPU device found. Available devices: {core.available_devices}\")\n",
+    "    print(\"To run GPU inference, use a machine with an Intel GPU (Arc, Iris Xe, UHD).\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 6.3  NPU Inference\n",
+    "\n",
+    "Intel NPU (Neural Processing Unit) is available on Intel Core Ultra (Series 1 / 2) processors, such as Meteor Lake and Lunar Lake.\n",
+    "\n",
+    "For FunASR Nano, **the LLM component runs on the NPU** while the audio encoder runs on CPU (NPU does not support dynamic shapes required by the encoder). This hybrid approach can reduce CPU load significantly.\n",
+    "\n",
+    "> **Note**: NPU compilation can take 30-60 seconds on the first run. Use `CACHE_DIR` to cache compiled models for instant re-use."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No NPU device found. Available devices: ['CPU']\n",
+      "To run NPU inference, use an Intel Core Ultra (Meteor Lake / Lunar Lake) CPU.\n"
+     ]
+    }
+   ],
+   "source": [
+    "npu_llm_config = {\n",
+    "    \"CACHE_DIR\": \".ovms_cache_npu\",\n",
+    "    # Uncomment the line below for higher numerical precision on NPU layers:\n",
+    "    # \"NPU_COMPILATION_MODE_PARAMS\": \"compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add\",\n",
+    "}\n",
+    "\n",
+    "if \"NPU\" in core.available_devices:\n",
+    "    print(\"=\" * 55)\n",
+    "    print(\"NPU Inference  (LLM → NPU | encoder → CPU)\")\n",
+    "    print(\"=\" * 55)\n",
+    "    try:\n",
+    "        ov_npu = OVFunASRNano(\n",
+    "            model_dir=ov_model_dir,\n",
+    "            device=\"NPU\",\n",
+    "            llm_ov_config=npu_llm_config,\n",
+    "        )\n",
+    "        wav_test_en = str(model_dir / \"example\" / \"en.mp3\")\n",
+    "        wav_test_zh = str(model_dir / \"example\" / \"zh.mp3\")\n",
+    "\n",
+    "        start = time.perf_counter()\n",
+    "        res_npu_en, _ = ov_npu.inference(data_in=[wav_test_en])\n",
+    "        npu_time_en = time.perf_counter() - start\n",
+    "        print(f\"  [EN] Result: {res_npu_en[0]['text']}\")\n",
+    "        print(f\"       Time:   {npu_time_en:.2f}s\")\n",
+    "\n",
+    "        start = time.perf_counter()\n",
+    "        res_npu_zh, _ = ov_npu.inference(data_in=[wav_test_zh])\n",
+    "        npu_time_zh = time.perf_counter() - start\n",
+    "        print(f\"  [ZH] Result: {res_npu_zh[0]['text']}\")\n",
+    "        print(f\"       Time:   {npu_time_zh:.2f}s\")\n",
+    "\n",
+    "        del ov_npu\n",
+    "    except Exception as e:\n",
+    "        print(f\"NPU inference failed: {e}\")\n",
+    "else:\n",
+    "    print(f\"No NPU device found. Available devices: {core.available_devices}\")\n",
+    "    print(\"To run NPU inference, use an Intel Core Ultra (Meteor Lake / Lunar Lake) CPU.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. OpenVINO GenAI Integration\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "### Can FunASR Nano use OpenVINO GenAI?\n",
+    "\n",
+    "OpenVINO GenAI provides high-level pipeline APIs for common model types. The relevant pipelines for an ASR model are:\n",
+    "\n",
+    "| GenAI Pipeline | Architecture | Applicable to FunASR? |\n",
+    "|---|---|---|\n",
+    "| `WhisperPipeline` | Whisper encoder-decoder with cross-attention | No - Different architecture |\n",
+    "| `LLMPipeline` | Text-only autoregressive LLMs (`input_ids`) | No - Needs `inputs_embeds` |\n",
+    "| `VLMPipeline` | Vision-Language models (image + text) | No - Not audio-based |\n",
+    "\n",
+    "The following cell **actually attempts** `WhisperPipeline` and `LLMPipeline` and captures the resulting errors to show exactly why they fail.\n",
+    "\n",
+    "---\n",
+    "\n",
+    "**Why `WhisperPipeline` fails:**\n",
+    "\n",
+    "Whisper is a self-contained encoder-decoder where the encoder processes mel spectrograms and the decoder attends to encoder outputs via cross-attention. `WhisperPipeline` expects this specific two-model structure (e.g. `encoder_model.xml` + `decoder_model.xml`).\n",
+    "\n",
+    "FunASR is fundamentally different: its audio encoder produces embeddings that are **spliced directly into the LLM's token embedding sequence** before being fed to a standard causal LM. There is no cross-attention - the architecture is closer to a Vision-Language Model than to Whisper.\n",
+    "\n",
+    "**Why `LLMPipeline` fails:**\n",
+    "\n",
+    "The Qwen3-0.6B backbone *is* a standard causal LM, so at first glance `LLMPipeline` looks promising. However, `LLMPipeline.generate()` only accepts raw text or `input_ids` - it has **no `inputs_embeds` pathway**. FunASR requires passing audio embeddings as `inputs_embeds` so they can be merged with text token embeddings before the first transformer layer. Without that, audio context cannot reach the LLM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "OpenVINO GenAI version: 2025.3.0.0-2463-3c0e2d3e7e1\n",
+      "\n",
+      "Available GenAI pipeline classes:\n",
+      "  - ContinuousBatchingPipeline\n",
+      "  - Image2ImagePipeline\n",
+      "  - InpaintingPipeline\n",
+      "  - LLMPipeline\n",
+      "  - Text2ImagePipeline\n",
+      "  - Text2SpeechPipeline\n",
+      "  - TextEmbeddingPipeline\n",
+      "  - TextRerankPipeline\n",
+      "  - VLMPipeline\n",
+      "  - WhisperPipeline\n",
+      "\n",
+      "======================================================================\n",
+      "Attempt 1: openvino_genai.WhisperPipeline(ov_model_dir, 'CPU')\n",
+      "======================================================================\n",
+      "Expected: FAILS -- WhisperPipeline needs encoder_model.xml / decoder_model.xml\n",
+      "          FunASR uses openvino_encoder_model.xml + openvino_model.xml\n",
+      "\n",
+      "  [FAIL] RuntimeError: Exception from src/inference/src/cpp/core.cpp:126:\n",
+      "Exception from src/inference/src/dev/plugin.cpp:58:\n",
+      "Check 'consumer.get_expr()->get_loop_ids() == loop_ids' failed at src/common/snippets/src/lowered/pass/move_scalar_to_consumer.cpp:34:\n",
+      "All consumers of a Scalar expression are expected to have the same loop IDs\n",
+      "\n",
+      "\n",
+      "\n",
+      "\n",
+      "======================================================================\n",
+      "Attempt 2: openvino_genai.LLMPipeline(ov_model_dir, 'CPU')\n",
+      "======================================================================\n",
+      "Expected: FAILS or gives wrong answers -- openvino_model.xml uses\n",
+      "          'inputs_embeds' not 'input_ids'; audio context is not injected.\n",
+      "\n",
+      "  Pipeline loaded. Attempting text-only generation (no audio)...\n",
+      "  [FAIL] RuntimeError: Check 'm_ireq_queue_tokenizer' failed at /home/jenkins/agent/workspace/private-ci/ie/build-linux-manylinux2014/b/repos/openvino.genai/src/cpp/src/tokenizer/tokenizer_impl.cpp:554:\n",
+      "Either openvino_tokenizer.xml was not provided or it was not loaded correctly. Tokenizer::encode is not available\n",
+      "\n",
+      "\n",
+      "======================================================================\n",
+      "Result: FunASR Nano cannot be used with any standard GenAI pipeline.\n",
+      "        The OVFunASRNano wrapper in Section 5 is the correct approach.\n",
+      "======================================================================\n"
+     ]
+    }
+   ],
+   "source": [
+    "import openvino_genai\n",
+    "\n",
+    "print(\"OpenVINO GenAI version:\", openvino_genai.__version__)\n",
+    "print()\n",
+    "print(\"Available GenAI pipeline classes:\")\n",
+    "for attr in sorted(dir(openvino_genai)):\n",
+    "    if \"Pipeline\" in attr:\n",
+    "        print(f\"  - {attr}\")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# Attempt 1: WhisperPipeline on the FunASR IR directory\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print()\n",
+    "print(\"=\" * 70)\n",
+    "print(\"Attempt 1: openvino_genai.WhisperPipeline(ov_model_dir, 'CPU')\")\n",
+    "print(\"=\" * 70)\n",
+    "print(\"Expected: FAILS -- WhisperPipeline needs encoder_model.xml / decoder_model.xml\")\n",
+    "print(\"          FunASR uses openvino_encoder_model.xml + openvino_model.xml\")\n",
+    "print()\n",
+    "try:\n",
+    "    whisper_pipe = openvino_genai.WhisperPipeline(str(ov_model_dir), \"CPU\")\n",
+    "    import numpy as np\n",
+    "    import soundfile as sf\n",
+    "    audio, sr = sf.read(wav_path_en, dtype=\"float32\")\n",
+    "    result = whisper_pipe.generate(audio.tolist())\n",
+    "    print(f\"  [Unexpected success] Result: {result}\")\n",
+    "except Exception as e:\n",
+    "    print(f\"  [FAIL] {type(e).__name__}: {e}\")\n",
+    "\n",
+    "# -----------------------------------------------------------------------------\n",
+    "# Attempt 2: LLMPipeline on the FunASR IR directory\n",
+    "# -----------------------------------------------------------------------------\n",
+    "print()\n",
+    "print(\"=\" * 70)\n",
+    "print(\"Attempt 2: openvino_genai.LLMPipeline(ov_model_dir, 'CPU')\")\n",
+    "print(\"=\" * 70)\n",
+    "print(\"Expected: FAILS or gives wrong answers -- openvino_model.xml uses\")\n",
+    "print(\"          'inputs_embeds' not 'input_ids'; audio context is not injected.\")\n",
+    "print()\n",
+    "try:\n",
+    "    llm_pipe = openvino_genai.LLMPipeline(str(ov_model_dir), \"CPU\")\n",
+    "    print(\"  Pipeline loaded. Attempting text-only generation (no audio)...\")\n",
+    "    text_out = llm_pipe.generate(\n",
+    "        \"Transcribe the following audio:\",\n",
+    "        openvino_genai.GenerationConfig(max_new_tokens=20),\n",
+    "    )\n",
+    "    print(f\"  Output (no audio context): {text_out!r}\")\n",
+    "    print()\n",
+    "    print(\"  NOTE: Even if loading succeeds, there is no way to pass audio into\")\n",
+    "    print(\"        LLMPipeline -- it only accepts text/input_ids, not inputs_embeds.\")\n",
+    "except Exception as e:\n",
+    "    print(f\"  [FAIL] {type(e).__name__}: {e}\")\n",
+    "\n",
+    "print()\n",
+    "print(\"=\" * 70)\n",
+    "print(\"Result: FunASR Nano cannot be used with any standard GenAI pipeline.\")\n",
+    "print(\"        The OVFunASRNano wrapper in Section 5 is the correct approach.\")\n",
+    "print(\"=\" * 70)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Interactive Demo\n",
+    "[back to top ⬆️](#Table-of-contents:)\n",
+    "\n",
+    "Launch a Gradio interface for interactive audio transcription. You can upload audio files or record from microphone."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from gradio_helper import make_demo\n",
+    "\n",
+    "demo = make_demo(ov_model, model_dir)\n",
+    "\n",
+    "try:\n",
+    "    demo.launch(debug=True)\n",
+    "except Exception:\n",
+    "    demo.launch(debug=True, share=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.21"
+  },
+  "openvino_notebooks": {
+   "imageUrl": "https://github.com/user-attachments/assets/d55ea91b-0dd2-4a92-b6a1-3460edb41b6f",
+   "tags": {
+    "categories": [
+     "Model Demos",
+     "AI Trends"
+    ],
+    "libraries": [],
+    "other": [],
+    "tasks": [
+     "Speech Recognition"
+    ]
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/funasr-nano/notebook_utils.py b/notebooks/funasr-nano/notebook_utils.py
new file mode 100644
index 00000000000..7b6967b6271
--- /dev/null
+++ b/notebooks/funasr-nano/notebook_utils.py
@@ -0,0 +1,756 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[ ]:
+
+
+import platform
+import sys
+import threading
+import time
+from os import PathLike
+from pathlib import Path
+from typing import List, NamedTuple, Optional
+
+
+# ## Files
+#
+# Load an image, download a file, download an IR model, and create a progress bar to show download progress.
+
+# In[ ]:
+
+
+def device_widget(default="AUTO", exclude=None, added=None, description="Device:"):
+    """
+    Build a dropdown widget for choosing an OpenVINO inference device.
+
+    The option list starts from the devices reported by `ov.Core().available_devices`
+    plus "AUTO"; entries listed in `exclude` are then removed and entries listed in
+    `added` are appended when they are not already present.
+
+    :param default: Device name that is selected initially
+    :param exclude: Optional list of device names to remove from the options
+    :param added: Optional list of device names to append to the options
+    :param description: Description text passed to the dropdown widget
+    :return: `ipywidgets.Dropdown` offering the resulting device options
+    """
+    import openvino as ov
+    import ipywidgets as widgets
+
+    options = ov.Core().available_devices + ["AUTO"]
+
+    for unwanted in exclude or []:
+        if unwanted in options:
+            options.remove(unwanted)
+
+    for extra in added or []:
+        if extra not in options:
+            options.append(extra)
+
+    return widgets.Dropdown(
+        options=options,
+        value=default,
+        description=description,
+        disabled=False,
+    )
+
+
+def quantization_widget(default=True):
+    """
+    Build a checkbox widget labelled "Quantization".
+
+    :param default: Initial checked state of the checkbox
+    :return: `ipywidgets.Checkbox` instance
+    """
+    import ipywidgets as widgets
+
+    return widgets.Checkbox(value=default, description="Quantization", disabled=False)
+
+
+def pip_install(*args):
+    """
+    Run `pip install` in a subprocess using the current Python interpreter.
+
+    Each argument is converted to a string and split on single spaces, so one argument
+    may carry several command-line tokens.
+
+    :param args: Packages and/or pip options to pass to `pip install`
+    :raises subprocess.CalledProcessError: If pip exits with a non-zero status
+    """
+    import subprocess  # nosec - disable B404:import-subprocess check
+
+    cli_args = [token for arg in args for token in str(arg).split(" ")]
+    command = [sys.executable, "-m", "pip", "install", *cli_args]
+    # The shell is only requested on Windows.
+    subprocess.run(command, shell=(platform.system() == "Windows"), check=True)
+
+
+def load_image(name: str, url: str = None):
+    """
+    Return the image stored at `name`, downloading it from `url` first if the file is missing.
+
+    When `name` does not exist, the image is fetched from `url`, decoded, written to
+    `name` and returned. When it already exists, it is read from disk instead.
+
+    :param name: Local path name of the image.
+    :param url: url to the image; only used when `name` does not exist yet
+    :return: image as BGR numpy array
+    """
+    import cv2
+    import numpy as np
+    import requests
+
+    if Path(name).exists():
+        return cv2.imread(name)
+
+    # Set User-Agent to Mozilla because some websites block
+    # requests with User-Agent Python
+    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+    raw_bytes = np.asarray(bytearray(response.content), dtype="uint8")
+    decoded = cv2.imdecode(raw_bytes, -1)  # Loads the image as BGR
+    cv2.imwrite(name, decoded)
+    return decoded
+
+
+def download_file(
+    url: PathLike,
+    filename: PathLike = None,
+    directory: PathLike = None,
+    show_progress: bool = True,
+) -> PathLike:
+    """
+    Download a file from a url and save it to the local filesystem. The file is saved to the
+    current directory by default, or to `directory` if specified. If a filename is not given,
+    the filename of the URL will be used. If the target file already exists, nothing is
+    downloaded and its path is returned.
+
+    :param url: URL that points to the file to download
+    :param filename: Name of the local file to save. Should point to the name of the file only,
+                     not the full path. If None the filename from the url will be used
+    :param directory: Directory to save the file to. Will be created if it doesn't exist
+                      If None the file will be saved to the current working directory
+    :param show_progress: If True, show a tqdm progress bar
+    :return: path to downloaded file
+    :raises ValueError: If `filename` contains directory components
+    :raises Exception: If the HTTP request fails
+    """
+    from tqdm.notebook import tqdm_notebook
+    import requests
+    import urllib.parse
+
+    filename = filename or Path(urllib.parse.urlparse(url).path).name
+    chunk_size = 16384  # make chunks bigger so that not too many updates are triggered for Jupyter front-end
+
+    filename = Path(filename)
+    if len(filename.parts) > 1:
+        raise ValueError(
+            "`filename` should refer to the name of the file, excluding the directory. "
+            "Use the `directory` parameter to specify a target directory for the downloaded file."
+        )
+
+    filepath = Path(directory) / filename if directory is not None else filename
+    if filepath.exists():
+        return filepath.resolve()
+
+    # create the directory if it does not exist, and add the directory to the filename
+    if directory is not None:
+        Path(directory).mkdir(parents=True, exist_ok=True)
+
+    try:
+        response = requests.get(url=url, headers={"User-agent": "Mozilla/5.0"}, stream=True)
+        response.raise_for_status()
+    except (
+        requests.exceptions.HTTPError
+    ) as error:  # For error associated with not-200 codes. Will output something like: "404 Client Error: Not Found for url: {url}"
+        raise Exception(error) from None
+    except requests.exceptions.Timeout:
+        raise Exception(
+            "Connection timed out. If you access the internet through a proxy server, please "
+            "make sure the proxy is set in the shell from where you launched Jupyter."
+        ) from None
+    except requests.exceptions.RequestException as error:
+        raise Exception(f"File downloading failed with error: {error}") from None
+
+    filesize = int(response.headers.get("Content-length", 0))
+    try:
+        with tqdm_notebook(
+            total=filesize,
+            unit="B",
+            unit_scale=True,
+            unit_divisor=1024,
+            desc=str(filename),
+            disable=not show_progress,
+        ) as progress_bar, open(filepath, "wb") as file_object:
+            for chunk in response.iter_content(chunk_size):
+                file_object.write(chunk)
+                progress_bar.update(len(chunk))
+                progress_bar.refresh()
+    except BaseException:
+        # Remove the partially written file; otherwise the "already exists" check above
+        # would return the truncated file as a finished download on the next call.
+        filepath.unlink(missing_ok=True)
+        raise
+    finally:
+        # Release the connection whether or not the download succeeded.
+        response.close()
+
+    return filepath.resolve()
+
+
+def download_ir_model(model_xml_url: str, destination_folder: PathLike = None) -> PathLike:
+    """
+    Fetch an IR model (xml plus bin) given the URL of its xml file. The weights file is
+    expected next to the xml file, under the same name with a ".bin" extension.
+
+    :param model_xml_url: URL to model xml file to download
+    :param destination_folder: Directory where downloaded model xml and bin are saved. If None, model
+                               files are saved to the current directory
+    :return: path to downloaded xml model file
+    """
+    # Swap the last four characters of the URL (expected to be ".xml") for ".bin".
+    weights_url = f"{model_xml_url[:-4]}.bin"
+    xml_path = download_file(model_xml_url, directory=destination_folder, show_progress=False)
+    download_file(weights_url, directory=destination_folder)
+    return xml_path
+
+
+# ## Images
+
+# ### Convert Pixel Data
+#
+# Normalize image pixel values between 0 and 1, and convert images to RGB and BGR.
+
+# In[ ]:
+
+
+def normalize_minmax(data):
+    """
+    Normalizes the values in `data` between 0 and 1
+
+    :param data: object exposing `min()` and `max()` and supporting arithmetic, e.g. a numpy array
+    :return: `data` rescaled so that its minimum maps to 0 and its maximum maps to 1
+    :raises ValueError: If all elements of `data` have the same value
+    """
+    # Compute the extrema once instead of re-scanning `data` for every use.
+    data_min = data.min()
+    data_max = data.max()
+    if data_max == data_min:
+        raise ValueError(f"Normalization is not possible because all elements of `data` have the same value: {data_max}.")
+    return (data - data_min) / (data_max - data_min)
+
+
+def to_rgb(image_data):
+    """
+    Swap the channel order of `image_data` from BGR to RGB.
+
+    :param image_data: image in BGR channel order
+    :return: the converted image, as returned by `cv2.cvtColor`
+    """
+    from cv2 import COLOR_BGR2RGB, cvtColor
+
+    return cvtColor(image_data, COLOR_BGR2RGB)
+
+
+def to_bgr(image_data):
+    """
+    Swap the channel order of `image_data` from RGB to BGR.
+
+    :param image_data: image in RGB channel order
+    :return: the converted image, as returned by `cv2.cvtColor`
+    """
+    from cv2 import COLOR_RGB2BGR, cvtColor
+
+    return cvtColor(image_data, COLOR_RGB2BGR)
+
+
+# ## Videos
+
+# ### Video Player
+#
+# Custom video player to fulfill FPS requirements. You can set target FPS and output size, flip the video horizontally or skip first N frames.
+
+# In[ ]:
+
+
+class VideoPlayer:
+    """
+    Custom video player to fulfill FPS requirements. You can set target FPS and output size,
+    flip the video horizontally or skip first N frames.
+
+    :param source: Video source. It could be either camera device or video file.
+    :param size: Output frame size.
+    :param flip: Flip source horizontally.
+    :param fps: Target FPS.
+    :param skip_first_frames: Skip first N frames.
+    """
+
+    def __init__(self, source, size=None, flip=False, fps=None, skip_first_frames=0, width=1280, height=720):
+        import cv2
+
+        self.cv2 = cv2  # This is done to access the package in class methods
+        self.__cap = cv2.VideoCapture(source)
+        # try HD by default to get better video quality
+        self.__cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
+        self.__cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
+
+        if not self.__cap.isOpened():
+            raise RuntimeError(f"Cannot open {'camera' if isinstance(source, int) else ''} {source}")
+        # skip first N frames
+        self.__cap.set(cv2.CAP_PROP_POS_FRAMES, skip_first_frames)
+        # fps of input file
+        self.__input_fps = self.__cap.get(cv2.CAP_PROP_FPS)
+        if self.__input_fps <= 0:
+            self.__input_fps = 60
+        # target fps given by user
+        self.__output_fps = fps if fps is not None else self.__input_fps
+        self.__flip = flip
+        self.__size = None
+        self.__interpolation = None
+        if size is not None:
+            self.__size = size
+            # AREA better for shrinking, LINEAR better for enlarging
+            self.__interpolation = cv2.INTER_AREA if size[0] < self.__cap.get(cv2.CAP_PROP_FRAME_WIDTH) else cv2.INTER_LINEAR
+        # first frame
+        _, self.__frame = self.__cap.read()
+        self.__lock = threading.Lock()
+        self.__thread = None
+        self.__stop = False
+
+    """
+    Start playing.
+    """
+
+    def start(self):
+        self.__stop = False
+        self.__thread = threading.Thread(target=self.__run, daemon=True)
+        self.__thread.start()
+
+    """
+    Stop playing and release resources.
+    """
+
+    def stop(self):
+        self.__stop = True
+        if self.__thread is not None:
+            self.__thread.join()
+        self.__cap.release()
+
+    def __run(self):
+        """
+        Reader loop executed on the background thread.
+
+        Reads frames from the capture paced by the input fps and publishes a
+        frame for `next()` at most once per output-fps period. The shared frame
+        is cleared once reading fails or a stop has been requested.
+        """
+        prev_time = 0
+        while not self.__stop:
+            t1 = time.time()
+            ret, frame = self.__cap.read()
+            if not ret:
+                break
+
+            # fulfill target fps
+            if 1 / self.__output_fps < time.time() - prev_time:
+                prev_time = time.time()
+                # replace by current frame
+                with self.__lock:
+                    self.__frame = frame
+
+            t2 = time.time()
+            # time to wait [s] to fulfill input fps
+            wait_time = 1 / self.__input_fps - (t2 - t1)
+            # wait until
+            time.sleep(max(0, wait_time))
+
+        # Clear the frame under the lock: next() tests for None and then copies
+        # the frame while holding this lock, so an unlocked write here could
+        # land between those two steps.
+        with self.__lock:
+            self.__frame = None
+
+    """
+    Get current frame.
+    """
+
+    def next(self):
+        """
+        Return a copy of the current frame, or None when no frame is available.
+
+        The copy is resized when a target size was configured and mirrored
+        horizontally when flipping was requested.
+        """
+        import cv2
+
+        with self.__lock:
+            if self.__frame is None:
+                return None
+            # need to copy frame, because can be cached and reused if fps is low
+            frame = self.__frame.copy()
+        # Use the module imported above instead of relying on a `self.cv2`
+        # attribute having been set elsewhere on the instance.
+        if self.__size is not None:
+            frame = cv2.resize(frame, self.__size, interpolation=self.__interpolation)
+        if self.__flip:
+            frame = cv2.flip(frame, 1)
+        return frame
+
+
+# ## Visualization
+
+# ### Segmentation
+#
+# Define a SegmentationMap NamedTuple that keeps the labels and colormap for a segmentation project/dataset. Create CityScapesSegmentation and BinarySegmentation SegmentationMaps. Create a function to convert a segmentation map to an RGB image with a colormap, and to show the segmentation result as an overlay over the original image.
+
+# In[ ]:
+
+
+class Label(NamedTuple):
+    """A single segmentation class: its index, its color tuple and an optional name."""
+
+    # Position of the class; the predefined tables below number these from 0.
+    index: int
+    # Color tuple for the class (three 0-255 components in the predefined tables).
+    color: tuple
+    # Human-readable class name; may be omitted.
+    name: Optional[str] = None
+
+
+# In[ ]:
+
+
+class SegmentationMap(NamedTuple):
+    """Collection of `Label` entries describing the classes of a segmentation dataset."""
+
+    labels: list
+
+    def get_colormap(self):
+        """Return the label colors as a numpy array with one row per label."""
+        import numpy as np
+
+        colors = []
+        for entry in self.labels:
+            colors.append(entry.color)
+        return np.array(colors)
+
+    def get_labels(self):
+        """Return the list of label names, or None when no label has a non-empty name."""
+        names = [entry.name for entry in self.labels]
+        return names if any(names) else None
+
+
+# In[ ]:
+
+
+# Label table for a Cityscapes-style project: 19 named classes plus a trailing
+# "background" entry. Each label's index equals its position in the list.
+cityscape_labels = [
+    Label(index=0, color=(128, 64, 128), name="road"),
+    Label(index=1, color=(244, 35, 232), name="sidewalk"),
+    Label(index=2, color=(70, 70, 70), name="building"),
+    Label(index=3, color=(102, 102, 156), name="wall"),
+    Label(index=4, color=(190, 153, 153), name="fence"),
+    Label(index=5, color=(153, 153, 153), name="pole"),
+    Label(index=6, color=(250, 170, 30), name="traffic light"),
+    Label(index=7, color=(220, 220, 0), name="traffic sign"),
+    Label(index=8, color=(107, 142, 35), name="vegetation"),
+    Label(index=9, color=(152, 251, 152), name="terrain"),
+    Label(index=10, color=(70, 130, 180), name="sky"),
+    Label(index=11, color=(220, 20, 60), name="person"),
+    Label(index=12, color=(255, 0, 0), name="rider"),
+    Label(index=13, color=(0, 0, 142), name="car"),
+    Label(index=14, color=(0, 0, 70), name="truck"),
+    Label(index=15, color=(0, 60, 100), name="bus"),
+    Label(index=16, color=(0, 80, 100), name="train"),
+    Label(index=17, color=(0, 0, 230), name="motorcycle"),
+    Label(index=18, color=(119, 11, 32), name="bicycle"),
+    Label(index=19, color=(255, 255, 255), name="background"),
+]
+
+# Ready-made SegmentationMap wrapping the table above.
+CityScapesSegmentation = SegmentationMap(cityscape_labels)
+
+# Two-class table: white background (index 0) and black foreground (index 1).
+binary_labels = [
+    Label(index=0, color=(255, 255, 255), name="background"),
+    Label(index=1, color=(0, 0, 0), name="foreground"),
+]
+
+# Ready-made SegmentationMap for binary segmentation results.
+BinarySegmentation = SegmentationMap(binary_labels)
+
+
+# In[ ]:
+
+
+def segmentation_map_to_image(result, colormap, remove_holes: bool = False):
+    """
+    Convert a network result holding class indices to an RGB image with
+    integer values from 0-255 by applying a colormap.
+
+    :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
+    :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
+    :param remove_holes: If True, remove holes in the segmentation result.
+    :return: An RGB image of shape (H, W, 3) where each pixel is a uint8 value according to colormap.
+    :raises ValueError: If result is not of shape (H,W) or (1,H,W), or if it contains more
+                        distinct values than the colormap has classes.
+    """
+    import cv2
+    import numpy as np
+
+    # Accept exactly (H, W) or (1, H, W); other ranks cannot be drawn below.
+    if result.ndim != 2 and not (result.ndim == 3 and result.shape[0] == 1):
+        raise ValueError(f"Expected result with shape (H,W) or (1,H,W), got result with shape {result.shape}")
+
+    num_values = len(np.unique(result))
+    if num_values > colormap.shape[0]:
+        raise ValueError(
+            f"Expected max {colormap.shape[0]} classes in result, got {num_values} "
+            "different output values. Please make sure to convert the network output to "
+            "pixel values before calling this function."
+        )
+
+    # Drop the leading singleton axis only for 3-D input, so that a 2-D map
+    # which happens to be one pixel high keeps both of its dimensions.
+    if result.ndim == 3:
+        result = result.squeeze(0)
+
+    result = result.astype(np.uint8)
+
+    # RETR_EXTERNAL keeps only outer contours, so filling them closes any holes.
+    contour_mode = cv2.RETR_EXTERNAL if remove_holes else cv2.RETR_TREE
+    mask = np.zeros((result.shape[0], result.shape[1], 3), dtype=np.uint8)
+    for label_index, color in enumerate(colormap):
+        # Binary image (0 / 255) of the pixels that belong to this class.
+        label_index_map = result == label_index
+        label_index_map = label_index_map.astype(np.uint8) * 255
+        contours, _ = cv2.findContours(label_index_map, contour_mode, cv2.CHAIN_APPROX_SIMPLE)
+        cv2.drawContours(
+            mask,
+            contours,
+            contourIdx=-1,
+            color=color.tolist(),
+            thickness=cv2.FILLED,
+        )
+
+    return mask
+
+
+def segmentation_map_to_overlay(image, result, alpha, colormap, remove_holes=False):
+    """
+    Blend a colormapped segmentation mask over the source image and return the result.
+
+    :param image: Source image.
+    :param result: A single network result after converting to pixel values in H,W or 1,H,W shape.
+    :param alpha: Weight of the mask in the blend; the source image gets 1 - alpha.
+    :param colormap: A numpy array of shape (num_classes, 3) with an RGB value per class.
+    :param remove_holes: If True, remove holes in the segmentation result.
+    :return: An RGB image with the segmentation mask overlaid on the source image.
+    """
+    import cv2
+    import numpy as np
+
+    # Promote a single-channel image to three identical channels.
+    if image.ndim == 2:
+        image = np.repeat(image[..., np.newaxis], 3, axis=2)
+
+    overlay = segmentation_map_to_image(result, colormap, remove_holes)
+    height, width = image.shape[:2]
+    # Bring the mask to the image resolution before blending.
+    overlay = cv2.resize(src=overlay, dsize=(width, height))
+    return cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
+
+
+# ### Network Results
+#
+# Show network result image, optionally together with the source image and a legend with labels.
+
+# In[ ]:
+
+
+def viz_result_image(
+    result_image,
+    source_image=None,
+    source_title: str = None,
+    result_title: str = None,
+    labels: Optional["SegmentationMap"] = None,
+    resize: bool = False,
+    bgr_to_rgb: bool = False,
+    hide_axes: bool = False,
+):
+    """
+    Show result image, optionally together with source images, and a legend with labels.
+
+    :param result_image: Numpy array of RGB result image.
+    :param source_image: Numpy array of source image. If provided this image will be shown
+                         next to the result image. source_image is expected to be in RGB format.
+                         Set bgr_to_rgb to True if source_image is in BGR format.
+    :param source_title: Title to display for the source image.
+    :param result_title: Title to display for the result image.
+    :param labels: Object providing get_colormap() and get_labels(), such as a SegmentationMap.
+                   If provided, a legend will be shown with the given labels.
+    :param resize: If true, resize the result image to the same shape as the source image.
+                   Ignored when source_image is None.
+    :param bgr_to_rgb: If true, convert the source image from BGR to RGB. Use this option if
+                       source_image is a BGR image. Ignored when source_image is None.
+    :param hide_axes: If true, do not show matplotlib axes.
+    :return: Matplotlib figure with result image
+    """
+    import cv2
+    import matplotlib.pyplot as plt
+    from matplotlib.lines import Line2D
+
+    # Both options operate on the source image, so skip them when there is none
+    # instead of failing on None.
+    if source_image is not None:
+        if bgr_to_rgb:
+            source_image = to_rgb(source_image)
+        if resize:
+            result_image = cv2.resize(result_image, (source_image.shape[1], source_image.shape[0]))
+
+    num_images = 1 if source_image is None else 2
+
+    # squeeze=False keeps `ax` two-dimensional even for a single subplot.
+    fig, ax = plt.subplots(1, num_images, figsize=(16, 8), squeeze=False)
+    if source_image is not None:
+        ax[0, 0].imshow(source_image)
+        ax[0, 0].set_title(source_title)
+
+    # The result image always occupies the last column.
+    ax[0, num_images - 1].imshow(result_image)
+    ax[0, num_images - 1].set_title(result_title)
+
+    if hide_axes:
+        for a in ax.ravel():
+            a.axis("off")
+    if labels:
+        colors = labels.get_colormap()
+        # One legend line per class, with the 0-255 color scaled to 0-1 for matplotlib.
+        lines = [
+            Line2D(
+                [0],
+                [0],
+                color=[item / 255 for item in c.tolist()],
+                linewidth=3,
+                linestyle="-",
+            )
+            for c in colors
+        ]
+        plt.legend(
+            lines,
+            labels.get_labels(),
+            bbox_to_anchor=(1, 1),
+            loc="upper left",
+            prop={"size": 12},
+        )
+    # Close the figure so it is only rendered when the caller displays the returned object.
+    plt.close(fig)
+    return fig
+
+
+# ### Live Inference
+
+# In[ ]:
+
+
+def show_array(frame, display_handle=None):
+    """
+    Encode `frame` as a JPEG and show it, either in a new output or by updating
+    the output behind `display_handle`. `frame` is expected to have data in BGR order.
+
+    A handle can be created up front with: `display_handle = display(display_id=True)`.
+    Returns the handle that now shows the image.
+    """
+    import cv2
+    from IPython.display import Image, display
+
+    _, encoded = cv2.imencode(ext=".jpeg", img=frame)
+    jpeg = Image(data=encoded.tobytes())
+    if display_handle is not None:
+        # Reuse the existing output area.
+        display_handle.update(jpeg)
+        return display_handle
+    return display(jpeg, display_id=True)
+
+
+# ## Checks and Alerts
+#
+# Create an alert class to show stylized info/error/warning messages and a `check_device` function that checks whether a given device is available.
+
+# In[ ]:
+
+
+class NotebookAlert(Exception):
+    """Exception type that renders its message as a styled alert box when constructed."""
+
+    def __init__(self, message: str, alert_class: str):
+        """
+        Show an alert box with the given message.
+
+        :param message: The message to display. It is inserted into the HTML as-is.
+        :param alert_class: The class for styling the message. Options: info, warning, success, danger.
+        """
+        self.message = message
+        self.alert_class = alert_class
+        self.show_message()
+
+    def show_message(self):
+        """Display the stored message inside a `<div class="alert alert-...">` element."""
+        from IPython.display import HTML, display
+
+        # The closing tag keeps the alert element from swallowing later output markup.
+        display(HTML(f"""<div class="alert alert-{self.alert_class}">{self.message}</div>"""))
+
+
+class DeviceNotFoundAlert(NotebookAlert):
+    def __init__(self, device: str):
+        """
+        Show a warning that `device` is unavailable, followed by the devices that are.
+
+        No availability check happens here; use `check_device` for that, which also
+        shows this warning when the device is not found.
+
+        :param device: The unavailable device.
+        """
+        import openvino as ov
+
+        core = ov.Core()
+        supported_devices = core.available_devices
+        # Singular wording for exactly one device, plural wording otherwise.
+        if len(supported_devices) == 1:
+            availability = f"The following device is available: {core.available_devices[0]}"
+        else:
+            availability = "The following devices are available: " + ", ".join(core.available_devices)
+        self.alert_class = "warning"
+        self.message = f"Running this cell requires a {device} device, which is not available on this system. {availability}"
+        super().__init__(self.message, self.alert_class)
+
+
+def check_device(device: str) -> bool:
+    """
+    Report whether `device` is among the devices OpenVINO lists as available.
+
+    :param device: Device to check. e.g. CPU, GPU
+    :return: True if the device is available, False if not. When it is not available,
+             a DeviceNotFoundAlert is shown (constructed, not raised).
+    """
+    import openvino as ov
+
+    is_available = device in ov.Core().available_devices
+    if not is_available:
+        DeviceNotFoundAlert(device)
+    return is_available
+
+
+def check_openvino_version(version: str) -> bool:
+    """
+    Check whether `version` occurs in the installed OpenVINO version string.
+
+    :param version: the OpenVINO version to check. Example: 2021.4
+    :return: True if the version is installed, False if not. When it is not installed,
+             a "danger" alert with upgrade instructions is shown.
+    """
+    import openvino as ov
+
+    installed_version = ov.get_version()
+    # Substring match against the full version string.
+    if version in installed_version:
+        return True
+
+    instructions = (
+        f"This notebook requires OpenVINO {version}. "
+        f"The version on your system is: <i>{installed_version}</i>.<br>"
+        "Please run <span style='font-family:monospace'>pip install --upgrade -r requirements.txt</span> "
+        "in the openvino_env environment to install this version. "
+        "See the <a href='https://github.com/openvinotoolkit/openvino_notebooks'>"
+        "OpenVINO Notebooks README</a> for detailed instructions"
+    )
+    NotebookAlert(instructions, alert_class="danger")
+    return False
+
+
+def optimize_bge_embedding(model_path, output_model_path):
+    """
+    optimize_bge_embedding used to optimize BGE model for NPU device
+
+    Reads the IR model, replaces the second input of the Multiply node whose
+    friendly name contains "aten::mul/Multiply" with a constant holding the
+    most negative finite float16 value (stored as float32), and saves the
+    result without fp16 compression.
+
+    Arguments:
+        model_path {str} -- original BGE IR model path
+        output_model_path {str} -- Converted BGE IR model path
+    """
+    import openvino as ov
+
+    # The pass utilities live under openvino.runtime in some OpenVINO releases;
+    # fall back to that location when the top-level import fails.
+    try:
+        from openvino.passes import Manager, MatcherPass, WrapType, Matcher
+        from openvino import opset10 as ops
+    except ImportError:
+        from openvino.runtime.passes import Manager, MatcherPass, WrapType, Matcher
+        from openvino.runtime import opset10 as ops
+    core = ov.Core()
+    ov_model = core.read_model(model_path)
+    manager = Manager()
+    # Name fragments of the nodes to patch; entries are removed once handled.
+    packed_layername_tensor_dict_list = [{"name": "aten::mul/Multiply"}]
+
+    class ReplaceTensor(MatcherPass):
+        # Matcher pass visiting every opset10 Multiply node in the model.
+        def __init__(self, packed_layername_tensor_dict_list):
+            MatcherPass.__init__(self)
+            self.model_changed = False
+
+            param = WrapType("opset10.Multiply")
+
+            def callback(matcher: Matcher) -> bool:
+                import numpy as np
+
+                root = matcher.get_match_root()
+                if root is None:
+                    return False
+                for y in packed_layername_tensor_dict_list:
+                    root_name = root.get_friendly_name()
+                    if root_name.find(y["name"]) != -1:
+                        # Most negative finite float16 value, kept as a float32 constant.
+                        max_fp16 = np.array([[[[-np.finfo(np.float16).max]]]]).astype(np.float32)
+                        new_tenser = ops.constant(max_fp16, ov.Type.f32, name="Constant_4431")
+                        # Keep the first input, swap the second one for the new constant.
+                        root.set_arguments([root.input_value(0).node, new_tenser])
+                        # NOTE(review): removes from the list being iterated; harmless while the
+                        # list has a single entry, and it limits the rewrite to the first match.
+                        packed_layername_tensor_dict_list.remove(y)
+
+                # Returned for every matched Multiply, whether or not it was rewritten.
+                return True
+
+            self.register_matcher(Matcher(param, "ReplaceTensor"), callback)
+
+    manager.register_pass(ReplaceTensor(packed_layername_tensor_dict_list))
+    manager.run_passes(ov_model)
+    ov.save_model(ov_model, output_model_path, compress_to_fp16=False)
+
+
+def collect_telemetry(file: str = ""):
+    """
+    The function only tracks that the notebooks cell was executed and does not include any personally identifiable information (PII).
+
+    Nothing is sent when the SCARF_NO_ANALYTICS or DO_NOT_TRACK environment variable is "1".
+    The call is best-effort: any failure is ignored and never reaches the caller.
+
+    :param file: Optional file name to include in the request parameters.
+    """
+    try:
+        import os
+        import requests
+        import platform
+        from pathlib import Path
+
+        if os.getenv("SCARF_NO_ANALYTICS") == "1" or os.getenv("DO_NOT_TRACK") == "1":
+            return
+        url = "https://openvino.gateway.scarf.sh/telemetry"
+        params = {
+            "notebook_dir": Path(__file__).parent.name,
+            "platform": platform.system(),
+            "arch": platform.machine(),
+            "python_version": platform.python_version(),
+        }
+        if file:
+            params["file"] = file
+        # Bound the request so an unreachable endpoint cannot stall the notebook.
+        requests.get(url, params=params, timeout=5)
+    except Exception:
+        # Telemetry must never break the notebook, so failures are dropped on purpose.
+        pass
diff --git a/notebooks/funasr-nano/ov_funasr_helper.py b/notebooks/funasr-nano/ov_funasr_helper.py
index 0f2aa53ddfc..31892ccb91c 100644
--- a/notebooks/funasr-nano/ov_funasr_helper.py
+++ b/notebooks/funasr-nano/ov_funasr_helper.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import openvino as ov
 import nncf
 from pathlib import Path
@@ -417,7 +419,7 @@ def convert_funasr(model_id, model_path=None, quantization_config=None):
         model_path = Path(model_path)
 
     if all((model_path / model_name).exists() for model_name in [FRONTEND_CONFIG_PATH, TEXT_EMBEDDINGS_PATH, ENCODER_PATH, LANGUAGE_PATH]):
-        print(f"✅ {model_id} model already converted. You can find results in {model_path}")
+        print(f"[OK] {model_id} model already converted. You can find results in {model_path}")
         return model_path
     print(f"⌛ {model_id} conversion started. Be patient, it may takes some time.")
     print("⌛ Load Original model")
@@ -426,7 +428,7 @@ def convert_funasr(model_id, model_path=None, quantization_config=None):
     pt_model, kwargs = FunASRNano.from_pretrained(model=model_id, device="cpu")
     kwargs
     pt_model = pt_model.to(torch.float32)
-    print("✅ Original model successfully loaded")
+    print("[OK] Original model successfully loaded")
     print("⌛ Export tokenizer and config")
     kwargs["tokenizer"].save_pretrained(model_path)
     for json_file in Path(model_id + "/Qwen3-0.6B").glob("*.json"):
@@ -457,7 +459,7 @@ def convert_funasr(model_id, model_path=None, quantization_config=None):
         }
         with open(model_path / FRONTEND_CONFIG_PATH, "w") as f:
             json.dump(frontend_config, f, indent=2)
-        print("✅ Frontend config exported")
+        print("[OK] Frontend config exported")
 
     if not (model_path / TEXT_EMBEDDINGS_PATH).exists():
         print("⌛ Convert TEXT_EMBEDDINGS model")
@@ -467,7 +469,7 @@ def convert_funasr(model_id, model_path=None, quantization_config=None):
         del ov_model
         cleanup_torchscript_cache()
         gc.collect()
-        print("✅ TEXT_EMBEDDINGS model successfully converted")
+        print("[OK] TEXT_EMBEDDINGS model successfully converted")
 
     if not (model_path / ENCODER_PATH).exists():
         print("⌛ Convert ENCODER_PATH model")
@@ -493,7 +495,7 @@ def forward_wrap_encoder(self, speech: torch.Tensor, speech_lengths: torch.Tenso
         del pt_model._orig_forward
         cleanup_torchscript_cache()
         gc.collect()
-        print("✅ ENCODER model successfully converted")
+        print("[OK] ENCODER model successfully converted")
 
     if not (model_path / LANGUAGE_PATH).exists():
         print("⌛ Convert LANGUAGE_MODEL model")
@@ -584,11 +586,11 @@ def forward_wrap(
         for output, output_name in zip(ov_model.outputs, output_names):
             output.get_tensor().set_names({output_name})
         patch_stateful(ov_model)
-        print("✅ Decoder model successfully converted")
+        print("[OK] Decoder model successfully converted")
         if quantization_config is not None and "llm" in quantization_config:
             print(f"⌛ Weights compression with {quantization_config['llm']['mode']} mode started")
             ov_model = nncf.compress_weights(ov_model, **quantization_config["llm"])
-            print("✅ Weights compression finished")
+            print("[OK] Weights compression finished")
         else:
             ov_model.set_rt_info("f16", ["runtime_options", "KV_CACHE_PRECISION"])
         ov_model.set_rt_info("8.0", ["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
@@ -599,7 +601,7 @@ def forward_wrap(
 
     del pt_model
     gc.collect()
-    print(f"✅ {model_id} model conversion finished. You can find results in {model_path}")
+    print(f"[OK] {model_id} model conversion finished. You can find results in {model_path}")
     return model_path
 
 
@@ -851,7 +853,7 @@ def __init__(self, pretrained_dir, device, llm_ov_config={}):
 
         # Load tokenizer from saved config
         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
-        print(f"✅ Tokenizer loaded from {model_dir}")
+        print(f"[OK] Tokenizer loaded from {model_dir}")
 
         # Load frontend from saved config
         frontend_config_path = model_dir / FRONTEND_CONFIG_PATH
@@ -869,11 +871,11 @@ def __init__(self, pretrained_dir, device, llm_ov_config={}):
                 "max_length": config.get("max_length", 512),
                 "batch_size": config.get("batch_size", 1),
             }
-            print(f"✅ Frontend and inference config loaded from {frontend_config_path}")
+            print(f"[OK] Frontend and inference config loaded from {frontend_config_path}")
         else:
             self.frontend = None
             self.inference_kwargs = {}
-            print(f"⚠️ Frontend config not found at {frontend_config_path}, frontend will need to be provided manually")
+            print(f"[WARN] Frontend config not found at {frontend_config_path}, frontend will need to be provided manually")
 
     def data_template(self, data):
         system, user, assistant = [], [], []
diff --git a/notebooks/funasr-nano/pip_helper.py b/notebooks/funasr-nano/pip_helper.py
new file mode 100644
index 00000000000..18116023b9e
--- /dev/null
+++ b/notebooks/funasr-nano/pip_helper.py
@@ -0,0 +1,10 @@
+import sys
+
+
+def pip_install(*args):
+    """Run `pip install` with the current interpreter; every argument is split on whitespace into CLI tokens."""
+    import subprocess  # nosec - disable B404:import-subprocess check
+
+    # split() without a separator skips repeated/leading/trailing whitespace, so no empty argument reaches pip.
+    cli_args = [token for arg in args for token in str(arg).split()]
+    subprocess.run([sys.executable, "-m", "pip", "install", *cli_args], check=True)