diff --git a/.gitignore b/.gitignore
index 9a79cad0..52a9a37b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,5 @@ inference-api/__pycache__/
 CLAUDE.md
 docs/RAG_PRODUCTIONIZATION_PLAN.md
 docs/DOCKER_CONTROL_SERVICE_PLAN.md
+!app/backend/shared_config/models_from_inference_server.json
+
diff --git a/app/backend/api/settings.py b/app/backend/api/settings.py
index cf7d799e..06671024 100644
--- a/app/backend/api/settings.py
+++ b/app/backend/api/settings.py
@@ -64,11 +64,6 @@
 # Application definition
 
 INSTALLED_APPS = [
-    "django.contrib.admin",
-    "django.contrib.auth",
-    "django.contrib.contenttypes",
-    "django.contrib.sessions",
-    "django.contrib.messages",
     "django.contrib.staticfiles",
     "docker_control.apps.DockerControlConfig",
     "model_control",
@@ -81,11 +76,8 @@
 MIDDLEWARE = [
     "corsheaders.middleware.CorsMiddleware",
     "django.middleware.security.SecurityMiddleware",
-    "django.contrib.sessions.middleware.SessionMiddleware",
     "django.middleware.common.CommonMiddleware",
     "django.middleware.csrf.CsrfViewMiddleware",
-    "django.contrib.auth.middleware.AuthenticationMiddleware",
-    "django.contrib.messages.middleware.MessageMiddleware",
     "django.middleware.clickjacking.XFrameOptionsMiddleware",
 ]
 
@@ -100,25 +92,12 @@
             "context_processors": [
                 "django.template.context_processors.debug",
                 "django.template.context_processors.request",
-                "django.contrib.auth.context_processors.auth",
-                "django.contrib.messages.context_processors.messages",
             ],
         },
     },
 ]
 
 WSGI_APPLICATION = "api.wsgi.application"
-SESSIONS_ENGINE = "django.contrib.sessions.backends.cache"
-# Database
-# https://docs.djangoproject.com/en/4.2/ref/settings/#databases
-
-# SQLite database for deployment history and other persistent data
-DATABASES = {
-    "default": {
-        "ENGINE": "django.db.backends.sqlite3",
-        "NAME": backend_config.backend_cache_root / "db.sqlite3",
-    }
-}
 
 # local memory thread-safe default
 # the LOCATION for locmem.LocMemCache cache backend is just a name for tracking
@@ -135,24 +114,6 @@
     },
 }
 
-# Password validation
-# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
-
-AUTH_PASSWORD_VALIDATORS = [
-    {
-        "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
-    },
-    {
-        "NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
-    },
-    {
-        "NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
-    },
-    {
-        "NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
-    },
-]
-
 # Internationalization
 # https://docs.djangoproject.com/en/4.2/topics/i18n/
 
diff --git a/app/backend/api/urls.py b/app/backend/api/urls.py
index 441f06b7..f8aa4a19 100644
--- a/app/backend/api/urls.py
+++ b/app/backend/api/urls.py
@@ -19,12 +19,10 @@
     2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
 """
 
-from django.contrib import admin
 from api.views import UpStatusView
 from django.urls import include, path
 
 urlpatterns = [
-    path("admin/", admin.site.urls),
     path("up/", UpStatusView.as_view()),
     path("docker/", include("docker_control.urls")),
     path("models/", include("model_control.urls")),
diff --git a/app/backend/board_control/services.py b/app/backend/board_control/services.py
index 2c08a231..48b8c30f 100644
--- a/app/backend/board_control/services.py
+++ b/app/backend/board_control/services.py
@@ -16,12 +16,16 @@
 
 class SystemResourceService:
     """Service for monitoring system resources and TT device telemetry"""
-    
+
     # Cache keys and timeout
     TT_SMI_CACHE_KEY = "tt_smi_data"
     TT_SMI_CACHE_TIMEOUT = 3600  # Cache for 1 hour (since we'll refresh on events only)
     BOARD_TYPE_CACHE_KEY = "board_type_data"
     BOARD_TYPE_CACHE_TIMEOUT = 3600  # Cache board type for 1 hour (since it rarely changes)
+
+    # Device state cache keys
+    DEVICE_STATE_CACHE_KEY = "device_state_v2"
+    DEVICE_RESETTING_KEY = "device_resetting"
     
     @staticmethod
     def get_tt_smi_data(timeout=10):
@@ -412,9 +416,245 @@ def force_refresh_tt_smi_cache():
         # Clear the existing cache
         cache.delete(SystemResourceService.TT_SMI_CACHE_KEY)
         cache.delete(SystemResourceService.BOARD_TYPE_CACHE_KEY)
-        
+
         # Fetch fresh data
         SystemResourceService.get_tt_smi_data()
         SystemResourceService.get_board_type()
-        
-        logger.info("tt-smi cache refreshed successfully") 
\ No newline at end of file
+
+        logger.info("tt-smi cache refreshed successfully")
+
+    # -------------------------------------------------------------------------
+    # Device State Machine — single source of truth
+    # -------------------------------------------------------------------------
+
+    @staticmethod
+    def _extract_board_type_from_data(data):
+        """Map a parsed ``tt-smi -s`` snapshot to a canonical board-type name.
+
+        Combines the board family with the device count (e.g. >= 4 n300 -> "T3K");
+        returns "unknown" for empty input, mixed families or unrecognized names.
+        """
+        device_info = data.get("device_info") if isinstance(data, dict) else None
+        if not device_info:
+            return "unknown"
+
+        def _family(info):
+            # Drop only a known locality suffix so e.g. "galaxy t3k" keeps its "t3k".
+            name = str((info.get("board_info") or {}).get("board_type") or "unknown")
+            head, _, tail = name.rpartition(" ")
+            return head if head and tail.lower() in ("l", "r", "local", "remote") else name
+
+        unique = {_family(info) for info in device_info}
+        if len(unique) > 1:
+            logger.warning(f"Mixed board types detected: {unique}")
+            return "unknown"
+
+        raw = unique.pop()
+        num_devices = len(device_info)
+        raw_lower = raw.lower()
+
+        if "n150" in raw_lower:
+            return "N150X4" if num_devices >= 4 else "N150"
+        if "n300" in raw_lower:
+            return "T3K" if num_devices >= 4 else "N300"
+        if "p300" in raw_lower:
+            if num_devices >= 8:
+                return "P300Cx4"
+            if num_devices >= 4:
+                return "P300Cx2"
+            return "P300c"
+        if "p150" in raw_lower:
+            if num_devices >= 8:
+                return "P150X8"
+            if num_devices >= 4:
+                return "P150X4"
+            return "P150"
+        if "p100" in raw_lower:
+            return "P100"
+        if "e150" in raw_lower:
+            return "E150"
+        if "galaxy" in raw_lower:
+            return "GALAXY_T3K" if "t3k" in raw_lower else "GALAXY"
+
+        logger.warning(f"Unknown board type string: {raw!r}")
+        return "unknown"
+
+    @staticmethod
+    def _extract_devices_from_data(data):
+        """Summarise every ``device_info`` entry of a tt-smi snapshot.
+
+        Missing or non-numeric temperature/power/voltage readings become 0.0.
+        """
+        def _as_float(reading):
+            try:
+                return 0.0 if reading is None else float(reading)
+            except (TypeError, ValueError):
+                return 0.0
+
+        if not data or "device_info" not in data:
+            return []
+        summaries = []
+        for position, entry in enumerate(data["device_info"]):
+            board, readings = entry.get("board_info", {}), entry.get("telemetry", {})
+            summaries.append({
+                "index": position,
+                "board_type": board.get("board_type", "Unknown"),
+                "bus_id": board.get("bus_id", "N/A"),
+                "temperature": _as_float(readings.get("asic_temperature")),
+                "power": _as_float(readings.get("power")),
+                "voltage": _as_float(readings.get("voltage")),
+            })
+        return summaries
+
+    @staticmethod
+    def get_device_state():
+        """Resolve the current device state; the single source of truth for callers.
+
+        States (with how long each result is cached):
+          HEALTHY     — tt-smi -s succeeded and its JSON parsed (30 s)
+          BAD_STATE   — /dev/tenstorrent present but tt-smi timed out / errored (10 s)
+          RESETTING   — the resetting flag is set in the cache (checked first)
+          NOT_PRESENT — /dev/tenstorrent path does not exist (15 s)
+          UNKNOWN     — can't determine (tt-smi missing / unexpected error; never cached)
+        """
+        # RESETTING takes priority — check before cache
+        if cache.get(SystemResourceService.DEVICE_RESETTING_KEY):
+            return {
+                "state": "RESETTING",
+                "board_type": "unknown",
+                "board_name": "Resetting…",
+                "devices": [],
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+
+        # Return cached result if still fresh
+        cached = cache.get(SystemResourceService.DEVICE_STATE_CACHE_KEY)
+        if cached is not None:
+            return cached
+
+        # Check physical device presence
+        if not os.path.exists("/dev/tenstorrent"):
+            result = {
+                "state": "NOT_PRESENT",
+                "board_type": "unknown",
+                "board_name": "Not Present",
+                "devices": [],
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+            cache.set(SystemResourceService.DEVICE_STATE_CACHE_KEY, result, timeout=15)
+            return result
+
+        # Try tt-smi -s with 10-second timeout
+        try:
+            logger.info("Running tt-smi -s for device state check")
+            process = subprocess.Popen(
+                ["tt-smi", "-s"],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                stdin=subprocess.DEVNULL,
+                text=True,
+                start_new_session=True,  # own process group for killpg(); thread-safe unlike preexec_fn
+            )
+
+            try:
+                stdout, stderr = process.communicate(timeout=10)
+            except subprocess.TimeoutExpired:
+                logger.error("tt-smi -s timed out after 10s — board in BAD_STATE")
+                try:
+                    os.killpg(os.getpgid(process.pid), signal.SIGTERM)
+                    process.wait(timeout=2)
+                except Exception:
+                    try:
+                        os.killpg(os.getpgid(process.pid), signal.SIGKILL)
+                        process.wait(timeout=2)  # reap the killed child so it is not left a zombie
+                    except Exception:
+                        pass
+                result = {
+                    "state": "BAD_STATE",
+                    "board_type": "unknown",
+                    "board_name": "Bad State",
+                    "devices": [],
+                    "last_updated": timezone.now().isoformat(),
+                    "reset_suggested": True,
+                }
+                cache.set(SystemResourceService.DEVICE_STATE_CACHE_KEY, result, timeout=10)
+                return result
+
+            if process.returncode != 0:
+                logger.error(f"tt-smi -s exit code {process.returncode}: {stderr.strip()!r}")
+                result = {
+                    "state": "BAD_STATE",
+                    "board_type": "unknown",
+                    "board_name": "Bad State",
+                    "devices": [],
+                    "last_updated": timezone.now().isoformat(),
+                    "reset_suggested": True,
+                }
+                cache.set(SystemResourceService.DEVICE_STATE_CACHE_KEY, result, timeout=10)
+                return result
+
+            try:
+                data = json.loads(stdout)
+            except json.JSONDecodeError as e:
+                logger.error(f"Failed to parse tt-smi JSON: {e}")
+                result = {
+                    "state": "BAD_STATE",
+                    "board_type": "unknown",
+                    "board_name": "Bad State",
+                    "devices": [],
+                    "last_updated": timezone.now().isoformat(),
+                    "reset_suggested": True,
+                }
+                cache.set(SystemResourceService.DEVICE_STATE_CACHE_KEY, result, timeout=10)
+                return result
+
+            board_type = SystemResourceService._extract_board_type_from_data(data)
+            devices = SystemResourceService._extract_devices_from_data(data)
+            result = {
+                "state": "HEALTHY",
+                "board_type": board_type,
+                "board_name": board_type,
+                "devices": devices,
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+            cache.set(SystemResourceService.DEVICE_STATE_CACHE_KEY, result, timeout=30)
+            return result
+
+        except FileNotFoundError:
+            logger.error("tt-smi command not found")
+            # Don't cache UNKNOWN so each call re-checks (tt-smi may be installed later)
+            return {
+                "state": "UNKNOWN",
+                "board_type": "unknown",
+                "board_name": "Unknown",
+                "devices": [],
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+        except Exception as e:
+            logger.error(f"Unexpected error in get_device_state: {e}")
+            return {
+                "state": "UNKNOWN",
+                "board_type": "unknown",
+                "board_name": "Unknown",
+                "devices": [],
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+
+    @staticmethod
+    def set_resetting_state():
+        """Flag the device as resetting for up to 120 s and drop the cached device state."""
+        cache.set(SystemResourceService.DEVICE_RESETTING_KEY, True, timeout=120)  # expires on its own if never cleared
+        cache.delete(SystemResourceService.DEVICE_STATE_CACHE_KEY)
+        logger.info("Device state set to RESETTING")
+
+    @staticmethod
+    def clear_device_state_cache():
+        """Delete both the cached device state and the resetting flag from the cache."""
+        cache.delete(SystemResourceService.DEVICE_STATE_CACHE_KEY)  # next get_device_state() call re-probes
+        cache.delete(SystemResourceService.DEVICE_RESETTING_KEY)
+        logger.info("Device state cache cleared")
\ No newline at end of file
diff --git a/app/backend/board_control/urls.py b/app/backend/board_control/urls.py
index 42e59361..3e2b323d 100644
--- a/app/backend/board_control/urls.py
+++ b/app/backend/board_control/urls.py
@@ -19,4 +19,8 @@
     
     # Cache management
     path("refresh-cache/", views.RefreshCacheView.as_view(), name="refresh-cache"),
-] 
\ No newline at end of file
+
+    # Unified device state & reset (new)
+    path("device-state/", views.DeviceStateView.as_view(), name="device-state"),
+    path("device-reset/", views.DeviceResetView.as_view(), name="device-reset"),
+]
\ No newline at end of file
diff --git a/app/backend/board_control/views.py b/app/backend/board_control/views.py
index f904557c..7dd83428 100644
--- a/app/backend/board_control/views.py
+++ b/app/backend/board_control/views.py
@@ -228,20 +228,78 @@ def patch(self, request, alert_id, *args, **kwargs):
 @method_decorator(csrf_exempt, name='dispatch')
 class RefreshCacheView(APIView):
     """Manual cache refresh endpoint for debugging and manual triggering"""
-    
+
     def post(self, request, *args, **kwargs):
         try:
             logger.info("Manual cache refresh requested")
             SystemResourceService.force_refresh_tt_smi_cache()
-            
+
             return Response({
                 "status": "success",
                 "message": "tt-smi cache refreshed successfully"
             }, status=status.HTTP_200_OK)
-            
+
         except Exception as e:
             logger.error(f"Error manually refreshing cache: {str(e)}")
             return Response(
                 {"error": "Failed to refresh cache", "details": str(e)},
                 status=status.HTTP_500_INTERNAL_SERVER_ERROR
-            ) 
\ No newline at end of file
+            )
+
+
+@method_decorator(csrf_exempt, name='dispatch')
+class DeviceStateView(APIView):
+    """
+    GET /board-api/device-state/
+
+    Single source of truth for board state; replaces separate calls to
+    /board-api/status/, /board-api/footer-data/ and /docker-api/board-info/.
+    Always answers HTTP 200: failures are logged and reported as state UNKNOWN.
+    """
+
+    def get(self, request, *args, **kwargs):
+        try:
+            payload = SystemResourceService.get_device_state()
+        except Exception as e:
+            logger.error(f"Error getting device state: {e}")
+            payload = {
+                "state": "UNKNOWN",
+                "board_type": "unknown",
+                "board_name": "Unknown",
+                "devices": [],
+                "last_updated": timezone.now().isoformat(),
+                "reset_suggested": False,
+            }
+        return Response(payload, status=status.HTTP_200_OK)
+
+
+@method_decorator(csrf_exempt, name='dispatch')
+class DeviceResetView(APIView):
+    """
+    POST /board-api/device-reset/
+
+    Dedicated board reset endpoint, separate from the Docker-coupled
+    /docker-api/reset_board/.  Runs perform_reset() and reports its outcome as
+    {"success", "message", "attempts_used"}; any exception yields HTTP 500.
+    """
+
+    def post(self, request, *args, **kwargs):
+        from docker_control.docker_utils import perform_reset
+
+        try:
+            logger.info("Device reset requested via /board-api/device-reset/")
+            outcome = perform_reset()
+            # "http_status" is transport metadata: take it out of the result dict.
+            code = outcome.pop("http_status", 200)
+            body = {
+                "success": outcome.get("status") == "success",
+                "message": outcome.get("message", ""),
+                "attempts_used": outcome.get("attempts_used", 0),
+            }
+        except Exception as e:
+            logger.error(f"Error in device reset: {e}")
+            code = status.HTTP_500_INTERNAL_SERVER_ERROR
+            # The exception text is passed through to the client as the message.
+            body = {"success": False, "message": str(e), "attempts_used": 0}
+
+        return Response(body, status=code)
\ No newline at end of file
diff --git a/app/backend/docker_control/admin.py b/app/backend/docker_control/admin.py
index 2c79060a..917beb36 100644
--- a/app/backend/docker_control/admin.py
+++ b/app/backend/docker_control/admin.py
@@ -1,7 +1,3 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
-
-from django.contrib import admin
-
-# Register your models here.
diff --git a/app/backend/docker_control/apps.py b/app/backend/docker_control/apps.py
index 0a263c9c..68dfa377 100644
--- a/app/backend/docker_control/apps.py
+++ b/app/backend/docker_control/apps.py
@@ -14,32 +14,15 @@ class DockerControlConfig(AppConfig):
     def ready(self):
         """Initialize docker control services"""
         logger.info("Docker control app is ready")
-        
-        # Verify database migrations are applied
+
+        # Log how many deployments are already tracked
         try:
-            from django.db import connection
-            
-            # Check if ModelDeployment table exists
-            with connection.cursor() as cursor:
-                cursor.execute("""
-                    SELECT name FROM sqlite_master 
-                    WHERE type='table' AND name='docker_control_modeldeployment'
-                """)
-                table_exists = cursor.fetchone() is not None
-            
-            if not table_exists:
-                logger.warning(
-                    "ModelDeployment table not found. Database migrations may not be applied. "
-                    "Run: python manage.py migrate docker_control"
-                )
-            else:
-                # Count existing deployment records
-                from docker_control.models import ModelDeployment
-                count = ModelDeployment.objects.count()
-                logger.info(f"Deployment history table verified. Existing records: {count}")
+            from docker_control.models import ModelDeployment
+            count = ModelDeployment.objects.count()
+            logger.info(f"Deployment store loaded. Existing records: {count}")
         except Exception as e:
-            logger.warning(f"Could not verify deployment history table: {e}")
-        
+            logger.warning(f"Could not read deployment store: {e}")
+
         # Start container health monitoring service
         try:
             from docker_control.health_monitor import start_health_monitoring
diff --git a/app/backend/docker_control/deployment_store.py b/app/backend/docker_control/deployment_store.py
new file mode 100644
index 00000000..ba5421fd
--- /dev/null
+++ b/app/backend/docker_control/deployment_store.py
@@ -0,0 +1,248 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
+
+"""
+Thread-safe JSON file store replacing Django ORM for ModelDeployment.
+
+Provides a partial ORM-like interface (objects.create, filter, all, get, save) backed by
+a single JSON file in the persistent storage volume; thread-safe within one process only.
+"""
+
+import json
+import os
+import threading
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, List, Optional
+
+from shared_config.logger_config import get_logger
+
+logger = get_logger(__name__)
+
+_STORE_PATH = (  # <volume root>/backend_volume/deployments.json
+    Path(os.getenv("INTERNAL_PERSISTENT_STORAGE_VOLUME", "/tt_studio_persistent_volume"))
+    / "backend_volume"
+    / "deployments.json"
+)
+
+_lock = threading.Lock()  # serialises store reads/writes between threads of this process
+
+
+def _now() -> datetime:  # timezone-aware current time in UTC
+    return datetime.now(tz=timezone.utc)
+
+
+def _parse_dt(s: Optional[str]) -> Optional[datetime]:
+    """Parse an ISO-8601 string; None or unparsable input yields None."""
+    try:
+        return None if s is None else datetime.fromisoformat(s)
+    except Exception:
+        # Anything fromisoformat() rejects is treated as "no timestamp".
+        return None
+
+
+def _sort_key(record: dict, field: str):
+    """Return a sort key for ``field``: missing/None values first, then by value."""
+    val = record.get(field)
+    # A (present, value) pair keeps None from ever being compared with ints or
+    # strings; same-offset ISO strings still sort lexicographically = chronologically.
+    return (val is not None, val)
+
+
+def _load_raw() -> dict:  # returns {"next_id": int, "records": list}; falls back to an empty store
+    try:
+        data = json.loads(_STORE_PATH.read_text()) if _STORE_PATH.exists() else {"next_id": 1, "records": []}
+        if not (isinstance(data["records"], list) and isinstance(data["next_id"], int)):
+            raise ValueError("unexpected store structure")  # valid JSON, wrong shape
+        return data
+    except Exception as e:
+        logger.warning(f"Could not read deployment store, starting fresh: {e}")
+        return {"next_id": 1, "records": []}
+
+
+def _save_raw(data: dict) -> None:
+    """Write ``data`` as JSON via a temp file and ``os.replace``; write errors are only logged."""
+    _STORE_PATH.parent.mkdir(parents=True, exist_ok=True)
+    scratch = _STORE_PATH.with_suffix(".tmp")
+    try:
+        scratch.write_text(json.dumps(data, indent=2, default=str))
+        os.replace(scratch, _STORE_PATH)
+    except Exception as e:
+        logger.error(f"Failed to save deployment store: {e}")
+        try:
+            scratch.unlink(missing_ok=True)  # best-effort removal of the temp file
+        except Exception:
+            pass
+
+
+def _match(record: dict, kwargs: dict) -> bool:
+    """Tell whether ``record`` satisfies every lookup in ``kwargs``.
+
+    Supported lookups: ``field`` (equality), ``field__in`` (membership in the
+    given collection) and ``field__isnull`` ("value is None" equals the flag).
+    Fields absent from the record are read as None.
+    """
+    def _holds(lookup, expected):
+        if lookup.endswith("__in"):
+            return record.get(lookup[: -len("__in")]) in expected
+        if lookup.endswith("__isnull"):
+            return (record.get(lookup[: -len("__isnull")]) is None) == expected
+        return record.get(lookup) == expected
+
+    # An empty ``kwargs`` matches every record.
+    return all(_holds(lookup, expected) for lookup, expected in kwargs.items())
+
+
+class _QuerySet:  # list-backed stand-in for the few Django QuerySet calls used by this app
+    def __init__(self, records: List[dict]):
+        self._records = records  # raw record dicts, converted to objects only on access
+
+    def filter(self, **kwargs) -> "_QuerySet":
+        return _QuerySet([rec for rec in self._records if _match(rec, kwargs)])
+
+    def order_by(self, *fields) -> "_QuerySet":
+        ordered = list(self._records)
+        # Stable sorts applied from the least to the most significant field.
+        for spec in fields[::-1]:
+            descending = spec.startswith("-")
+            ordered.sort(key=lambda rec, name=spec.lstrip("-"): _sort_key(rec, name), reverse=descending)
+        return _QuerySet(ordered)
+
+    def first(self) -> Optional["ModelDeployment"]:
+        if self._records:
+            return ModelDeployment._from_dict(self._records[0])
+        return None
+
+    def exists(self) -> bool:
+        return bool(self._records)
+
+    def count(self) -> int:
+        return len(self._records)
+
+    def get(self, **kwargs) -> "ModelDeployment":
+        hits = [rec for rec in self._records if _match(rec, kwargs)]
+        if len(hits) == 1:
+            return ModelDeployment._from_dict(hits[0])
+        if hits:
+            raise Exception(f"Multiple records matching {kwargs}")
+        raise ModelDeployment.DoesNotExist(f"No record matching {kwargs}")
+
+    def __iter__(self):
+        return map(ModelDeployment._from_dict, self._records)
+
+    def __getitem__(self, key):
+        picked = self._records[key]
+        # A slice yields another query set; a single index yields a hydrated object.
+        return _QuerySet(picked) if isinstance(key, slice) else ModelDeployment._from_dict(picked)
+
+    def __len__(self) -> int:
+        return len(self._records)
+
+
+class _Manager:  # module-level entry point exposed as ModelDeployment.objects
+    def create(self, **kwargs) -> "ModelDeployment":  # unknown keyword arguments are ignored
+        with _lock:
+            store = _load_raw()
+            new_id = store["next_id"]
+            store["next_id"] = new_id + 1
+            record = {
+                "id": new_id,
+                "container_id": kwargs.get("container_id", ""),
+                "container_name": kwargs.get("container_name", ""),
+                "model_name": kwargs.get("model_name", ""),
+                "device": kwargs.get("device", ""),
+                "deployed_at": _now().isoformat(), "stopped_at": None,  # stamped here, never taken from kwargs
+                "status": kwargs.get("status", "running"),
+                "stopped_by_user": kwargs.get("stopped_by_user", False),
+                "port": kwargs.get("port", None),
+                "device_id": kwargs.get("device_id", 0),
+                "workflow_log_path": kwargs.get("workflow_log_path", None),
+            }
+            store["records"].append(record)
+            _save_raw(store)
+        return ModelDeployment._from_dict(record)
+
+    def all(self) -> _QuerySet:
+        with _lock:
+            snapshot = list(_load_raw()["records"])
+        return _QuerySet(snapshot)
+
+    def filter(self, **kwargs) -> _QuerySet:
+        return self.all().filter(**kwargs)
+
+    def get(self, **kwargs) -> "ModelDeployment":
+        return self.all().get(**kwargs)
+
+
+class ModelDeployment:  # one deployment record; persisted via the JSON deployment store
+    class DoesNotExist(Exception):  # raised by get() when no record matches
+        pass
+
+    objects: _Manager  # set below
+
+    def __init__(self):
+        self.id: Optional[int] = None
+        self.container_id: str = ""
+        self.container_name: str = ""
+        self.model_name: str = ""
+        self.device: str = ""
+        self.deployed_at: Optional[datetime] = None
+        self.stopped_at: Optional[datetime] = None
+        self.status: str = "running"
+        self.stopped_by_user: bool = False
+        self.port: Optional[int] = None
+        self.device_id: int = 0
+        self.workflow_log_path: Optional[str] = None
+
+    @classmethod
+    def _from_dict(cls, d: dict) -> "ModelDeployment":
+        """Hydrate an instance from a stored record; absent keys keep the field defaults."""
+        obj = cls()
+        fallbacks = {
+            "container_id": "", "container_name": "", "model_name": "", "device": "",
+            "status": "running", "stopped_by_user": False, "device_id": 0,
+        }
+        for name, fallback in fallbacks.items():
+            setattr(obj, name, d.get(name, fallback))
+        for name in ("id", "port", "workflow_log_path"):
+            setattr(obj, name, d.get(name))
+        # Timestamps are stored as ISO strings and revived as datetimes.
+        obj.deployed_at = _parse_dt(d.get("deployed_at"))
+        obj.stopped_at = _parse_dt(d.get("stopped_at"))
+        return obj
+
+    def _to_dict(self) -> dict:
+        """Serialise to the stored record layout, with timestamps as ISO strings (or None)."""
+        def _iso(moment):
+            return moment.isoformat() if moment else None
+
+        # Key order here is the order in which fields are written to the JSON file.
+        record = {name: getattr(self, name) for name in (
+            "id", "container_id", "container_name", "model_name", "device",
+            "deployed_at", "stopped_at", "status", "stopped_by_user",
+            "port", "device_id", "workflow_log_path",
+        )}
+        # Datetimes are stored as ISO strings.
+        record["deployed_at"] = _iso(self.deployed_at)
+        record["stopped_at"] = _iso(self.stopped_at)
+        return record
+
+    def save(self) -> None:
+        with _lock:
+            store = _load_raw()
+            rows = store["records"]
+            slot = next((pos for pos, row in enumerate(rows) if row.get("id") == self.id), None)
+            if slot is None:
+                # Not found — append as new (shouldn't happen in normal flow)
+                logger.warning(f"save() called on deployment id={self.id} not found in store; appending")
+                rows.append(self._to_dict())
+            else:
+                rows[slot] = self._to_dict()
+            _save_raw(store)
+
+    def __str__(self) -> str:
+        return f"{self.model_name} on {self.device} - {self.status}"
+
+
+ModelDeployment.objects = _Manager()
diff --git a/app/backend/docker_control/docker_utils.py b/app/backend/docker_control/docker_utils.py
index 0c4ab8f5..25abecd5 100644
--- a/app/backend/docker_control/docker_utils.py
+++ b/app/backend/docker_control/docker_utils.py
@@ -86,7 +86,7 @@ def map_board_type_to_device_name(board_type):
     logger.info(f"Mapped board type '{board_type}' to device name '{device_name}'")
     return device_name
 
-def run_container(impl, weights_id):
+def run_container(impl, weights_id, device_id=0):
     """Run a docker container via TT Inference Server API"""
     if (impl.model_type == ModelTypes.CHAT):
         # For chat models, we use the TT Inference Server API to run the container
@@ -103,11 +103,29 @@ def run_container(impl, weights_id):
                 "workflow": "server",  # Default workflow for container runs
                 "device": device,  # Use mapped device name
                 "docker_server": True,
-                "dev_mode": True
+                "dev_mode": True,
+                "chip_id": device_id,  # Pin to specific chip; requires inference server support
             }
 
             logger.info(f"API payload: {payload}")
 
+            # Write a "starting" record immediately so history shows the deployment in-progress
+            pending_record = None
+            try:
+                pending_record = ModelDeployment.objects.create(
+                    container_id=f"pending_{impl.model_name}",
+                    container_name=f"pending_{impl.model_name}",
+                    model_name=impl.model_name,
+                    device=device,
+                    device_id=device_id,
+                    status="starting",
+                    stopped_by_user=False,
+                    port=7000,
+                )
+                logger.info(f"Created pending deployment record for {impl.model_name}")
+            except Exception as e:
+                logger.warning(f"Could not create pending deployment record: {e}")
+
             # Make POST request to TT Inference Server API
             api_url = "http://172.18.0.1:8001/run"
 
@@ -128,17 +146,17 @@ def run_container(impl, weights_id):
 
                 # Update deploy cache on success
                 update_deploy_cache()
-                
+
                 # Notify agent about new container deployment
                 notify_agent_of_new_container(api_result["container_name"])
-                
-                # Save deployment record to database
+
+                # Update the pending record (or create one if pending write failed)
                 container_id = None
                 container_name = "unknown"
                 try:
                     container_id = api_result.get("container_id")
                     container_name = api_result.get("container_name", "unknown")
-                    
+
                     # If container_id is not in response, try to get it from Docker by name
                     if not container_id and container_name:
                         try:
@@ -148,30 +166,33 @@ def run_container(impl, weights_id):
                             logger.info(f"Retrieved container_id {container_id} from Docker for {container_name}")
                         except Exception as docker_error:
                             logger.warning(f"Could not get container_id from Docker: {docker_error}")
-                            # Use container_name as fallback ID if we can't get the actual ID
                             container_id = container_name
-                    
+
                     if container_id:
-                        # Extract workflow log path from API response
                         workflow_log_path = api_result.get("docker_log_file_path")
                         logger.info(f"Extracted workflow_log_path from api_result: {workflow_log_path}")
-                        logger.info(f"workflow_log_path type: {type(workflow_log_path)}, is None: {workflow_log_path is None}")
-                        
-                        ModelDeployment.objects.create(
-                            container_id=container_id,
-                            container_name=container_name,
-                            model_name=impl.model_name,
-                            device=device,
-                            status="running",
-                            stopped_by_user=False,
-                            port=7000,  # TT Inference Server default port
-                            workflow_log_path=workflow_log_path
-                        )
-                        logger.info(f"Saved deployment record for {container_name} (ID: {container_id})")
-                        if workflow_log_path:
-                            logger.info(f"Workflow log path saved: {workflow_log_path}")
+
+                        if pending_record:
+                            # Update the pending record with real container info
+                            pending_record.container_id = container_id
+                            pending_record.container_name = container_name
+                            pending_record.status = "running"
+                            pending_record.workflow_log_path = workflow_log_path
+                            pending_record.save()
+                            logger.info(f"Updated pending record to running for {container_name} (ID: {container_id})")
                         else:
-                            logger.warning(f"Workflow log path is None/empty for {container_name}")
+                            ModelDeployment.objects.create(
+                                container_id=container_id,
+                                container_name=container_name,
+                                model_name=impl.model_name,
+                                device=device,
+                                device_id=device_id,
+                                status="running",
+                                stopped_by_user=False,
+                                port=7000,
+                                workflow_log_path=workflow_log_path
+                            )
+                            logger.info(f"Saved deployment record for {container_name} (ID: {container_id})")
                     else:
                         logger.warning(f"Could not save deployment record: no container_id or container_name")
                 except Exception as e:
@@ -229,7 +250,7 @@ def run_container(impl, weights_id):
 
             run_kwargs = copy.deepcopy(impl.docker_config)
             # handle runtime configuration changes to docker kwargs
-            device_mounts = get_devices_mounts(impl)
+            device_mounts = get_devices_mounts(impl, device_id)
             if device_mounts:
                 run_kwargs.update({"devices": device_mounts})
             run_kwargs.update({"ports": get_port_mounts(impl)})
@@ -292,6 +313,7 @@ def run_container(impl, weights_id):
                     container_name=container_name,
                     model_name=impl.model_name,
                     device=device_name,
+                    device_id=device_id,
                     status="running",
                     stopped_by_user=False,
                     port=host_port
@@ -355,22 +377,47 @@ def get_runtime_device_configuration(device_configurations):
     return next(iter(device_configurations))
 
 
-def get_devices_mounts(impl):
+def get_devices_mounts(impl, device_id=0):
     device_config = get_runtime_device_configuration(impl.device_configurations)
     assert isinstance(device_config, DeviceConfigurations)
-    # TODO: add logic to handle multiple devices and multiple containers
-    single_device_mounts = ["/dev/tenstorrent/0:/dev/tenstorrent/0"]
+
+    # Single-chip device configurations: pin to the requested chip slot
+    single_chip_configs = {
+        DeviceConfigurations.E150,
+        DeviceConfigurations.N150,
+        DeviceConfigurations.N150_WH_ARCH_YAML,
+        DeviceConfigurations.N300,
+        DeviceConfigurations.N300_WH_ARCH_YAML,
+        DeviceConfigurations.P100,
+        DeviceConfigurations.P150,
+        DeviceConfigurations.P300c,
+    }
+
+    # Multi-chip configurations manage their own chip allocation; expose full directory
     all_device_mounts = ["/dev/tenstorrent:/dev/tenstorrent"]
-    device_map = {
-        DeviceConfigurations.E150: single_device_mounts,
-        DeviceConfigurations.N150: single_device_mounts,
-        DeviceConfigurations.N150_WH_ARCH_YAML: single_device_mounts,
-        DeviceConfigurations.N300: single_device_mounts,
-        DeviceConfigurations.N300x4_WH_ARCH_YAML: all_device_mounts,
-        DeviceConfigurations.N300x4: all_device_mounts,
+
+    if device_config in single_chip_configs:
+        return [f"/dev/tenstorrent/{device_id}:/dev/tenstorrent/{device_id}"]
+
+    # Multi-chip (T3K, Galaxy, N300x4, P150X4, P150X8, etc.)
+    multi_chip_configs = {
+        DeviceConfigurations.N150X4,
+        DeviceConfigurations.N300x4,
+        DeviceConfigurations.N300x4_WH_ARCH_YAML,
+        DeviceConfigurations.T3K,
+        DeviceConfigurations.T3K_RING,
+        DeviceConfigurations.T3K_LINE,
+        DeviceConfigurations.P150X4,
+        DeviceConfigurations.P150X8,
+        DeviceConfigurations.P300Cx2,
+        DeviceConfigurations.P300Cx4,
+        DeviceConfigurations.GALAXY,
+        DeviceConfigurations.GALAXY_T3K,
     }
-    device_mounts = device_map.get(device_config)
-    return device_mounts
+    if device_config in multi_chip_configs:
+        return all_device_mounts
+
+    return None
 
 
 def get_port_mounts(impl):
@@ -550,12 +597,12 @@ def update_deploy_cache():
             if is_tt_inference_container:
                 logger.info(f"Detected TT Inference Server container: {con['name']} (ID: {con_id})")
                 
-                # Try to find the model implementation from the database
+                # Try to find the model implementation from the deployment store
                 deployment_found = False
                 try:
                     from docker_control.models import ModelDeployment
                     deployment = ModelDeployment.objects.filter(container_id=con_id).first()
-                    
+
                     if deployment:
                         # Find the model implementation by model name
                         model_impl = None
@@ -565,11 +612,12 @@ def update_deploy_cache():
                                 logger.info(f"Matched TT Inference Server container to model_impl: {model_impl.model_name}")
                                 deployment_found = True
                                 break
-                        
+
                         if not model_impl:
                             logger.warning(f"Could not find model_impl for {deployment.model_name} in container {con['name']}")
                     else:
-                        logger.warning(f"No deployment record found for TT Inference Server container {con_id}")
+                        # No record by container_id — could be a pre-existing container or still starting up
+                        logger.debug(f"No deployment record found for TT Inference Server container {con_id}")
                 except Exception as e:
                     # Check if this is a migration/database issue
                     error_str = str(e).lower()
@@ -582,13 +630,25 @@ def update_deploy_cache():
                 if not deployment_found:
                     logger.info(f"Using fallback logic to match container {con['name']}")
                     # Try to match by container name
+                    # First try exact match
                     model_impl = None
                     for k, v in model_implmentations.items():
-                        if v.model_name in con["name"]:
+                        if v.model_name == con["name"]:
                             model_impl = v
-                            logger.info(f"Matched container by name to model_impl: {model_impl.model_name}")
+                            logger.info(f"Matched container by exact name to model_impl: {model_impl.model_name}")
                             break
-                    
+
+                    # Fall back to longest-substring match (prevents short names like "Llama-3.1-8B"
+                    # from beating "Llama-3.1-8B-Instruct" on container name "Llama-3.1-8B-Instruct")
+                    if not model_impl:
+                        best_match_len = 0
+                        for k, v in model_implmentations.items():
+                            if v.model_name in con["name"] and len(v.model_name) > best_match_len:
+                                model_impl = v
+                                best_match_len = len(v.model_name)
+                        if model_impl:
+                            logger.info(f"Matched container by name substring to model_impl: {model_impl.model_name}")
+
                     if not model_impl:
                         logger.warning(f"Could not match TT Inference Server container {con['name']} to any model_impl. Skipping.")
                         continue
@@ -655,195 +715,92 @@ def remove_id_prefix(s):
 
 
 def perform_reset():
+    """
+    Reset the TT board using tt-smi -r (up to 2 attempts, 30-second timeout each).
+
+    The tt-smi -s pre-check has been intentionally removed: when the board is in
+    a bad state tt-smi -s itself hangs, which makes recovery worse.  We go
+    straight to tt-smi -r and let the result speak for itself.
+    """
     try:
-        logger.info("Running initial tt-smi -s command to check device detection.")
-
-        # Initial check to see if Tenstorrent devices are detected
-        def check_device_detection():
-            process = subprocess.Popen(
-                ["tt-smi", "-s"],
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                stdin=subprocess.DEVNULL,  # Prevents interactive command-line interface
-                text=True,
-            )
-            output = []
-            detected_chips = 0
-            warnings = []
-            for line in iter(process.stdout.readline, ""):
-                logger.info(f"tt-smi output: {line.strip()}")
-                output.append(line)
-                lower_line = line.lower()
-                if "detected chips" in lower_line:
-                    # Expect format like: "Detected Chips: 2"
-                    try:
-                        parts = line.strip().split(":")
-                        if len(parts) == 2:
-                            detected_chips = int(parts[1].strip().split()[0])
-                    except (ValueError, IndexError) as e:
-                        warnings.append(f"Unable to parse detected chips from line: {line.strip()}")
-                        logger.warning(f"Unable to parse detected chips from line '{line.strip()}': {e}")
-                if "response_q out of sync" in lower_line or "rd_ptr" in lower_line:
-                    warnings.append(line.strip())
-                if "No Tenstorrent devices detected" in line:
-                    return {
-                        "status": "error",
-                        "message": "No Tenstorrent devices detected! Please check your hardware and try again.",
-                        "output": "".join(output),
-                        "http_status": 503,  # Service Unavailable
-                    }
-            process.stdout.close()
-            return_code = process.wait()
-            
-            # Parse JSON output if text parsing didn't find chips
-            if detected_chips == 0:
-                full_output = "".join(output)
-                try:
-                    json_data = json.loads(full_output)
-                    if "device_info" in json_data and isinstance(json_data["device_info"], list):
-                        detected_chips = len(json_data["device_info"])
-                        logger.info(f"Detected {detected_chips} chips from JSON output")
-                except json.JSONDecodeError as e:
-                    logger.warning(f"Could not parse tt-smi output as JSON: {e}")
-            
-            # If chips are detected, allow reset but surface warnings/return code
-            if detected_chips > 0:
-                if return_code != 0:
-                    warnings.append(f"tt-smi -s exited with code {return_code}")
-                status_val = "success" if not warnings and return_code == 0 else "warning"
-                return {
-                    "status": status_val,
-                    "output": "".join(output),
-                    "warnings": warnings,
-                    "detected_chips": detected_chips,
-                    "return_code": return_code,
-                }
-            if return_code != 0:
-                return {
-                    "status": "error",
-                    "message": f"tt-smi -s command failed with return code {return_code}. Please check if tt-smi is properly installed.",
-                    "output": "".join(output),
-                    "http_status": 500,  # Internal Server Error
-                }
-            return {
-                "status": "success",
-                "message": "No Tenstorrent devices detected. tt-smi executed successfully.",
-                "output": "".join(output),
-                "detected_chips": 0,
-                "return_code": return_code,
-            }
+        logger.info("Starting board reset — running tt-smi -r directly (no pre-check)")
 
-        # Run the device detection check
-        detection_result = check_device_detection()
-        detection_warnings = detection_result.get("warnings", [])
-        detection_output = detection_result.get("output", "")
-        if detection_result.get("status") == "error":
-            return detection_result
-        if detection_output:
-            cumulative_output = [detection_output]
-        else:
-            cumulative_output = []
-        if detection_warnings:
-            cumulative_output.append("Warnings during device detection:\n")
-            cumulative_output.extend([w + "\n" for w in detection_warnings])
-
-        logger.info("Running tt-smi reset command.")
-
-        def stream_command_output(command):
-            logger.info(f"Executing command: {' '.join(command)}")
-            process = subprocess.Popen(
-                command,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.STDOUT,
-                stdin=subprocess.DEVNULL,  # Prevents interactive command-line interface
-                text=True,
-            )
-            output = []
-            for line in iter(process.stdout.readline, ""):
-                logger.info(f"Command output: {line.strip()}")
-                output.append(line)
-            process.stdout.close()
-            return_code = process.wait()
-            if return_code != 0:
-                logger.info(f"Command failed with return code {return_code}")
-                output.append(f"Command failed with return code {return_code}")
-                error_message = "tt-smi reset failed. Please check if:\n"
-                error_message += "1. The Tenstorrent device is properly connected\n"
-                error_message += "2. You have the correct permissions to access the device\n"
-                error_message += "3. The tt-smi utility is properly installed\n"
-                error_message += "4. The device firmware is up to date"
-                return {
-                    "status": "error",
-                    "message": error_message,
-                    "output": "".join(output),
-                    "http_status": 500,  # Internal Server Error
-                }
-            else:
-                logger.info(
-                    f"Command completed successfully with return code {return_code}"
+        # Signal that a reset is in progress so the device-state endpoint reports RESETTING
+        SystemResourceService.set_resetting_state()
+
+        MAX_ATTEMPTS = 2
+        last_output = ""
+
+        for attempt in range(1, MAX_ATTEMPTS + 1):
+            logger.info(f"Reset attempt {attempt} of {MAX_ATTEMPTS}")
+            try:
+                process = subprocess.Popen(
+                    ["tt-smi", "-r"],
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.STDOUT,
+                    stdin=subprocess.DEVNULL,
+                    text=True,
+                    start_new_session=True,  # own session/process group for killpg(); thread-safe, unlike preexec_fn
                 )
-                return {"status": "success", "output": "".join(output)}
-
-        # Attempt software resets first (up to MAX_RESET_ATTEMPTS)
-        MAX_RESET_ATTEMPTS = 3
-        reset_attempts = 0
-        reset_success = False
-
-        # Try tt-smi reset with retries (no reset config file; use default tt-smi behavior)
-        while reset_attempts < MAX_RESET_ATTEMPTS and not reset_success:
-            reset_attempts += 1
-            logger.info(f"Reset attempt {reset_attempts} of {MAX_RESET_ATTEMPTS}")
-            cumulative_output.append(f"Attempting reset {reset_attempts} of {MAX_RESET_ATTEMPTS}...\n")
-
-            # Perform reset using tt-smi default behavior (no reset_config.json)
-            cumulative_output.append("Executing tt-smi -r with default reset configuration.\n")
-            reset_result = stream_command_output(["tt-smi", "-r"])
-            cumulative_output.append(reset_result.get('output', '') + "\n")
-
-            if reset_result.get("status") == "success":
-                logger.info(f"Reset attempt {reset_attempts} succeeded")
-                reset_success = True
-                break
-
-            logger.warning(f"Reset attempt {reset_attempts} failed")
-            # Small delay between attempts
-            time.sleep(2)
-
-        # If all reset attempts failed
-        if not reset_success:
-            all_output = "".join(cumulative_output)
-            logger.error(f"All {MAX_RESET_ATTEMPTS} reset attempts failed")
-            return {
-                "status": "error", 
-                "message": f"All {MAX_RESET_ATTEMPTS} reset attempts failed using tt-smi --reset command.",
-                "output": all_output,
-                "http_status": 500
-            }
 
-        all_output = "".join(cumulative_output)
-        if reset_success:
-            return {
-                "status": "success",
-                "message": f"Reset successful after {reset_attempts} attempt(s)",
-                "output": all_output,
-                "warnings": detection_warnings,
-                "http_status": 200
-            }
-        else:
-            return {
-                "status": "error",
-                "message": "All reset attempts failed with no specific error",
-                "output": all_output,
-                "warnings": detection_warnings,
-                "http_status": 500
-            }
+                try:
+                    stdout, _ = process.communicate(timeout=30)
+                    last_output = stdout
+                    logger.info(f"tt-smi -r attempt {attempt} output: {stdout.strip()!r:.200}")
+
+                    if process.returncode == 0:
+                        logger.info(f"Reset succeeded on attempt {attempt}")
+                        SystemResourceService.clear_device_state_cache()
+                        return {
+                            "status": "success",
+                            "message": f"Board reset successfully after {attempt} attempt(s)",
+                            "attempts_used": attempt,
+                            "output": stdout,
+                            "http_status": 200,
+                        }
+
+                    logger.warning(
+                        f"Reset attempt {attempt} failed: exit code {process.returncode}"
+                    )
+
+                except subprocess.TimeoutExpired:
+                    logger.warning(f"Reset attempt {attempt} timed out after 30s")
+                    # Escalate SIGTERM -> SIGKILL on the whole process group; wait() after each so the child is reaped
+                    for sig in (signal.SIGTERM, signal.SIGKILL):
+                        try:
+                            os.killpg(os.getpgid(process.pid), sig)
+                            process.wait(timeout=2)
+                            break
+                        except Exception as kill_exc:
+                            logger.warning(f"Could not stop tt-smi -r with {sig.name}: {kill_exc}")
+                    last_output = "(timeout)"
+
+            except Exception as exc:
+                logger.error(f"Reset attempt {attempt} raised exception: {exc}")
+                last_output = str(exc)
+
+        # All attempts failed
+        logger.error(f"All {MAX_ATTEMPTS} reset attempts failed")
+        SystemResourceService.clear_device_state_cache()
+        return {
+            "status": "error",
+            "message": (
+                f"Board did not recover after {MAX_ATTEMPTS} reset attempts. "
+                "Manual intervention may be required."
+            ),
+            "attempts_used": MAX_ATTEMPTS,
+            "output": last_output,
+            "http_status": 500,
+        }
 
     except Exception as e:
-        logger.exception("Exception occurred during reset operation.")
+        logger.exception("Unexpected error during reset operation")
+        SystemResourceService.clear_device_state_cache()
         return {
             "status": "error",
             "message": str(e),
-            "output": "An exception occurred during the reset operation.",
+            "attempts_used": 0,
+            "output": "",
             "http_status": 500,
         }
 
diff --git a/app/backend/docker_control/migrations/0001_initial.py b/app/backend/docker_control/migrations/0001_initial.py
deleted file mode 100644
index 0b4c168d..00000000
--- a/app/backend/docker_control/migrations/0001_initial.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# Generated by Django 5.0.4 on 2025-11-12 15:18
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    initial = True
-
-    dependencies = [
-    ]
-
-    operations = [
-        migrations.CreateModel(
-            name='ModelDeployment',
-            fields=[
-                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('container_id', models.CharField(db_index=True, max_length=255, unique=True)),
-                ('container_name', models.CharField(db_index=True, max_length=255)),
-                ('model_name', models.CharField(db_index=True, max_length=255)),
-                ('device', models.CharField(max_length=50)),
-                ('deployed_at', models.DateTimeField(auto_now_add=True, db_index=True)),
-                ('stopped_at', models.DateTimeField(blank=True, null=True)),
-                ('status', models.CharField(db_index=True, default='running', max_length=50)),
-                ('stopped_by_user', models.BooleanField(default=False)),
-                ('port', models.IntegerField(blank=True, null=True)),
-            ],
-            options={
-                'ordering': ['-deployed_at'],
-                'indexes': [models.Index(fields=['status', '-deployed_at'], name='docker_cont_status_a5afde_idx'), models.Index(fields=['model_name', '-deployed_at'], name='docker_cont_model_n_2ecff9_idx')],
-            },
-        ),
-    ]
diff --git a/app/backend/docker_control/migrations/0002_modeldeployment_workflow_log_path.py b/app/backend/docker_control/migrations/0002_modeldeployment_workflow_log_path.py
deleted file mode 100644
index 518dde93..00000000
--- a/app/backend/docker_control/migrations/0002_modeldeployment_workflow_log_path.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Generated by Django 5.0.4 on 2025-11-12 21:35
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('docker_control', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='modeldeployment',
-            name='workflow_log_path',
-            field=models.CharField(blank=True, help_text='Path to workflow log file from tt-inference-server', max_length=512, null=True),
-        ),
-    ]
diff --git a/app/backend/docker_control/models.py b/app/backend/docker_control/models.py
index a94f60ff..7f6b1f02 100644
--- a/app/backend/docker_control/models.py
+++ b/app/backend/docker_control/models.py
@@ -2,39 +2,6 @@
 #
 # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
 
-from django.db import models
-from django.utils import timezone
+from docker_control.deployment_store import ModelDeployment
 
-
-class ModelDeployment(models.Model):
-    """Track all model deployments with full history"""
-    # Deployment identification
-    container_id = models.CharField(max_length=255, unique=True, db_index=True)
-    container_name = models.CharField(max_length=255, db_index=True)
-    
-    # Model information
-    model_name = models.CharField(max_length=255, db_index=True)
-    device = models.CharField(max_length=50)  # n150, n300, etc.
-    
-    # Deployment metadata
-    deployed_at = models.DateTimeField(auto_now_add=True, db_index=True)
-    stopped_at = models.DateTimeField(null=True, blank=True)
-    
-    # Status tracking
-    status = models.CharField(max_length=50, default="running", db_index=True)
-    # Choices: starting, running, stopped, exited, dead, error
-    stopped_by_user = models.BooleanField(default=False)  # True if user clicked stop/delete
-    
-    # Container details
-    port = models.IntegerField(null=True, blank=True)
-    workflow_log_path = models.CharField(max_length=512, null=True, blank=True, help_text="Path to workflow log file from tt-inference-server")
-    
-    class Meta:
-        ordering = ['-deployed_at']
-        indexes = [
-            models.Index(fields=['status', '-deployed_at']),
-            models.Index(fields=['model_name', '-deployed_at']),
-        ]
-    
-    def __str__(self):
-        return f"{self.model_name} on {self.device} - {self.status}"
+__all__ = ["ModelDeployment"]
diff --git a/app/backend/docker_control/views.py b/app/backend/docker_control/views.py
index 741a8bcf..f9ce640d 100644
--- a/app/backend/docker_control/views.py
+++ b/app/backend/docker_control/views.py
@@ -11,10 +11,11 @@
 from rest_framework.renderers import JSONRenderer
 from django.views.decorators.csrf import csrf_exempt
 from django.utils.decorators import method_decorator
-import json  
+import json
 import shutil
 import subprocess
 import os
+from pathlib import Path
 
 import re
 import os
@@ -43,6 +44,15 @@
 logger = get_logger(__name__)
 logger.info(f"importing {__name__}")
 
+# Build model_name → status lookup from catalog JSON (best effort: an unreadable catalog must not break import)
+_CATALOG_PATH = Path(__file__).parent.parent / "shared_config/models_from_inference_server.json"
+try:
+    _catalog = json.loads(_CATALOG_PATH.read_text(encoding="utf-8"))
+    _status_lookup: dict[str, str | None] = {m["model_name"]: m.get("status") for m in _catalog["models"]}
+except Exception as exc:
+    logger.warning(f"Could not load model catalog from {_CATALOG_PATH} ({exc!r}); status will be null for all models")
+    _status_lookup = {}
+
 # Track when deployment started
 deployment_start_times = {}  # {job_id: timestamp} - Track when deployment started
 
@@ -188,7 +198,9 @@ def get(self, request, *args, **kwargs):
                 "is_compatible": is_compatible,
                 "compatible_boards": compatible_boards,
                 "model_type": impl.model_type.value,
-                "current_board": current_board
+                "display_model_type": impl.display_model_type,
+                "current_board": current_board,
+                "status": _status_lookup.get(impl.model_name),
             })
         
         return Response(data, status=status.HTTP_200_OK)
@@ -209,8 +221,9 @@ def post(self, request, *args, **kwargs):
         if serializer.is_valid():
             impl_id = request.data.get("model_id")
             weights_id = request.data.get("weights_id")
+            device_id = int(request.data.get("device_id", 0))
             impl = model_implmentations[impl_id]
-            response = run_container(impl, weights_id)
+            response = run_container(impl, weights_id, device_id=device_id)
             
             # Ensure job_id is set for progress tracking
             # Use job_id from API response, or fallback to container_id or container_name
@@ -706,14 +719,7 @@ def get(self, request, model_id):
             logger.info(f"Checking status for image: {image_name}:{image_tag}")
             image_status = check_image_exists(image_name, image_tag)
             logger.info(f"Image status result: {image_status}")
-            
-            # Add pull progress if available
-            if model_id in pull_progress:
-                image_status['pull_in_progress'] = True
-                image_status['progress'] = pull_progress[model_id]
-            else:
-                image_status['pull_in_progress'] = False
-            
+            image_status['pull_in_progress'] = False
             return Response(image_status, status=status.HTTP_200_OK)
         except KeyError:
             logger.warning(f"Model {model_id} not found in model_implementations")
@@ -1172,6 +1178,7 @@ def get(self, request):
                     'container_name': deployment.container_name,
                     'model_name': deployment.model_name,
                     'device': deployment.device,
+                    'device_id': deployment.device_id,
                     'deployed_at': deployment.deployed_at.isoformat() if deployment.deployed_at else None,
                     'stopped_at': deployment.stopped_at.isoformat() if deployment.stopped_at else None,
                     'status': deployment.status,
diff --git a/app/backend/model_control/model_utils.py b/app/backend/model_control/model_utils.py
index 4e91e214..ad44619e 100644
--- a/app/backend/model_control/model_utils.py
+++ b/app/backend/model_control/model_utils.py
@@ -26,6 +26,53 @@
 encoded_jwt = jwt.encode(json_payload, backend_config.jwt_secret, algorithm="HS256")
 AUTH_TOKEN = os.getenv('CLOUD_CHAT_UI_AUTH_TOKEN', '')
 
+def messages_to_prompt(messages: list) -> str:
+    """Flatten a chat ``messages`` list into one plain-text prompt for base/completion models."""
+    rendered = []
+    for message in messages:
+        speaker = message.get("role", "user")
+        text = message.get("content", "")
+        if speaker == "system":
+            # System text goes in verbatim, without a speaker label.
+            rendered.append(text)
+        elif speaker in ("user", "assistant"):
+            rendered.append(f"{speaker.capitalize()}: {text}")
+    # Messages with any other role are left out; the bare trailing label cues the reply.
+    rendered.append("Assistant:")
+    return "\n\n".join(rendered)
+
+
+def get_model_name_from_container(internal_url: str, fallback: str) -> str:
+    """Ask the container's vLLM /v1/models endpoint which model it has loaded.
+
+    Args:
+        internal_url: Raw internal URL from deploy cache (e.g. "container:7000/v1/chat/completions")
+        fallback: Name returned when the lookup fails (typically hf_model_id)
+
+    Returns:
+        The id of the first model listed by /v1/models, or fallback on any error.
+    """
+    try:
+        # Everything before the first "/" is host:port,
+        # e.g. "container:7000/v1/chat/completions" -> "container:7000"
+        host_port = internal_url.partition("/")[0]
+        models_url = f"http://{host_port}/v1/models"
+        auth = {"Authorization": f"Bearer {encoded_jwt}"}
+        reply = requests.get(models_url, headers=auth, timeout=3)
+        if reply.status_code != 200:
+            logger.warning(
+                f"GET {models_url} returned {reply.status_code}, using fallback: {fallback}"
+            )
+            return fallback
+        # Only a 200 reply is parsed; a malformed body lands in the except below.
+        resolved = reply.json()["data"][0]["id"]
+        logger.info(f"Resolved actual model name from /v1/models: {resolved}")
+        return resolved
+    except Exception as e:
+        logger.warning(f"Failed to query /v1/models ({e}), using fallback: {fallback}")
+        return fallback
+
+
 def get_deploy_cache():
     # the cache is initialized when by docker_control is imported
     def get_all_records():
@@ -173,7 +220,7 @@ def stream_to_cloud_model(url, json_data):
         json_data["top_k"] = int(top_k) if top_k is not None else 20
         json_data["top_p"] = float(top_p) if top_p is not None else 0.9
         json_data["max_tokens"] = int(max_tokens) if max_tokens is not None else 512
-        json_data["stream_options"] = {"include_usage": True, "continuous_usage_stats": True}
+        json_data["stream_options"] = {"include_usage": True}
 
         # Log final parameters being used
         logger.info("=== Final Model Parameters ===")
@@ -231,7 +278,7 @@ def stream_to_cloud_model(url, json_data):
                                     chunk_dict = json.loads(sub_chunk)
                                     logger.info(f"Successfully parsed JSON: {chunk_dict}")
 
-                                    usage = chunk_dict.get("usage", {})
+                                    usage = chunk_dict.get("usage") or {}
                                     completion_tokens = usage.get("completion_tokens", 0)
                                     prompt_tokens = usage.get("prompt_tokens", 0)
                                     logger.info(f"Usage info: {usage}, completion tokens: {completion_tokens}")
@@ -314,7 +361,7 @@ def stream_response_from_external_api(url, json_data):
     json_data["top_k"] = int(top_k) if top_k is not None else 20
     json_data["top_p"] = float(top_p) if top_p is not None else 0.9
     json_data["max_tokens"] = int(max_tokens) if max_tokens is not None else 512
-    json_data["stream_options"] = {"include_usage": True, "continuous_usage_stats": True}
+    json_data["stream_options"] = {"include_usage": True}
 
     # Log final parameters being used
     logger.info("=== Final Model Parameters ===")
@@ -366,7 +413,7 @@ def stream_response_from_external_api(url, json_data):
 
                     elif new_chunk != "":
                         chunk_dict = json.loads(new_chunk)
-                        usage = chunk_dict.get("usage", {})
+                        usage = chunk_dict.get("usage") or {}
                         completion_tokens = usage.get("completion_tokens", 0)
                         prompt_tokens = usage.get("prompt_tokens", 0)
 
@@ -383,6 +430,10 @@ def stream_response_from_external_api(url, json_data):
 
             logger.info("stream_response_from_external done")
 
+    except requests.exceptions.HTTPError as e:  # e.response may be None, so guard every use of it
+        code, body = (e.response.status_code, e.response.text) if e.response is not None else ("n/a", "(no body)")
+        logger.error(f"HTTPError {code}: {body}")
+        yield f"error: {str(e)}"
     except requests.RequestException as e:
         logger.error(f"RequestException: {str(e)}")
         yield f"error: {str(e)}"
diff --git a/app/backend/model_control/pipeline_views.py b/app/backend/model_control/pipeline_views.py
new file mode 100644
index 00000000..dea491ef
--- /dev/null
+++ b/app/backend/model_control/pipeline_views.py
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+"""
+Voice pipeline view: Whisper STT → LLM → TTS (optional).
+Accepts multipart/form-data and streams SSE events to the client.
+"""
+
+import base64
+import json
+import time
+
+import requests
+from django.http import StreamingHttpResponse
+from rest_framework.views import APIView
+
+from model_control.model_utils import (
+    encoded_jwt,
+    get_deploy_cache,
+    stream_response_from_external_api,
+)
+from shared_config.logger_config import get_logger
+
+logger = get_logger(__name__)
+
+
+class VoicePipelineView(APIView):
+    """
+    POST /models-api/pipeline/voice/
+
+    Multipart fields:
+        audio_file        – audio blob
+        whisper_deploy_id – deploy_id of running Whisper
+        llm_deploy_id     – deploy_id of running LLM
+        tts_deploy_id     – (optional) deploy_id of running speecht5_tts
+        system_prompt     – (optional) string
+    """
+
+    def post(self, request, *args, **kwargs):
+        """Return 400 for missing required fields, otherwise stream the pipeline as SSE events."""
+        from rest_framework import status
+        from rest_framework.response import Response
+
+        audio_file = request.FILES.get("audio_file")
+        whisper_deploy_id = request.data.get("whisper_deploy_id")
+        llm_deploy_id = request.data.get("llm_deploy_id")
+        tts_deploy_id = request.data.get("tts_deploy_id")
+        system_prompt = request.data.get(
+            "system_prompt",
+            "You are a helpful assistant. Be concise.",
+        )
+
+        if not audio_file:
+            return Response(
+                {"error": "audio_file is required"},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+        if not whisper_deploy_id or not llm_deploy_id:
+            return Response(
+                {"error": "whisper_deploy_id and llm_deploy_id are required"},
+                status=status.HTTP_400_BAD_REQUEST,
+            )
+
+        def event_stream():
+            headers = {"Authorization": f"Bearer {encoded_jwt}"}
+            deploy_cache = get_deploy_cache()
+
+            # ------------------------------------------------------------------
+            # Step 1: STT (Whisper)
+            # ------------------------------------------------------------------
+            try:
+                whisper_deploy = deploy_cache[whisper_deploy_id]
+                whisper_url = "http://" + whisper_deploy["internal_url"]
+                file_payload = {
+                    "file": (audio_file.name, audio_file, audio_file.content_type)
+                }
+                stt_resp = requests.post(
+                    whisper_url, files=file_payload, headers=headers, timeout=60
+                )
+                stt_resp.raise_for_status()
+                transcript = stt_resp.json().get("text", "")
+                yield f"data: {json.dumps({'type': 'transcript', 'text': transcript})}\n\n"
+            except Exception as exc:
+                logger.error(f"STT step failed: {exc}")
+                yield f"data: {json.dumps({'type': 'error', 'stage': 'stt', 'message': str(exc)})}\n\n"
+                return
+
+            if not transcript:
+                yield f"data: {json.dumps({'type': 'error', 'stage': 'stt', 'message': 'Empty transcript'})}\n\n"
+                return
+
+            # ------------------------------------------------------------------
+            # Step 2: LLM streaming
+            # ------------------------------------------------------------------
+            messages = []
+            if system_prompt:
+                messages.append({"role": "system", "content": system_prompt})
+            messages.append({"role": "user", "content": transcript})
+
+            llm_full_text = ""
+            try:
+                # Look up the deployment inside the try so that an unknown
+                # llm_deploy_id is sent as an 'llm' error event, not a broken stream.
+                llm_deploy = deploy_cache[llm_deploy_id]
+                llm_url = "http://" + llm_deploy["internal_url"]
+                llm_payload = {
+                    "model": llm_deploy["model_impl"].hf_model_id,
+                    "messages": messages,
+                    "stream": True,
+                    "max_tokens": 512,
+                }
+
+                for chunk in stream_response_from_external_api(llm_url, llm_payload):
+                    if isinstance(chunk, bytes):
+                        chunk = chunk.decode("utf-8")
+                    llm_full_text += chunk
+                    yield f"data: {json.dumps({'type': 'llm_chunk', 'text': chunk})}\n\n"
+            except Exception as exc:
+                logger.error(f"LLM step failed: {exc}")
+                yield f"data: {json.dumps({'type': 'error', 'stage': 'llm', 'message': str(exc)})}\n\n"
+                return
+
+            # ------------------------------------------------------------------
+            # Step 3: TTS (optional)
+            # ------------------------------------------------------------------
+            if tts_deploy_id and llm_full_text.strip():
+                try:
+                    tts_deploy = deploy_cache[tts_deploy_id]
+                    tts_url = "http://" + tts_deploy["internal_url"]
+
+                    tts_resp = requests.post(
+                        tts_url, json={"text": llm_full_text.strip()}, headers=headers, timeout=30
+                    )
+                    tts_resp.raise_for_status()
+
+                    task_id = tts_resp.json().get("task_id")
+                    status_url = tts_url.replace("/enqueue", f"/status/{task_id}")
+
+                    # Poll for completion (up to 120 attempts, 1 s apart); a 404 is retried
+                    for _ in range(120):
+                        st = requests.get(status_url, headers=headers, timeout=10)
+                        if st.status_code != 404 and st.json().get("status") == "Completed":
+                            break
+                        time.sleep(1)
+                    else:
+                        # Never completed: report a tts error instead of fetching unfinished audio
+                        raise TimeoutError("TTS task timed out")
+
+                    audio_url = tts_url.replace("/enqueue", f"/fetch_audio/{task_id}")
+                    audio_resp = requests.get(audio_url, headers=headers, timeout=30)
+                    audio_resp.raise_for_status()
+
+                    audio_b64 = base64.b64encode(audio_resp.content).decode("utf-8")
+                    content_type = audio_resp.headers.get("Content-Type", "audio/wav")
+                    data_uri = f"data:{content_type};base64,{audio_b64}"
+                    yield f"data: {json.dumps({'type': 'audio_url', 'url': data_uri})}\n\n"
+                except Exception as exc:
+                    logger.error(f"TTS step failed: {exc}")
+                    yield f"data: {json.dumps({'type': 'error', 'stage': 'tts', 'message': str(exc)})}\n\n"
+                    # Don't abort — transcript and LLM response were already sent
+
+            yield f"data: {json.dumps({'type': 'done'})}\n\n"
+
+        response = StreamingHttpResponse(event_stream(), content_type="text/event-stream")
+        response["Cache-Control"] = "no-cache"
+        response["X-Accel-Buffering"] = "no"
+        return response
diff --git a/app/backend/model_control/urls.py b/app/backend/model_control/urls.py
index 158dfde3..74590b91 100644
--- a/app/backend/model_control/urls.py
+++ b/app/backend/model_control/urls.py
@@ -5,6 +5,7 @@
 # model_control/urls.py
 from django.urls import path
 from . import views
+from .pipeline_views import VoicePipelineView
 
 urlpatterns = [
     path("inference/", views.InferenceView.as_view()),
@@ -18,6 +19,8 @@
     path("object-detection-cloud/", views.ObjectDetectionInferenceCloudView.as_view()),
     path("speech-recognition/", views.SpeechRecognitionInferenceView.as_view()),
     path("speech-recognition-cloud/", views.SpeechRecognitionInferenceCloudView.as_view()),
+    path("tts/", views.TtsInferenceView.as_view()),
+    path("pipeline/voice/", VoicePipelineView.as_view()),
     path("health/", views.ModelHealthView.as_view()),
     path("inference_cloud/", views.InferenceCloudView.as_view()),
     path("logs/<str:container_id>/", views.ContainerLogsView.as_view(), name="container-logs"),
diff --git a/app/backend/model_control/views.py b/app/backend/model_control/views.py
index 64bdc46b..6c44d110 100644
--- a/app/backend/model_control/views.py
+++ b/app/backend/model_control/views.py
@@ -42,6 +42,8 @@ def select_renderer(self, request, renderers, format_suffix):
 from model_control.model_utils import (
     encoded_jwt,
     get_deploy_cache,
+    get_model_name_from_container,
+    messages_to_prompt,
     stream_response_from_external_api,
     stream_response_from_agent_api,
     health_check,
@@ -85,8 +87,18 @@ def post(self, request, *args, **kwargs):
             internal_url = "http://" + deploy["internal_url"]
             logger.info(f"internal_url:= {internal_url}")
             logger.info(f"using vllm model:= {deploy["model_impl"].model_name}")
-            data["model"] = deploy["model_impl"].hf_model_id
-            
+            data["model"] = get_model_name_from_container(
+                deploy["internal_url"], fallback=deploy["model_impl"].hf_model_id
+            )
+
+            # Route base/completion models to /v1/completions with a plain prompt
+            service_route = deploy["model_impl"].service_route
+            logger.info(f"service_route:= {service_route}")
+            if service_route == "/v1/completions":
+                messages = data.pop("messages", [])
+                data["prompt"] = messages_to_prompt(messages)
+                data.pop("stream_options", None)
+
             # Create a generator that can be cancelled
             def generate_response():
                 try:
@@ -116,7 +128,9 @@ def post(self, request, *agrs, **kwargs):
         if deploy_id and deploy_id in deploy_cache:
             deploy = deploy_cache[deploy_id]
             logger.info(f"using vllm model:= {deploy['model_impl'].model_name}")
-            data["model"] = deploy["model_impl"].hf_model_id
+            data["model"] = get_model_name_from_container(
+                deploy["internal_url"], fallback=deploy["model_impl"].hf_model_id
+            )
         else:
             logger.info("No valid deployment found, proceeding with agent-only mode (cloud LLM)")
             # Remove deploy_id from data since it's not needed for agent
@@ -615,6 +629,57 @@ def post(self, request, *args, **kwargs):
 
         return Response(inference_data.json(), status=status.HTTP_200_OK)
 
+class TtsInferenceView(APIView):
+    """Text-to-speech inference: POST text → /enqueue → poll → return audio blob."""
+    def post(self, request, *args, **kwargs):
+        """Validate the request, enqueue the text, poll until completed, then return the audio."""
+        data = request.data
+        logger.info(f"{self.__class__.__name__} data:={data}")
+        serializer = InferenceSerializer(data=data)
+        if serializer.is_valid():
+            deploy_id = data.get("deploy_id")
+            text = data.get("text") or data.get("prompt")
+            if not text:
+                return Response({"error": "text is required"}, status=status.HTTP_400_BAD_REQUEST)
+            deploy_cache = get_deploy_cache()
+            if deploy_id not in deploy_cache:  # reject unknown ids instead of raising KeyError
+                return Response({"error": "unknown deploy_id"}, status=status.HTTP_400_BAD_REQUEST)
+            internal_url = "http://" + deploy_cache[deploy_id]["internal_url"]
+            try:
+                headers = {"Authorization": f"Bearer {encoded_jwt}"}
+                inference_data = requests.post(internal_url, json={"text": text}, headers=headers, timeout=30)
+                inference_data.raise_for_status()
+
+                # Poll status until completed (up to 120 attempts, 1 s apart); a 404 is retried
+                task_id = inference_data.json().get("task_id")
+                get_status_url = internal_url.replace("/enqueue", f"/status/{task_id}")
+                for _ in range(120):
+                    status_resp = requests.get(get_status_url, headers=headers, timeout=10)
+                    if status_resp.status_code != status.HTTP_404_NOT_FOUND:
+                        status_resp.raise_for_status()
+                        if status_resp.json().get("status") == "Completed":
+                            break
+                    time.sleep(1)
+                else:  # loop ended without a break: the task never reported "Completed"
+                    return Response({"error": "TTS task timed out"}, status=status.HTTP_504_GATEWAY_TIMEOUT)
+
+                # Fetch audio result
+                get_audio_url = internal_url.replace("/enqueue", f"/fetch_audio/{task_id}")
+                audio_resp = requests.get(get_audio_url, headers=headers, stream=True, timeout=30)
+                audio_resp.raise_for_status()
+
+                content_type = audio_resp.headers.get("Content-Type", "audio/wav")
+                django_response = HttpResponse(audio_resp.content, content_type=content_type)
+                django_response["Content-Disposition"] = "attachment; filename=tts_output.wav"
+                return django_response
+
+            except requests.exceptions.RequestException as req_err:  # also covers timeouts / connection errors
+                logger.error(f"TTS request error: {req_err}")
+                return Response({"error": "TTS request failed"}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+        else:
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+
+
 class ContainerLogsView(View):
     # Define event detection configuration before the get method
     SIMPLE_EVENT_KEYWORDS = [
diff --git a/app/backend/shared_config/model_config.py b/app/backend/shared_config/model_config.py
index 9d9a7e93..22033b21 100644
--- a/app/backend/shared_config/model_config.py
+++ b/app/backend/shared_config/model_config.py
@@ -2,6 +2,7 @@
 #
 # SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
 
+import json
 import os
 from dataclasses import dataclass, asdict
 from typing import Set, Dict, Any, Union
@@ -11,7 +12,6 @@
 from shared_config.backend_config import backend_config
 from shared_config.setup_config import SetupTypes
 from shared_config.model_type_config import ModelTypes
-from shared_config.model_type_config import ModelTypes
 from shared_config.logger_config import get_logger
 
 logger = get_logger(__name__)
@@ -62,6 +62,7 @@ class ModelImpl:
     service_port: int = 7000
     env_file: str = ""
     health_route: str = "/health"
+    display_model_type: str = "LLM"
 
     def __post_init__(self):
         # _init methods compute values that are dependent on other values
@@ -216,203 +217,136 @@ def base_docker_config():
     }
 
 
-# model_ids are unique strings to define a model, they could be uuids but
-# using friendly strings prefixed with id_ is more helpful for debugging
+# ---------------------------------------------------------------------------
+# JSON-based model loader
+# ---------------------------------------------------------------------------
+
+CATALOG_JSON = Path(__file__).parent / "models_from_inference_server.json"
+
+# device_type strings in the catalog → DeviceConfigurations member names
+# (only names that actually exist in the enum; others are skipped)
+_CATALOG_DEVICE_MAP = {
+    "N150": "N150",
+    "N300": "N300",
+    "T3K": "T3K",
+    "N150X4": "N150X4",
+    "P100": "P100",
+    "P150": "P150",
+    "P150X4": "P150X4",
+    "P150X8": "P150X8",
+    "GALAXY": "GALAXY",
+    "GALAXY_T3K": "GALAXY_T3K",
+}
+
+
+def load_model_implementations_from_json(json_path: Path) -> list:
+    with open(json_path) as f:
+        catalog = json.load(f)
+    impls = []
+    for entry in catalog["models"]:
+        docker_image = entry.get("docker_image") or ""
+        if ":" in docker_image:
+            image_name, image_tag = docker_image.rsplit(":", 1)
+        else:
+            image_name, image_tag = docker_image, "latest"
 
-# Helper device configuration sets for easier management
-N150_N300 = {DeviceConfigurations.N150, DeviceConfigurations.N150_WH_ARCH_YAML, DeviceConfigurations.N300, DeviceConfigurations.N300_WH_ARCH_YAML}
-ALL_BOARDS = {DeviceConfigurations.N150, DeviceConfigurations.N150_WH_ARCH_YAML, DeviceConfigurations.N300, DeviceConfigurations.N300_WH_ARCH_YAML, DeviceConfigurations.N300x4, DeviceConfigurations.N300x4_WH_ARCH_YAML}
-T3000_ONLY = {DeviceConfigurations.N300x4, DeviceConfigurations.N300x4_WH_ARCH_YAML}
+        device_configs = {
+            DeviceConfigurations[_CATALOG_DEVICE_MAP[d]]
+            for d in entry.get("device_configurations", [])
+            if d in _CATALOG_DEVICE_MAP
+        }
 
-model_implmentations_list = [
-    # Speech Recognition - Can run on N150 and N300
-    ModelImpl(
-        model_name="Whisper-Distil-Large-v3",
-        model_id="id_whisper_distil_large_v3_v0.1.0",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-whisper-distil-large-v3-dev",
-        image_tag="v0.0.1-tt-metal-1a1a9e2bb102",
-        device_configurations=ALL_BOARDS,  # Can run on N150 and N300
-        docker_config=base_docker_config(),
-        shm_size="32G",
-        service_port=7000,
-        service_route="/inference",
-        health_route="/",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.SPEECH_RECOGNITION,
-    ),
-    # TODO: add this model back in when its in tt-inference-server-main branch
-    # Image Generation - Can run on N150 and N300
-    # ModelImpl(
-    #     model_name="Stable-Diffusion-3.5-medium",
-    #     model_id="id_stable_diffusion_3.5_mediumv0.1.0",
-    #     image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-stable-diffusion-3.5-src-base",
-    #     image_tag="v0.0.1-tt-metal-a0560feb3eed",
-    #     device_configurations=ALL_BOARDS,  # Can run on N150 and N300
-    #     docker_config=base_docker_config(),
-    #     shm_size="32G",
-    #     service_port=7000,
-    #     service_route="/enqueue",
-    #     health_route="/",
-    #     setup_type=SetupTypes.TT_INFERENCE_SERVER,
-    #     model_type=ModelTypes.IMAGE_GENERATION,
-    # ),
-
-    # Image Generation - Can run on N150 and N300
-    ModelImpl(
-        model_name="Stable-Diffusion-1.4",
-        model_id="id_stable_diffusionv0.1.0",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-stable-diffusion-1.4-src-base",
-        image_tag="v0.0.1-tt-metal-cc8b4e1dac99",
-        device_configurations=ALL_BOARDS,  # Can run on N150 and N300
-        docker_config=base_docker_config(),
-        shm_size="32G",
-        service_port=7000,
-        service_route="/enqueue",
-        health_route="/",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.IMAGE_GENERATION,
-    ),
+        try:
+            model_type = ModelTypes[entry["model_type"]]
+        except KeyError:
+            model_type = ModelTypes.CHAT
+
+        try:
+            setup_type = SetupTypes[entry["setup_type"]]
+        except KeyError:
+            setup_type = SetupTypes.TT_INFERENCE_SERVER
+
+        cfg = base_docker_config()
+        cfg["environment"].update(entry.get("env_vars") or {})
+
+        impl = ModelImpl(
+            model_name=entry["model_name"],
+            hf_model_id=entry.get("hf_model_id"),
+            image_name=image_name,
+            image_tag=image_tag,
+            device_configurations=device_configs,
+            docker_config=cfg,
+            service_route=entry["service_route"],
+            setup_type=setup_type,
+            model_type=model_type,
+            version=entry.get("version", "0.0.1"),
+            shm_size=entry.get("shm_size", "32G"),
+            display_model_type=entry.get("display_model_type", "LLM"),
+        )
+        impls.append(impl)
+    return impls
 
-    # Object Detection - Can run on all boards
+
+# ---------------------------------------------------------------------------
+# Hardcoded models NOT present in tt-inference-server catalog
+# ---------------------------------------------------------------------------
+
+_ALL_WH_BOARDS = {
+    DeviceConfigurations.N150,
+    DeviceConfigurations.N150_WH_ARCH_YAML,
+    DeviceConfigurations.N300,
+    DeviceConfigurations.N300_WH_ARCH_YAML,
+    DeviceConfigurations.N300x4,
+    DeviceConfigurations.N300x4_WH_ARCH_YAML,
+}
+
+_hardcoded_impls = [
+    # Object Detection - legacy YOLOv4 (not in tt-inference-server catalog)
     ModelImpl(
         model_name="YOLOv4",
         model_id="id_yolov4v0.0.1",
         image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-yolov4-src-base",
         image_tag="v0.0.1-tt-metal-65d246482b3f",
-        device_configurations=ALL_BOARDS,  # Can run on all boards
+        device_configurations=_ALL_WH_BOARDS,
         docker_config=base_docker_config(),
         shm_size="32G",
         service_port=7000,
         service_route="/objdetection_v2",
         setup_type=SetupTypes.NO_SETUP,
-        model_type=ModelTypes.OBJECT_DETECTION
-    ),
-
-    # Mock Chat 
-    # TODO: currently not working.
-    # remove this model for now until its in tt-inference-server-main branch
-    #  TODO: add / make a new mock model
-    # ModelImpl(
-    #     hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
-    #     model_name="Mock-Llama-3.1-70B-Instruct",
-    #     model_id="id_mock_vllm_modelv0.0.1",
-    #     image_name="ghcr.io/tenstorrent/tt-inference-server/mock.vllm.openai.api",
-    #     image_tag="v0.0.1-tt-metal-385904186f81-384f1790c3be",
-    #     device_configurations={DeviceConfigurations.CPU},
-    #     docker_config=base_docker_config(),
-    #     shm_size="1G",
-    #     service_port=7000,
-    #     service_route="/v1/chat/completions",
-    #     setup_type=SetupTypes.MAKE_VOLUMES,
-    #     model_type=ModelTypes.MOCK
-    # ),
-
-    # --- Chat Models ---
-
-    # 1B, 3B, 8B, 11B models - Can run on all boards
-    ModelImpl(
-        hf_model_id="meta-llama/Llama-3.2-1B-Instruct",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations=ALL_BOARDS,  # Can run on all boards
-        docker_config=base_docker_config(),
-        service_route="/v1/chat/completions",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
-
-    ),
-    ModelImpl(
-        hf_model_id="meta-llama/Llama-3.2-3B-Instruct",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations=ALL_BOARDS,  # Can run on all boards
-        docker_config=base_docker_config(),
-        service_route="/v1/chat/completions",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
-  
-    ),
-    ModelImpl(
-        hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations=ALL_BOARDS | {DeviceConfigurations.P300Cx2},
-        docker_config=base_docker_config(),
-        service_route="/v1/chat/completions",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
-
+        model_type=ModelTypes.OBJECT_DETECTION,
+        display_model_type="CNN",
     ),
-    # TODO: add this model back in when its in tt-inference-server-main branch
-    # ModelImpl(
-    #     hf_model_id="meta-llama/Llama-3.2-11B-Vision-Instruct",
-    #     image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-    #     image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-    #     device_configurations=ALL_BOARDS,  # Can run on all boards
-    #     docker_config=base_docker_config(),
-    #     service_route="/v1/chat/completions",
-    #     setup_type=SetupTypes.TT_INFERENCE_SERVER,
-    #     model_type=ModelTypes.CHAT
- 
-    # ),
-
-    # 32B models - T3000 and P300Cx2
+    # Legacy Stable-Diffusion-1.4 (not in tt-inference-server catalog)
     ModelImpl(
-        hf_model_id="Qwen/Qwen3-32B",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations={DeviceConfigurations.N300x4, DeviceConfigurations.N300x4_WH_ARCH_YAML, DeviceConfigurations.P300Cx2},
-        docker_config=base_docker_config(),
-        service_route="/v1/chat/completions",
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
-    ),
-
-    # 70B models - Only T3000
-
-    ModelImpl(
-        hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations=T3000_ONLY,  # Only T3000
+        model_name="Stable-Diffusion-1.4",
+        model_id="id_stable_diffusionv0.1.0",
+        image_name="ghcr.io/tenstorrent/tt-inference-server/tt-metal-stable-diffusion-1.4-src-base",
+        image_tag="v0.0.1-tt-metal-cc8b4e1dac99",
+        device_configurations=_ALL_WH_BOARDS,
         docker_config=base_docker_config(),
         shm_size="32G",
         service_port=7000,
-        service_route="/v1/chat/completions",
-        env_file=os.environ.get("VLLM_LLAMA31_ENV_FILE"),
-        setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
-    ),
-    # ModelImpl(
-    #     hf_model_id="meta-llama/Llama-3.1-70B-Instruct",
-    #     image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-    #     image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-    #     device_configurations=T3000_ONLY,  # Only T3000
-    #     docker_config=base_docker_config(),
-    #     service_route="/v1/chat/completions",
-    #     setup_type=SetupTypes.TT_INFERENCE_SERVER,
-    #     model_type=ModelTypes.CHAT
-    # ),
-    ModelImpl(
-        hf_model_id="meta-llama/Llama-3.3-70B-Instruct",
-        image_name="ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-20.04-amd64",
-        image_tag="0.0.4-v0.56.0-rc47-e2e0002ac7dc",
-        device_configurations=T3000_ONLY | {DeviceConfigurations.P300Cx2},
-        docker_config=base_docker_config(),
-        service_route="/v1/chat/completions",
+        service_route="/enqueue",
+        health_route="/",
         setup_type=SetupTypes.TT_INFERENCE_SERVER,
-        model_type=ModelTypes.CHAT
+        model_type=ModelTypes.IMAGE_GENERATION,
+        display_model_type="IMAGE",
     ),
-    #! Add new model vLLM model implementations here
 ]
 
+
 def validate_model_implemenation_config(impl):
     # no / in model_id strings, model_id will be used in path names
-    assert not "/" in impl.model_id
+    assert "/" not in impl.model_id
+
+
+# ---------------------------------------------------------------------------
+# Build final model_implmentations dict
+# ---------------------------------------------------------------------------
 
+_json_impls = load_model_implementations_from_json(CATALOG_JSON)
 
-# build and validate the model_implmentations config
 model_implmentations = {}
-for impl in model_implmentations_list:
+for impl in _json_impls + _hardcoded_impls:
     validate_model_implemenation_config(impl)
     model_implmentations[impl.model_id] = impl
\ No newline at end of file
diff --git a/app/backend/shared_config/model_type_config.py b/app/backend/shared_config/model_type_config.py
index 769754c1..a5d0e584 100644
--- a/app/backend/shared_config/model_type_config.py
+++ b/app/backend/shared_config/model_type_config.py
@@ -9,4 +9,9 @@ class ModelTypes(Enum):
     CHAT = "chat"
     OBJECT_DETECTION = "object_detection"
     IMAGE_GENERATION = "image_generation"
-    SPEECH_RECOGNITION = "speech_recognition"
\ No newline at end of file
+    SPEECH_RECOGNITION = "speech_recognition"
+    VLM = "vlm"
+    TTS = "tts"
+    VIDEO = "video_generation"
+    EMBEDDING = "embedding"
+    CNN = "cnn"
\ No newline at end of file
diff --git a/app/backend/shared_config/models_from_inference_server.json b/app/backend/shared_config/models_from_inference_server.json
new file mode 100644
index 00000000..4a678104
--- /dev/null
+++ b/app/backend/shared_config/models_from_inference_server.json
@@ -0,0 +1,1740 @@
+{
+  "source": {
+    "artifact_version": "0.9.0",
+    "generated_at": "2026-02-24T23:09:31.324226+00:00"
+  },
+  "total_models": 60,
+  "models": [
+    {
+      "model_name": "DeepSeek-R1-Distill-Llama-70B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 70
+    },
+    {
+      "model_name": "distil-large-v3",
+      "model_type": "SPEECH_RECOGNITION",
+      "display_model_type": "AUDIO",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "T3K"
+      ],
+      "hf_model_id": "distil-whisper/distil-large-v3",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "FLUX.1-dev",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "black-forest-labs/FLUX.1-dev",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-c180ef7",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "FLUX.1-schnell",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "black-forest-labs/FLUX.1-schnell",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-c180ef7",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Llama-3.1-70B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.1-70B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 70
+    },
+    {
+      "model_name": "Llama-3.1-70B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.1-70B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 70
+    },
+    {
+      "model_name": "Llama-3.1-8B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "N150",
+        "N300",
+        "P100",
+        "P150",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.1-8B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-25305db-6e67d2d",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 8
+    },
+    {
+      "model_name": "Llama-3.1-8B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "N150",
+        "N300",
+        "P100",
+        "P150",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.1-8B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-25305db-6e67d2d",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 8
+    },
+    {
+      "model_name": "Llama-3.3-70B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "P150X4",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.3-70B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 70
+    },
+    {
+      "model_name": "Mistral-7B-Instruct-v0.3",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "mistralai/Mistral-7B-Instruct-v0.3",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-9b67e09-a91b644",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 7
+    },
+    {
+      "model_name": "mochi-1-preview",
+      "model_type": "VIDEO",
+      "display_model_type": "VIDEO",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "genmo/mochi-1-preview",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Motif-Image-6B-Preview",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "Motif-Technologies/Motif-Image-6B-Preview",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-c180ef7",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 6
+    },
+    {
+      "model_name": "Qwen3-32B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "P150X8",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen3-32B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 32
+    },
+    {
+      "model_name": "speecht5_tts",
+      "model_type": "TTS",
+      "display_model_type": "TEXT_TO_SPEECH",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "microsoft/speecht5_tts",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-a9b09e0",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "stable-diffusion-3.5-large",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "stabilityai/stable-diffusion-3.5-large",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-c180ef7",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "stable-diffusion-xl-1.0-inpainting-0.1",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.5.0-fbbbd2da8cfab49ddf43d28dd9c0813a3c3ee2bd",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "stable-diffusion-xl-base-1.0",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "stabilityai/stable-diffusion-xl-base-1.0",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "stable-diffusion-xl-base-1.0-img-2-img",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "stabilityai/stable-diffusion-xl-base-1.0-img-2-img",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Wan2.2-T2V-A14B-Diffusers",
+      "model_type": "VIDEO",
+      "display_model_type": "VIDEO",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "Wan-AI/Wan2.2-T2V-A14B-Diffusers",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 14
+    },
+    {
+      "model_name": "whisper-large-v3",
+      "model_type": "SPEECH_RECOGNITION",
+      "display_model_type": "AUDIO",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "T3K"
+      ],
+      "hf_model_id": "openai/whisper-large-v3",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "COMPLETE",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-65718bb",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Llama-3.2-11B-Vision",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-11B-Vision",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-v0.61.1-rc1-5cbc982",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 11
+    },
+    {
+      "model_name": "Llama-3.2-11B-Vision-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-v0.61.1-rc1-5cbc982",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 11
+    },
+    {
+      "model_name": "Llama-3.2-1B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-1B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-9b67e09-a91b644",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 1
+    },
+    {
+      "model_name": "Llama-3.2-1B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-1B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-9b67e09-a91b644",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 1
+    },
+    {
+      "model_name": "Llama-3.2-3B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-3B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-20edc39-03cb300",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 3
+    },
+    {
+      "model_name": "Llama-3.2-3B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-3B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-20edc39-03cb300",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 3
+    },
+    {
+      "model_name": "Llama-3.2-90B-Vision",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-90B-Vision",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-v0.61.1-rc1-5cbc982",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "MAX_PREFILL_CHUNK_SIZE": 16
+      },
+      "param_count": 90
+    },
+    {
+      "model_name": "Llama-3.2-90B-Vision-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "T3K"
+      ],
+      "hf_model_id": "meta-llama/Llama-3.2-90B-Vision-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-v0.61.1-rc1-5cbc982",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "MAX_PREFILL_CHUNK_SIZE": 16
+      },
+      "param_count": 90
+    },
+    {
+      "model_name": "Qwen-Image",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen-Image",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-be88351",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "TT_DIT_CACHE_DIR": "/tmp/TT_DIT_CACHE"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Qwen-Image-2512",
+      "model_type": "IMAGE_GENERATION",
+      "display_model_type": "IMAGE",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen-Image-2512",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.9.0-be88351",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "TT_DIT_CACHE_DIR": "/tmp/TT_DIT_CACHE"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Qwen2.5-72B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-72B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-13f44c5-0edd242",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+        "MAX_PREFILL_CHUNK_SIZE": "16"
+      },
+      "param_count": 72
+    },
+    {
+      "model_name": "Qwen2.5-72B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-72B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-13f44c5-0edd242",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1,
+        "MAX_PREFILL_CHUNK_SIZE": "16"
+      },
+      "param_count": 72
+    },
+    {
+      "model_name": "Qwen2.5-VL-72B-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-VL-72B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c18569e-b2894d3",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 72
+    },
+    {
+      "model_name": "Qwen3-8B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen3-8B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-e95ffa5-48eba14",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 8
+    },
+    {
+      "model_name": "QwQ-32B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/QwQ-32B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "FUNCTIONAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-e95ffa5-48eba14",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 32
+    },
+    {
+      "model_name": "AFM-4.5B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "arcee-ai/AFM-4.5B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-ae65ee5-35f023f",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": 4
+    },
+    {
+      "model_name": "bge-large-en-v1.5",
+      "model_type": "EMBEDDING",
+      "display_model_type": "EMBEDDING",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "BAAI/bge-large-en-v1.5",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.2.0-2496be4518bca0a7a5b497a4cda3cfe7e2f59756",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM__MAX_NUM_BATCHED_TOKENS": "3072",
+        "VLLM__MAX_MODEL_LENGTH": "384",
+        "VLLM__MIN_CONTEXT_LENGTH": "32",
+        "VLLM__MAX_NUM_SEQS": "8",
+        "MAX_BATCH_SIZE": "8",
+        "DEFAULT_THROTTLE_LEVEL": "0"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "DeepSeek-R1-0528",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY"
+      ],
+      "hf_model_id": "deepseek-ai/DeepSeek-R1-0528",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-e3d97e5-a186bf4",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "efficientnet",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "efficientnet",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "gemma-3-1b-it",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150"
+      ],
+      "hf_model_id": "google/gemma-3-1b-it",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c254ee3-c4f2327",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 1
+    },
+    {
+      "model_name": "gemma-3-27b-it",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "google/gemma-3-27b-it",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-0b10c51-3499ffa",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 27
+    },
+    {
+      "model_name": "gemma-3-4b-it",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "google/gemma-3-4b-it",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c254ee3-c4f2327",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 4
+    },
+    {
+      "model_name": "gpt-oss-120b",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "T3K"
+      ],
+      "hf_model_id": "openai/gpt-oss-120b",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-65718bb-409b1cd",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 120
+    },
+    {
+      "model_name": "gpt-oss-20b",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "openai/gpt-oss-20b",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-60ffb199-3499ffa1",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 20
+    },
+    {
+      "model_name": "medgemma-27b-it",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "GALAXY",
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "google/medgemma-27b-it",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-0b10c51-3499ffa",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 27
+    },
+    {
+      "model_name": "medgemma-4b-it",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "google/medgemma-4b-it",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c254ee3-c4f2327",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_USE_V1": "1"
+      },
+      "param_count": 4
+    },
+    {
+      "model_name": "mobilenetv2",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "mobilenetv2",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "Qwen2.5-7B",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150X4",
+        "N300"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-7B",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-5b5db8a-e771fff",
+      "service_route": "/v1/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 7
+    },
+    {
+      "model_name": "Qwen2.5-7B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "N150X4",
+        "N300"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-7B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-5b5db8a-e771fff",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 7
+    },
+    {
+      "model_name": "Qwen2.5-Coder-32B-Instruct",
+      "model_type": "CHAT",
+      "display_model_type": "LLM",
+      "device_configurations": [
+        "GALAXY_T3K",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-Coder-32B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-17a5973-aa4ae1e",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 32
+    },
+    {
+      "model_name": "Qwen2.5-VL-32B-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-VL-32B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c18569e-b2894d3",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 32
+    },
+    {
+      "model_name": "Qwen2.5-VL-3B-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c18569e-b2894d3",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 3
+    },
+    {
+      "model_name": "Qwen2.5-VL-7B-Instruct",
+      "model_type": "VLM",
+      "display_model_type": "VLM",
+      "device_configurations": [
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen2.5-VL-7B-Instruct",
+      "inference_engine": "vLLM",
+      "supported_modalities": [
+        "text",
+        "image"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-inference-server/vllm-tt-metal-src-release-ubuntu-22.04-amd64:0.9.0-c18569e-b2894d3",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM_ALLOW_LONG_MAX_MODEL_LEN": 1
+      },
+      "param_count": 7
+    },
+    {
+      "model_name": "Qwen3-Embedding-4B",
+      "model_type": "EMBEDDING",
+      "display_model_type": "EMBEDDING",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen3-Embedding-4B",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM__MAX_NUM_BATCHED_TOKENS": "1024",
+        "VLLM__MAX_MODEL_LENGTH": "1024",
+        "VLLM__MIN_CONTEXT_LENGTH": "32",
+        "VLLM__MAX_NUM_SEQS": "1",
+        "MAX_BATCH_SIZE": "1",
+        "DEFAULT_THROTTLE_LEVEL": "0"
+      },
+      "param_count": 4
+    },
+    {
+      "model_name": "Qwen3-Embedding-8B",
+      "model_type": "EMBEDDING",
+      "display_model_type": "EMBEDDING",
+      "device_configurations": [
+        "GALAXY",
+        "N150",
+        "N300",
+        "T3K"
+      ],
+      "hf_model_id": "Qwen/Qwen3-Embedding-8B",
+      "inference_engine": "media",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-media-inference-server:0.2.0-2496be4518bca0a7a5b497a4cda3cfe7e2f59756",
+      "service_route": "/enqueue",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1",
+        "VLLM__MAX_NUM_BATCHED_TOKENS": "1024",
+        "VLLM__MAX_MODEL_LENGTH": "1024",
+        "VLLM__MIN_CONTEXT_LENGTH": "32",
+        "VLLM__MAX_NUM_SEQS": "1"
+      },
+      "param_count": 8
+    },
+    {
+      "model_name": "resnet-50",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "resnet-50",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "segformer",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "segformer",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "unet",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "unet",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "vit",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "vit",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    },
+    {
+      "model_name": "vovnet",
+      "model_type": "CNN",
+      "display_model_type": "CNN",
+      "device_configurations": [
+        "N150",
+        "N300"
+      ],
+      "hf_model_id": "vovnet",
+      "inference_engine": "forge",
+      "supported_modalities": [
+        "text"
+      ],
+      "status": "EXPERIMENTAL",
+      "version": "0.9.0",
+      "docker_image": "ghcr.io/tenstorrent/tt-shield/tt-media-inference-server-forge:a9b09e0b611da6deb4d8972e8296148fd864e5fd_98dcf62_60920940673",
+      "service_route": "/v1/chat/completions",
+      "shm_size": "32G",
+      "setup_type": "TT_INFERENCE_SERVER",
+      "env_vars": {
+        "VLLM_CONFIGURE_LOGGING": "1",
+        "VLLM_RPC_TIMEOUT": "900000",
+        "VLLM_TARGET_DEVICE": "tt",
+        "TORCHDYNAMO_DISABLE": "1"
+      },
+      "param_count": null
+    }
+  ]
+}
diff --git a/app/backend/shared_config/sync_models_from_inference_server.py b/app/backend/shared_config/sync_models_from_inference_server.py
new file mode 100644
index 00000000..e750208d
--- /dev/null
+++ b/app/backend/shared_config/sync_models_from_inference_server.py
@@ -0,0 +1,257 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
+
+"""
+Sync script: reads model_specs_output.json from a tt-inference-server artifact or checkout
+and normalizes it into models_from_inference_server.json (co-located with this script).
+
+Run from any directory:
+    python app/backend/shared_config/sync_models_from_inference_server.py
+"""
+
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+SCRIPT_DIR = Path(__file__).parent
+OUTPUT_JSON = SCRIPT_DIR / "models_from_inference_server.json"
+
+# Source JSON resolution order (steps 1-2 are handled in resolve_source_json):
+#   1. Explicit --source CLI argument
+#   2. TT_INFERENCE_ARTIFACT_PATH env var (set by run.py after artifact download)
+#   3. .artifacts/tt-inference-server/ under the repo root (artifact default location)
+#   4. tt-inference-server/ under the repo root (legacy submodule path)
+_REPO_ROOT = SCRIPT_DIR / "../../.."  # three levels up from this script's directory
+_CANDIDATE_SOURCES = [
+    _REPO_ROOT / ".artifacts/tt-inference-server/model_specs_output.json",
+    _REPO_ROOT / "tt-inference-server/model_specs_output.json",
+]
+
+
+def resolve_source_json(override: str | None = None) -> Path:
+    """Return the resolved path to model_specs_output.json.
+
+    Tries ``override`` (--source), then TT_INFERENCE_ARTIFACT_PATH, then each of
+    _CANDIDATE_SOURCES. Raises FileNotFoundError listing every path tried.
+    """
+    if override:
+        explicit = Path(override)
+        if not explicit.exists():
+            raise FileNotFoundError(f"--source path not found: {explicit}")
+        return explicit.resolve()
+
+    # The directory exported by run.py is searched before the static candidates.
+    candidates = list(_CANDIDATE_SOURCES)
+    artifact_path = os.environ.get("TT_INFERENCE_ARTIFACT_PATH")
+    if artifact_path:
+        candidates.insert(0, Path(artifact_path) / "model_specs_output.json")
+
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate.resolve()
+
+    tried = "\n".join(f"  {c.resolve()}" for c in candidates)
+    raise FileNotFoundError(f"Cannot find model_specs_output.json. Tried:\n{tried}")
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+DEVICE_SPECIFIC_ENV_KEYS = {"WH_ARCH_YAML", "MESH_DEVICE", "ARCH_NAME"}  # removed by filter_env_vars
+
+STATUS_ORDER = {"COMPLETE": 3, "FUNCTIONAL": 2, "EXPERIMENTAL": 1}  # larger value = higher priority
+
+# device_type string (from tt-inference-server) → DeviceConfigurations member name
+# Only device_types that exist in the DeviceConfigurations enum are listed; normalize() ignores the rest
+DEVICE_TYPE_TO_CONFIG = {
+    "N150": "N150",
+    "N300": "N300",
+    "T3K": "T3K",
+    "N150X4": "N150X4",
+    "P100": "P100",
+    "P150": "P150",
+    "P150X4": "P150X4",
+    "P150X8": "P150X8",
+    "GALAXY": "GALAXY",
+    "GALAXY_T3K": "GALAXY_T3K",
+}
+
+
+def map_model_type(raw_model_type: str, inference_engine: str) -> str:
+    """Map tt-inference-server model_type + inference_engine to tt-studio ModelTypes.
+
+    Only "CNN" depends on the engine; unrecognised combinations yield "CHAT".
+    """
+    # Types whose mapping does not depend on the inference engine.
+    engine_independent = {
+        "VLM": "VLM",
+        "IMAGE": "IMAGE_GENERATION",
+        "AUDIO": "SPEECH_RECOGNITION",
+        "TEXT_TO_SPEECH": "TTS",
+        "TTS": "TTS",
+        "VIDEO": "VIDEO",
+        "EMBEDDING": "EMBEDDING",
+    }
+    if raw_model_type in engine_independent:
+        return engine_independent[raw_model_type]
+    # CNN splits by engine: media = image generation (FLUX, Motif, etc.),
+    # forge = computer vision / object detection (resnet, vit, etc.).
+    if raw_model_type == "CNN":
+        return {"media": "IMAGE_GENERATION", "forge": "CNN"}.get(inference_engine, "CHAT")
+    # LLM (on any engine) and every unrecognised type default to chat.
+    return "CHAT"
+
+
+# Lowercase id fragments that mark a chat-capable model (row 2: reasoning models).
+CHAT_CAPABLE_PATTERNS = [
+    "instruct", "-chat", "chat-", "-it-", "-it", "assistant",
+    "deepseek-r1", "qwq", "qwen3", "gpt-oss",
+]
+
+
+def is_chat_capable(hf_model_id: str) -> bool:
+    """Return True when the lowercased model id contains a chat pattern."""
+    return any(map(hf_model_id.lower().__contains__, CHAT_CAPABLE_PATTERNS))
+
+
+def map_service_route(inference_engine: str, hf_model_id: str = "") -> str:
+    """Derive service_route from inference_engine (and model id for vLLM)."""
+    if inference_engine == "media":
+        return "/enqueue"
+    # vLLM models not recognised as chat-capable get the plain completions route.
+    if inference_engine == "vLLM" and not is_chat_capable(hf_model_id):
+        return "/v1/completions"
+    # Chat-capable vLLM models, forge, and unknown engines share the chat route.
+    return "/v1/chat/completions"
+
+
+def filter_env_vars(env_vars: dict) -> dict:
+    """Return a copy of env_vars without the DEVICE_SPECIFIC_ENV_KEYS entries."""
+    return {name: env_vars[name] for name in env_vars if name not in DEVICE_SPECIFIC_ENV_KEYS}
+
+
+def pick_higher_status(current: str | None, candidate: str) -> str:
+    """Return the higher-ranked status; ties keep ``current``, unranked ones count as 0."""
+    if current is None:
+        return candidate
+    return max(current, candidate, key=lambda status: STATUS_ORDER.get(status, 0))
+
+
+def normalize(source_path: Path) -> list[dict]:
+    """Collapse the per-device specs in ``source_path`` into one entry per model.
+
+    GPU entries are skipped; model-level fields come from the first entry seen
+    for each model_name, device types are unioned, and the highest status wins.
+    Returns the entries sorted by status (highest first), then by model_name.
+    """
+    with open(source_path, encoding="utf-8") as f:
+        raw = json.load(f)
+
+    # group by model_name, skipping GPU entries
+    by_model: dict[str, list[dict]] = {}
+    for entry in raw.values():
+        if entry.get("device_type") != "GPU":
+            by_model.setdefault(entry["model_name"], []).append(entry)
+
+    models = []
+    for model_name, entries in by_model.items():
+        # Use first entry as the canonical source for model-level fields
+        first = entries[0]
+        inference_engine = first.get("inference_engine", "vLLM")
+        raw_model_type = first.get("model_type", "LLM")
+        hf_model_id = first.get("hf_model_repo")
+
+        # Aggregate device_types (those missing from DEVICE_TYPE_TO_CONFIG are ignored)
+        device_configurations = sorted({
+            DEVICE_TYPE_TO_CONFIG[e["device_type"]]
+            for e in entries
+            if e.get("device_type") in DEVICE_TYPE_TO_CONFIG
+        })
+
+        # Pick highest status
+        status = None
+        for e in entries:
+            status = pick_higher_status(status, e.get("status", "EXPERIMENTAL"))
+
+        models.append({
+            "model_name": model_name,
+            "model_type": map_model_type(raw_model_type, inference_engine),
+            "display_model_type": raw_model_type,
+            "device_configurations": device_configurations,
+            "hf_model_id": hf_model_id,
+            "inference_engine": inference_engine,
+            "supported_modalities": first.get("supported_modalities", ["text"]),
+            "status": status,
+            "version": first.get("version", "0.0.1"),
+            "docker_image": first.get("docker_image"),
+            # ``or ""``: a null hf_model_repo must not reach the route matcher as None
+            "service_route": map_service_route(inference_engine, hf_model_id=hf_model_id or ""),
+            "shm_size": "32G",
+            "setup_type": "TT_INFERENCE_SERVER",
+            "env_vars": filter_env_vars(first.get("env_vars") or {}),
+            "param_count": first.get("param_count"),
+        })
+
+    # Sort: by status (highest first), then alphabetically by model_name
+    models.sort(key=lambda m: (-STATUS_ORDER.get(m["status"], 0), m["model_name"].lower()))
+    return models
+
+
+def main():
+    """CLI entry point: normalize the source specs and write OUTPUT_JSON.
+
+    Prints the source path, the output path and a per-status/type summary.
+    """
+    import argparse
+    from collections import Counter
+
+    parser = argparse.ArgumentParser(description="Sync model catalog from tt-inference-server")
+    parser.add_argument("--source", default=None, help="Path to model_specs_output.json (overrides auto-detection)")
+    args = parser.parse_args()
+
+    # resolve_source_json raises FileNotFoundError itself when nothing is found,
+    # so no second existence check is needed here.
+    source_path = resolve_source_json(args.source)
+    print(f"Reading: {source_path}")
+    models = normalize(source_path)
+
+    # Resolve artifact version from VERSION file or env vars (avoid leaking absolute paths)
+    artifact_version = None
+    version_file = source_path.parent / "VERSION"
+    if version_file.exists():
+        artifact_version = version_file.read_text(encoding="utf-8").strip()
+    if not artifact_version:
+        artifact_version = (
+            os.environ.get("TT_INFERENCE_ARTIFACT_VERSION")
+            or os.environ.get("TT_INFERENCE_ARTIFACT_BRANCH")
+            or "unknown"
+        )
+
+    catalog = {
+        "source": {
+            "artifact_version": artifact_version,
+            "generated_at": datetime.now(timezone.utc).isoformat(),
+        },
+        "total_models": len(models),
+        "models": models,
+    }
+
+    out_path = OUTPUT_JSON.resolve()
+    with open(out_path, "w", encoding="utf-8") as f:
+        json.dump(catalog, f, indent=2)
+        f.write("\n")
+
+    print(f"Written {len(models)} models → {out_path}")
+
+    # Print a summary
+    print(f"  Status distribution:       {dict(Counter(m['status'] for m in models))}")
+    print(f"  Type distribution:         {dict(Counter(m['model_type'] for m in models))}")
+    print(f"  Display type distribution: {dict(Counter(m['display_model_type'] for m in models))}")
+
+
+if __name__ == "__main__":  # run the sync only when executed as a script
+    main()
diff --git a/app/docker-compose.dev-mode.yml b/app/docker-compose.dev-mode.yml
index 9cfccb6b..6fd24a51 100644
--- a/app/docker-compose.dev-mode.yml
+++ b/app/docker-compose.dev-mode.yml
@@ -10,7 +10,8 @@ services:
     volumes:
       # Mount the local api directory for live code changes
       - ./backend:/backend
-    command: python ./manage.py runserver 0.0.0.0:8000
+    command: >
+      python manage.py runserver 0.0.0.0:8000
     environment:
       - DEBUG=True
     # Allow container to access host services (docker-control-service)
diff --git a/app/docker-compose.yml b/app/docker-compose.yml
index 19f07ba6..a55e9f51 100644
--- a/app/docker-compose.yml
+++ b/app/docker-compose.yml
@@ -20,7 +20,8 @@ services:
       - tt_studio_network
     ports:
       - "8000:8000"
-    command: gunicorn --workers 3 --bind 0.0.0.0:8000 --preload --timeout 1200 api.wsgi:application
+    command: >
+      gunicorn --workers 3 --bind 0.0.0.0:8000 --preload --timeout 1200 api.wsgi:application
     depends_on:
       tt_studio_chroma:
         condition: service_healthy
@@ -58,7 +59,7 @@ services:
       # Mount the local api directory for live code changes
       - ./backend:/backend
       # Mount tt-inference-server workflow logs for viewing deployment logs
-      - ${TT_STUDIO_ROOT}/tt-inference-server/workflow_logs:${TT_STUDIO_ROOT}/tt-inference-server/workflow_logs:ro
+      - ${TT_STUDIO_ROOT}/.artifacts/tt-inference-server/workflow_logs:${TT_STUDIO_ROOT}/.artifacts/tt-inference-server/workflow_logs:ro
 
     healthcheck:
       # On first application load resources for transformers/etc
diff --git a/app/frontend/src/api/modelsDeployedApis.ts b/app/frontend/src/api/modelsDeployedApis.ts
index 0bc1cd28..934bffed 100644
--- a/app/frontend/src/api/modelsDeployedApis.ts
+++ b/app/frontend/src/api/modelsDeployedApis.ts
@@ -48,19 +48,55 @@ interface DeployedModelInfo {
   id: string;
   modelName: string;
   status: string;
+  model_type?: string;
   internal_url?: string;
   health_url?: string;
   model_impl?: {
     model_name?: string;
     hf_model_id?: string;
+    model_type?: string;
   };
 }
 
 export const ModelType = {
   ChatModel: "ChatModel",
+  VLM: "VLM",
   ImageGeneration: "ImageGeneration",
+  VideoGeneration: "VideoGeneration",
   ObjectDetectionModel: "ObjectDetectionModel",
   SpeechRecognitionModel: "SpeechRecognitionModel",
+  TTS: "TTS",
+  Embedding: "Embedding",
+  CNN: "CNN",
+};
+
+/**
+ * Map backend model_type strings (from catalog/API) to frontend ModelType constants.
+ * Falls back to ChatModel for unknown types.
+ */
+export const getModelTypeFromBackendType = (backendType: string): string => {
+  switch (backendType) {
+    case "chat":
+      return ModelType.ChatModel;
+    case "vlm":
+      return ModelType.VLM;
+    case "image_generation":
+      return ModelType.ImageGeneration;
+    case "video_generation":
+      return ModelType.VideoGeneration;
+    case "object_detection":
+      return ModelType.ObjectDetectionModel;
+    case "speech_recognition":
+      return ModelType.SpeechRecognitionModel;
+    case "tts":
+      return ModelType.TTS;
+    case "embedding":
+      return ModelType.Embedding;
+    case "cnn":
+      return ModelType.CNN;
+    default:
+      return ModelType.ChatModel;
+  }
 };
 
 export const fetchModels = async (): Promise<Model[]> => {
@@ -214,12 +250,13 @@ export const handleRedeploy = (modelName: string): void => {
 export const handleModelNavigationClick = (
   modelID: string,
   modelName: string,
-  navigate: NavigateFunction
+  navigate: NavigateFunction,
+  modelType?: string
 ): void => {
-  const modelType = getModelTypeFromName(modelName);
-  const destination = getDestinationFromModelType(modelType);
-  console.log(`${modelType} button clicked for model: ${modelID}`);
-  console.log(`Opening ${modelType} for model: ${modelName}`);
+  const resolvedModelType = modelType ?? getModelTypeFromName(modelName);
+  const destination = getDestinationFromModelType(resolvedModelType);
+  console.log(`${resolvedModelType} button clicked for model: ${modelID}`);
+  console.log(`Opening ${resolvedModelType} for model: ${modelName}`);
   customToast.success(`${destination.slice(1)} page opened!`);
 
   navigate(destination, {
@@ -233,14 +270,109 @@ export const getDestinationFromModelType = (modelType: string): string => {
   switch (modelType) {
     case ModelType.ChatModel:
       return "/chat";
+    case ModelType.VLM:
+      return "/chat"; // VLM reuses the chat UI (supports image content)
     case ModelType.ImageGeneration:
       return "/image-generation";
+    case ModelType.VideoGeneration:
+      return "/chat"; // placeholder until video UI exists
     case ModelType.ObjectDetectionModel:
       return "/object-detection";
     case ModelType.SpeechRecognitionModel:
       return "/speech-to-text";
+    case ModelType.TTS:
+      return "/speech-to-text"; // TTS shares the audio page for now
+    case ModelType.Embedding:
+      return "/chat"; // placeholder
+    case ModelType.CNN:
+      return "/object-detection"; // CNN reuses object detection UI
     default:
-      return "/chat"; // /chat is the default
+      return "/chat";
+  }
+};
+
+// POSTs { model_id, weights_id, device_id } as JSON to /docker-api/deploy/ and returns the parsed JSON body.
+export const deployModel = async (
+  modelId: string,
+  weightsId: string,
+  deviceId: number = 0,
+): Promise<{ job_id?: string; status?: string; message?: string }> => {
+  const payload = JSON.stringify({
+    model_id: modelId,
+    weights_id: weightsId,
+    device_id: deviceId,
+  });
+  const response = await fetch("/docker-api/deploy/", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: payload,
+  });
+  return response.json(); // NOTE(review): response.ok is not checked; a non-2xx body is parsed and returned the same way
+};
+
+// ----- Voice Pipeline -----
+export interface VoicePipelineRequest {
+  audioFile: File;
+  whisperDeployId: string;
+  llmDeployId: string;
+  ttsDeployId?: string;
+  systemPrompt?: string;
+}
+
+/**
+ * POSTs the audio file and deploy IDs to the voice pipeline endpoint, then reads the
+ * streamed response and dispatches each `data: ` JSON line to the matching callback.
+ */
+export const runVoicePipeline = async (
+  req: VoicePipelineRequest,
+  onTranscript: (text: string) => void,
+  onLlmChunk: (text: string) => void,
+  onAudio: (dataUrl: string) => void,
+  onError: (stage: string, message: string) => void,
+  onDone: () => void,
+): Promise<void> => {
+  const form = new FormData();
+  form.append("audio_file", req.audioFile);
+  form.append("whisper_deploy_id", req.whisperDeployId);
+  form.append("llm_deploy_id", req.llmDeployId);
+  if (req.ttsDeployId) form.append("tts_deploy_id", req.ttsDeployId);
+  if (req.systemPrompt) form.append("system_prompt", req.systemPrompt);
+
+  const response = await fetch("/models-api/pipeline/voice/", {
+    method: "POST",
+    body: form,
+  });
+
+  if (!response.ok || !response.body) {
+    onError("pipeline", `HTTP ${response.status}`);
+    return;
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder();
+  let buffer = "";
+
+  while (true) {
+    const { done, value } = await reader.read();
+    if (done) break; // NOTE(review): text still held in `buffer` at end of stream is never processed
+    buffer += decoder.decode(value, { stream: true });
+
+    const lines = buffer.split("\n");
+    buffer = lines.pop() ?? ""; // keep the trailing partial line for the next chunk
+
+    for (const line of lines) {
+      if (!line.startsWith("data: ")) continue;
+      try {
+        const evt = JSON.parse(line.slice(6)); // 6 === "data: ".length
+        if (evt.type === "transcript") onTranscript(evt.text);
+        else if (evt.type === "llm_chunk") onLlmChunk(evt.text);
+        else if (evt.type === "audio_url") onAudio(evt.url);
+        else if (evt.type === "error") onError(evt.stage ?? "unknown", evt.message);
+        else if (evt.type === "done") onDone();
+      } catch {
+        // skip malformed lines (NOTE(review): this also swallows exceptions thrown by the callbacks above)
+      }
+    }
   }
 };
 
@@ -298,6 +430,7 @@ export const fetchDeployedModelsInfo = async (): Promise<
           modelData.model_impl?.hf_model_id ||
           "Unknown Model",
         status: "deployed",
+        model_type: modelData.model_impl?.model_type,
         internal_url: modelData.internal_url,
         health_url: modelData.health_url,
         model_impl: modelData.model_impl,
diff --git a/app/frontend/src/components/FirstStepForm.tsx b/app/frontend/src/components/FirstStepForm.tsx
index 0dbe346f..267b2676 100644
--- a/app/frontend/src/components/FirstStepForm.tsx
+++ b/app/frontend/src/components/FirstStepForm.tsx
@@ -8,15 +8,10 @@ import axios from "axios";
 import { useEffect, useState } from "react";
 import {
   Bot,
-  // Cpu,
-  // CheckCircle,
   XCircle,
-  MessageSquare,
-  // Image,
-  Eye,
-  Mic,
-  Palette,
-  // Camera,
+  CheckCircle2,
+  Zap,
+  FlaskConical,
 } from "lucide-react";
 import {
   Tooltip,
@@ -48,57 +43,73 @@ import BoardBadge from "./BoardBadge";
 import { DeployedModelsWarning } from "./DeployedModelsWarning";
 import { useModels } from "../hooks/useModels";
 
-// Model type configuration with icons and labels
-const MODEL_TYPE_CONFIG = {
-  chat: {
-    label: "Chat & Language Models",
-    icon: MessageSquare,
+// Status configuration with icons and labels
+const STATUS_CONFIG = {
+  COMPLETE: {
+    label: "Complete",
+    icon: CheckCircle2,
+    color: "text-green-600",
+    bgColor: "bg-green-50 dark:bg-green-900/20",
+    borderColor: "border-green-200 dark:border-green-800",
+  },
+  FUNCTIONAL: {
+    label: "Functional",
+    icon: Zap,
     color: "text-blue-500",
     bgColor: "bg-blue-50 dark:bg-blue-900/20",
     borderColor: "border-blue-200 dark:border-blue-800",
   },
-  image_generation: {
-    label: "Image Generation",
-    icon: Palette,
-    color: "text-purple-500",
-    bgColor: "bg-purple-50 dark:bg-purple-900/20",
-    borderColor: "border-purple-200 dark:border-purple-800",
-  },
-  object_detection: {
-    label: "Object Detection",
-    icon: Eye,
-    color: "text-emerald-500",
-    bgColor: "bg-emerald-50 dark:bg-emerald-900/20",
-    borderColor: "border-emerald-200 dark:border-emerald-800",
-  },
-  speech_recognition: {
-    label: "Speech Recognition",
-    icon: Mic,
-    color: "text-orange-500",
-    bgColor: "bg-orange-50 dark:bg-orange-900/20",
-    borderColor: "border-orange-200 dark:border-orange-800",
-  },
-  mock: {
-    label: "Test Models",
-    icon: Bot,
-    color: "text-gray-500",
-    bgColor: "bg-gray-50 dark:bg-gray-900/20",
-    borderColor: "border-gray-200 dark:border-gray-800",
+  EXPERIMENTAL: {
+    label: "Experimental",
+    icon: FlaskConical,
+    color: "text-amber-500",
+    bgColor: "bg-amber-50 dark:bg-amber-900/20",
+    borderColor: "border-amber-200 dark:border-amber-800",
   },
 };
 
+// Model type configuration for grouping by inference server type
+const TYPE_CONFIG: Record<string, { label: string; order: number }> = {
+  LLM:            { label: "LLM Models",       order: 1 },
+  VLM:            { label: "VLM Models",        order: 2 },
+  VIDEO:          { label: "Video Models",      order: 3 },
+  IMAGE:          { label: "Image Models",      order: 4 },
+  AUDIO:          { label: "Audio Models",      order: 5 },
+  TEXT_TO_SPEECH: { label: "TTS Models",        order: 6 },
+  EMBEDDING:      { label: "Embedding Models",  order: 7 },
+  CNN:            { label: "CNN Models",         order: 8 },
+};
+
 const FirstFormSchema = z.object({
   model: z.string().nonempty("Please select a model."),
 });
 
+// Multi-chip boards where the user needs to pick a chip slot.
+// Single-chip boards (N150, N300 standalone, E150, P100, P150, P300c) always have
+// only one chip so no picker is needed there.
+const MULTI_CHIP_BOARD_SLOTS: Record<string, number> = {
+  T3K:      4,   // 4x N300
+  T3000:    4,
+  N150X4:   4,
+  N300x4:   4,
+  P150X4:   4,
+  P150X8:   8,
+  P300Cx2:  4,   // 2 cards × 2 chips
+  P300Cx4:  8,   // 4 cards × 2 chips
+  GALAXY:   32,
+  GALAXY_T3K: 32,
+};
+
 export function FirstStepForm({
   setSelectedModel,
   setFormError,
+  setSelectedDeviceId,
   autoDeployModel,
   isAutoDeploying,
 }: {
   setSelectedModel: (model: string) => void;
   setFormError: (hasError: boolean) => void;
+  setSelectedDeviceId?: (deviceId: number) => void;
   autoDeployModel?: string | null;
   isAutoDeploying?: boolean;
 }) {
@@ -112,6 +123,7 @@ export function FirstStepForm({
   const [models, setModels] = useState<Model[]>([]);
   const [isLoading, setIsLoading] = useState(true);
   const [isWarningDismissed, setIsWarningDismissed] = useState(false);
+  const [deviceId, setDeviceId] = useState<number>(0);
 
   // Refresh models context when component mounts
   useEffect(() => {
@@ -185,9 +197,12 @@ export function FirstStepForm({
 
         console.log(
           "📝 FirstStepForm: Setting selectedModel to:",
-          selectedModel.id
+          selectedModel.id,
+          "device_id:",
+          deviceId,
         );
         setSelectedModel(selectedModel.id);
+        if (setSelectedDeviceId) setSelectedDeviceId(deviceId);
         console.log(
           "📝 FirstStepForm: selectedModel set, waiting for status check..."
         );
@@ -235,33 +250,35 @@ export function FirstStepForm({
     }
   }, [autoDeployModel, models, isAutoDeploying, form, onSubmit]);
 
-  // Get current board info and group models by type and compatibility
+  // Get current board info; models are grouped by display type, status and compatibility below
   const currentBoard = models[0]?.current_board || "unknown";
 
-  // Group models by type and compatibility
+  // Status priority for sorting: higher values are rendered first, unlisted statuses last
+  const STATUS_ORDER: Record<string, number> = {
+    COMPLETE: 3,
+    FUNCTIONAL: 2,
+    EXPERIMENTAL: 1,
+  };
+
+  // Group models by display type, then by status, then by hardware compatibility
+  type CompatibilityGroup = { compatible: Model[]; incompatible: Model[]; unknown: Model[] };
   const groupModelsByType = () => {
-    const grouped: Record<
-      string,
-      {
-        compatible: Model[];
-        incompatible: Model[];
-        unknown: Model[];
-      }
-    > = {};
+    const grouped: Record<string, Record<string, CompatibilityGroup>> = {};
 
     models.forEach((model) => {
-      const modelType = model.model_type || "unknown";
+      const displayType = model.display_model_type || "LLM";
+      const modelStatus = model.status || "EXPERIMENTAL";
 
-      if (!grouped[modelType]) {
-        grouped[modelType] = { compatible: [], incompatible: [], unknown: [] };
-      }
+      if (!grouped[displayType]) grouped[displayType] = {};
+      if (!grouped[displayType][modelStatus])
+        grouped[displayType][modelStatus] = { compatible: [], incompatible: [], unknown: [] };
 
       if (model.is_compatible === true) {
-        grouped[modelType].compatible.push(model);
+        grouped[displayType][modelStatus].compatible.push(model);
       } else if (model.is_compatible === false) {
-        grouped[modelType].incompatible.push(model);
+        grouped[displayType][modelStatus].incompatible.push(model);
       } else {
-        grouped[modelType].unknown.push(model);
+        grouped[displayType][modelStatus].unknown.push(model);
       }
     });
 
@@ -340,99 +357,106 @@ export function FirstStepForm({
                     </div>
                   )}
 
-                  {/* Render models grouped by type */}
-                  {Object.entries(groupedModels).map(
-                    ([modelType, modelsByCompatibility], typeIndex) => {
-                      const typeConfig =
-                        MODEL_TYPE_CONFIG[
-                          modelType as keyof typeof MODEL_TYPE_CONFIG
-                        ];
-                      const hasModels =
-                        modelsByCompatibility.compatible.length +
-                          modelsByCompatibility.incompatible.length +
-                          modelsByCompatibility.unknown.length >
-                        0;
-
-                      if (!hasModels) return null;
-
-                      const IconComponent = typeConfig?.icon || Bot;
+                  {/* Render models grouped by type, then by status */}
+                  {Object.entries(groupedModels)
+                    .sort(([a], [b]) => {
+                      const orderA = TYPE_CONFIG[a]?.order ?? 99;
+                      const orderB = TYPE_CONFIG[b]?.order ?? 99;
+                      return orderA - orderB;
+                    })
+                    .map(([displayType, statusGroups], typeIndex) => {
+                      const typeConfig = TYPE_CONFIG[displayType];
+                      const typeLabel = typeConfig?.label || `${displayType} Models`;
 
                       return (
-                        <div key={modelType}>
-                          {/* Model Type Header */}
+                        <div key={displayType}>
+                          {/* Type Group Header */}
                           {typeIndex > 0 && (
-                            <div className="h-px bg-gray-200 dark:bg-gray-700 my-2" />
+                            <div className="h-[2px] bg-gray-300 dark:bg-gray-600 my-2" />
                           )}
-                          <div
-                            className={`flex items-center gap-2 px-2 py-2 text-xs font-semibold ${typeConfig?.color || "text-gray-600"} ${typeConfig?.bgColor || "bg-gray-50 dark:bg-gray-900/20"}`}
-                          >
-                            <IconComponent className="w-4 h-4" />
-                            <span>{typeConfig?.label || modelType}</span>
+                          <div className="flex items-center gap-2 px-2 py-2 text-sm font-bold text-gray-800 dark:text-gray-200 bg-gray-100 dark:bg-gray-800/50">
+                            <span>{typeLabel}</span>
                           </div>
 
-                          {/* Compatible Models */}
-                          {modelsByCompatibility.compatible.map((model) => (
-                            <SelectItem
-                              key={model.id}
-                              value={model.name}
-                              className="pl-6 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
-                            >
-                              <div className="flex items-center w-full">
-                                <span className="text-green-500 mr-2 text-xs">
-                                  ●
-                                </span>
-                                <span className="flex-1">{model.name}</span>
-                                <span className="text-xs text-green-600 ml-2">
-                                  Compatible
-                                </span>
-                              </div>
-                            </SelectItem>
-                          ))}
-
-                          {/* Incompatible Models */}
-                          {modelsByCompatibility.incompatible.map((model) => (
-                            <SelectItem
-                              key={model.id}
-                              value={model.name}
-                              disabled={true}
-                              className="pl-6 opacity-50 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
-                            >
-                              <div className="flex items-center w-full">
-                                <span className="text-red-500 mr-2 text-xs">
-                                  ●
-                                </span>
-                                <span className="text-gray-500 flex-1">
-                                  {model.name}
-                                </span>
-                                <span className="text-xs text-red-500 ml-2">
-                                  Incompatible
-                                </span>
-                              </div>
-                            </SelectItem>
-                          ))}
-
-                          {/* Unknown Compatibility Models */}
-                          {modelsByCompatibility.unknown.map((model) => (
-                            <SelectItem
-                              key={model.id}
-                              value={model.name}
-                              className="pl-6 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
-                            >
-                              <div className="flex items-center w-full">
-                                <span className="text-yellow-500 mr-2 text-xs">
-                                  ●
-                                </span>
-                                <span className="flex-1">{model.name}</span>
-                                <span className="text-xs text-yellow-600 ml-2">
-                                  Unknown
-                                </span>
-                              </div>
-                            </SelectItem>
-                          ))}
+                          {/* Status sub-groups within this type */}
+                          {Object.entries(statusGroups)
+                            .sort(
+                              ([a], [b]) =>
+                                (STATUS_ORDER[b] ?? 0) - (STATUS_ORDER[a] ?? 0)
+                            )
+                            .map(([modelStatus, modelsByCompatibility]) => {
+                              const statusConfig =
+                                STATUS_CONFIG[modelStatus as keyof typeof STATUS_CONFIG];
+                              const hasModels =
+                                modelsByCompatibility.compatible.length +
+                                modelsByCompatibility.incompatible.length +
+                                modelsByCompatibility.unknown.length > 0;
+
+                              if (!hasModels) return null;
+
+                              const IconComponent = statusConfig?.icon || Bot;
+
+                              return (
+                                <div key={`${displayType}-${modelStatus}`}>
+                                  {/* Status Sub-Header */}
+                                  <div
+                                    className={`flex items-center gap-2 px-3 py-1.5 text-xs font-semibold ${statusConfig?.color || "text-gray-600"} ${statusConfig?.bgColor || "bg-gray-50 dark:bg-gray-900/20"}`}
+                                  >
+                                    <IconComponent className="w-3 h-3" />
+                                    <span>{statusConfig?.label || modelStatus}</span>
+                                  </div>
+
+                                  {/* Compatible Models */}
+                                  {modelsByCompatibility.compatible.map((model: Model) => (
+                                    <SelectItem
+                                      key={model.id}
+                                      value={model.name}
+                                      className="pl-8 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
+                                    >
+                                      <div className="flex items-center w-full">
+                                        <span className="text-green-500 mr-2 text-xs">●</span>
+                                        <span className="flex-1">{model.name}</span>
+                                        <span className="text-xs text-green-600 ml-2">Compatible</span>
+                                      </div>
+                                    </SelectItem>
+                                  ))}
+
+                                  {/* Incompatible Models */}
+                                  {modelsByCompatibility.incompatible.map((model: Model) => (
+                                    <SelectItem
+                                      key={model.id}
+                                      value={model.name}
+                                      disabled={true}
+                                      className="pl-8 opacity-50 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
+                                    >
+                                      <div className="flex items-center w-full">
+                                        <span className="text-red-500 mr-2 text-xs">●</span>
+                                        <span className="text-gray-500 flex-1">{model.name}</span>
+                                        <span className="text-xs text-red-500 ml-2">Incompatible</span>
+                                      </div>
+                                    </SelectItem>
+                                  ))}
+
+                                  {/* Unknown Compatibility Models */}
+                                  {modelsByCompatibility.unknown.map((model: Model) => (
+                                    <SelectItem
+                                      key={model.id}
+                                      value={model.name}
+                                      className="pl-8 [&>*:first-child]:hidden [&_svg]:hidden [&_[data-radix-select-item-indicator]]:hidden"
+                                    >
+                                      <div className="flex items-center w-full">
+                                        <span className="text-yellow-500 mr-2 text-xs">●</span>
+                                        <span className="flex-1">{model.name}</span>
+                                        <span className="text-xs text-yellow-600 ml-2">Unknown</span>
+                                      </div>
+                                    </SelectItem>
+                                  ))}
+                                </div>
+                              );
+                            })}
                         </div>
                       );
-                    }
-                  )}
+                    })}
 
                   {/* If no models loaded yet */}
                   {models.length === 0 && !isLoading && (
@@ -443,6 +467,36 @@ export function FirstStepForm({
                 </SelectContent>
               </Select>
 
+              {/* Device ID picker — only for multi-chip boards (T3K=4 slots, Galaxy=32, etc.) */}
+              {(() => {
+                const selected = models.find((m) => m.name === form.watch("model"));
+                const board = selected?.current_board ?? currentBoard;
+                const maxSlots = MULTI_CHIP_BOARD_SLOTS[board];
+                if (!maxSlots) return null; // single-chip board — no choice needed
+                const maxId = maxSlots - 1;
+                return (
+                  <div className="mt-4 flex items-center gap-4">
+                    <label className="text-sm font-medium text-gray-700 dark:text-gray-300 whitespace-nowrap">
+                      Chip slot (0–{maxId}):
+                    </label>
+                    <input
+                      type="number"
+                      min={0}
+                      max={maxId}
+                      value={deviceId}
+                      onChange={(e) => {
+                        const v = parseInt(e.target.value, 10);
+                        setDeviceId(isNaN(v) ? 0 : Math.max(0, Math.min(maxId, v)));
+                      }}
+                      className="w-20 rounded-md border border-gray-300 dark:border-gray-600 bg-white dark:bg-gray-800 px-2 py-1 text-sm text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-TT-purple-accent"
+                    />
+                    <span className="text-xs text-gray-500 dark:text-gray-400">
+                      /dev/tenstorrent/{deviceId} &nbsp;·&nbsp; {maxSlots} chips available
+                    </span>
+                  </div>
+                );
+              })()}
+
               {/* Summary info */}
               {models.length > 0 && !isLoading && (
                 <div className="mt-4 p-4 rounded-lg border-2 border-stone-200 bg-white text-stone-950 shadow-sm dark:border-stone-800 dark:bg-stone-950 dark:text-stone-50 hover:border-stone-400 dark:hover:border-stone-700 hover:shadow-md transition-all duration-200">
diff --git a/app/frontend/src/components/Footer.tsx b/app/frontend/src/components/Footer.tsx
index 513e9fe3..f60aa5eb 100644
--- a/app/frontend/src/components/Footer.tsx
+++ b/app/frontend/src/components/Footer.tsx
@@ -7,6 +7,7 @@ import { Badge } from "./ui/badge";
 import { useTheme } from "../hooks/useTheme";
 import { useNavigate, useLocation } from "react-router-dom";
 import { useModels } from "../hooks/useModels";
+import { useDeviceState } from "../hooks/useDeviceState";
 import {
   Tooltip,
   TooltipContent,
@@ -31,34 +32,19 @@ interface FooterProps {
   className?: string;
 }
 
-interface SystemStatus {
+interface SystemResources {
   cpuUsage: number;
   memoryUsage: number;
   memoryTotal: string;
-  boardName: string;
-  temperature: number;
-  devices: Array<{
-    index: number;
-    board_type: string;
-    temperature: number;
-    power: number;
-    voltage: number;
-  }>;
-  hardware_status?: "healthy" | "error" | "unknown";
-  hardware_error?: string;
-  error?: string;
 }
 
 const REFRESH_COOLDOWN_MS = 2 * 60 * 1000; // 2 minutes cooldown between manual refreshes
 
 const Footer: React.FC<FooterProps> = ({ className }) => {
-  const [systemStatus, setSystemStatus] = useState<SystemStatus>({
+  const [systemResources, setSystemResources] = useState<SystemResources>({
     cpuUsage: 0,
     memoryUsage: 0,
     memoryTotal: "0 GB",
-    boardName: "Unknown",
-    temperature: 0,
-    devices: [],
   });
   const [loading, setLoading] = useState(true);
   const [error, setError] = useState<string | null>(null);
@@ -67,6 +53,7 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
   const [showTTStudioModal, setShowTTStudioModal] = useState(false);
   const [bugReportLoading, setBugReportLoading] = useState(false);
   const { models } = useModels();
+  const { deviceState, refresh: refreshDeviceState } = useDeviceState();
   const navigate = useNavigate();
   const location = useLocation();
   const { theme } = useTheme();
@@ -82,8 +69,8 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
   // Check if we should hide the footer
   const shouldHideFooter = location.pathname === "/chat";
 
-  // Fetch system status from API
-  const fetchSystemStatus = async () => {
+  // Fetch only CPU/memory resources (board info comes from DeviceStateContext)
+  const fetchSystemResources = async () => {
     try {
       const response = await fetch("/board-api/footer-data/");
       if (!response.ok) {
@@ -96,18 +83,15 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
       }
 
       const data = await response.json();
-      setSystemStatus(data);
+      setSystemResources({
+        cpuUsage: data.cpuUsage ?? 0,
+        memoryUsage: data.memoryUsage ?? 0,
+        memoryTotal: data.memoryTotal ?? "0 GB",
+      });
       setError(null);
     } catch (err) {
-      console.error("Failed to fetch system status:", err);
+      console.error("Failed to fetch system resources:", err);
       setError(err instanceof Error ? err.message : "Unknown error");
-      // Keep previous data or use fallback
-      setSystemStatus((prev) => ({
-        ...prev,
-        boardName: prev.hardware_status === "error" ? prev.boardName : "Error",
-        hardware_status: prev.hardware_status === "error" ? "error" : "unknown",
-        error: err instanceof Error ? err.message : "Unknown error",
-      }));
     } finally {
       setLoading(false);
     }
@@ -129,18 +113,8 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
 
     try {
       setRefreshing(true);
-      const response = await fetch("/board-api/refresh-cache/", {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-        },
-      });
-
-      if (!response.ok) {
-        throw new Error(`HTTP error! status: ${response.status}`);
-      }
-
-      await fetchSystemStatus();
+      // Trigger an immediate re-poll of device state via context
+      refreshDeviceState();
     } catch (err) {
       console.error("Failed to refresh board detection:", err);
       setError(err instanceof Error ? err.message : "Unknown error");
@@ -151,26 +125,57 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
   };
 
   useEffect(() => {
-    // Initial fetch on mount only
-    fetchSystemStatus();
-
-    // No more timer-based polling - will refresh on model deployment events
+    // Fetch CPU/memory once on mount (board info is handled by DeviceStateContext)
+    fetchSystemResources();
+    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, []);
 
   const textColor = theme === "dark" ? "text-zinc-300" : "text-gray-700";
   const borderColor = theme === "dark" ? "border-zinc-700" : "border-gray-200";
   const bgColor = theme === "dark" ? "bg-zinc-900/95" : "bg-white/95";
   const mutedTextColor = theme === "dark" ? "text-zinc-400" : "text-gray-500";
-  const normalizedBoardName = systemStatus.boardName?.toLowerCase();
+
+  // Derive board info from DeviceStateContext
+  const boardName = deviceState?.board_name ?? "Unknown";
+  const deviceStateName = deviceState?.state ?? "UNKNOWN";
+  const devices = deviceState?.devices ?? [];
+  const avgTemperature =
+    devices.length > 0
+      ? Math.round(
+          (devices.reduce((sum, d) => sum + (d.temperature ?? 0), 0) /
+            devices.length) *
+            10
+        ) / 10
+      : 0;
+  const isHardwareHealthy = deviceStateName === "HEALTHY";
+  const isHardwareError =
+    deviceStateName === "BAD_STATE" || deviceStateName === "NOT_PRESENT";
+  const normalizedBoardName = boardName.toLowerCase();
   const isBoardDetectionIssue =
-    systemStatus.hardware_status === "error" ||
+    isHardwareError ||
     !!error ||
     normalizedBoardName === "error" ||
-    normalizedBoardName === "unknown";
+    normalizedBoardName === "unknown" ||
+    normalizedBoardName === "not present" ||
+    normalizedBoardName === "bad state";
   const remainingCooldownMs = getRemainingCooldownMs();
   const isInCooldown = remainingCooldownMs > 0;
   const cooldownSeconds = Math.ceil(remainingCooldownMs / 1000);
 
+  // Legacy-compatible derived values used by bug-report and render
+  const hardwareStatus: "healthy" | "error" | "unknown" =
+    deviceStateName === "HEALTHY"
+      ? "healthy"
+      : deviceStateName === "BAD_STATE" || deviceStateName === "NOT_PRESENT"
+        ? "error"
+        : "unknown";
+  const hardwareError =
+    deviceStateName === "BAD_STATE"
+      ? "Board is in a bad state (unresponsive). Reset recommended."
+      : deviceStateName === "NOT_PRESENT"
+        ? "No Tenstorrent device detected. Check hardware connection."
+        : null;
+
   // Handle click on deployed models section
   const handleDeployedModelsClick = () => {
     navigate("/models-deployed");
@@ -302,19 +307,19 @@ const Footer: React.FC<FooterProps> = ({ className }) => {
 **Time:** ${new Date().toLocaleTimeString()}
 
 ### System Information
-- **Board:** ${systemStatus.boardName}
-- **Hardware Status:** ${systemStatus.hardware_status || "unknown"}
-- **CPU Usage:** ${systemStatus.cpuUsage.toFixed(2)}%
-- **Memory Usage:** ${systemStatus.memoryUsage.toFixed(1)}% (${systemStatus.memoryTotal})
-- **Temperature:** ${systemStatus.temperature.toFixed(1)}°C
-- **Devices:** ${systemStatus.devices.length} device(s)
+- **Board:** ${boardName}
+- **Hardware Status:** ${hardwareStatus || "unknown"}
+- **CPU Usage:** ${systemResources.cpuUsage.toFixed(2)}%
+- **Memory Usage:** ${systemResources.memoryUsage.toFixed(1)}% (${systemResources.memoryTotal})
+- **Temperature:** ${avgTemperature.toFixed(1)}°C
+- **Devices:** ${devices.length} device(s)
 - **Current URL:** ${currentUrl}
 - **User Agent:** ${userAgent}
 
 ### Hardware Details
 ${
-  systemStatus.devices.length > 0
-    ? systemStatus.devices
+  devices.length > 0
+    ? devices
         .map(
           (device, index) =>
             `**Device ${index + 1}:**
@@ -332,7 +337,7 @@ ${models.length > 0 ? models.map((model) => `- ${model.name} (${model.status})`)
 
 ### Error Information
 ${error ? `**System Error:** ${error}` : "No system errors detected"}
-${systemStatus.hardware_error ? `**Hardware Error:** ${systemStatus.hardware_error}` : "No hardware errors detected"}
+${hardwareError ? `**Hardware Error:** ${hardwareError}` : "No hardware errors detected"}
 
 ### FastAPI Logs
 ${fastapiLogs}
@@ -379,15 +384,15 @@ Add any other context about the problem here.
         : text;
     };
     const limitDevicesList = (maxDevices: number) => {
-      if (systemStatus.devices.length <= maxDevices) return undefined;
-      const blocks = systemStatus.devices
+      if (devices.length <= maxDevices) return undefined;
+      const blocks = devices
         .map(
           (device, index) =>
             `**Device ${index + 1}:**\n- Board Type: ${device.board_type}\n- Temperature: ${device.temperature.toFixed(1)}°C\n- Power: ${device.power.toFixed(2)}W\n- Voltage: ${device.voltage.toFixed(2)}V`
         )
         .slice(0, maxDevices)
         .join("\n\n");
-      return `${blocks}\n\n... (${systemStatus.devices.length - maxDevices} more device entries truncated)`;
+      return `${blocks}\n\n... (${devices.length - maxDevices} more device entries truncated)`;
     };
 
     const MAX_URL_LENGTH = 7000; // conservative safety limit for GitHub new-issue URL
@@ -406,15 +411,15 @@ Add any other context about the problem here.
 **Time:** ${new Date().toLocaleTimeString()}
 
 ### System Information
-- **Board:** ${systemStatus.boardName}
-- **Hardware Status:** ${systemStatus.hardware_status || "unknown"}
-- **CPU Usage:** ${systemStatus.cpuUsage.toFixed(2)}%
-- **Memory Usage:** ${systemStatus.memoryUsage.toFixed(1)}% (${systemStatus.memoryTotal})
-- **Temperature:** ${systemStatus.temperature.toFixed(1)}°C
-- **Devices:** ${systemStatus.devices.length} device(s)
+- **Board:** ${boardName}
+- **Hardware Status:** ${hardwareStatus || "unknown"}
+- **CPU Usage:** ${systemResources.cpuUsage.toFixed(2)}%
+- **Memory Usage:** ${systemResources.memoryUsage.toFixed(1)}% (${systemResources.memoryTotal})
+- **Temperature:** ${avgTemperature.toFixed(1)}°C
+- **Devices:** ${devices.length} device(s)
 
 ### Hardware Details (truncated)
-${devicesLimited ?? (systemStatus.devices.length ? "(within limit)" : "No hardware devices detected")}
+${devicesLimited ?? (devices.length ? "(within limit)" : "No hardware devices detected")}
 
 ### Deployed Models
 ${
@@ -428,7 +433,7 @@ ${
 
 ### Error Information
 ${error ? `**System Error:** ${error}` : "No system errors detected"}
-${systemStatus.hardware_error ? `**Hardware Error:** ${systemStatus.hardware_error}` : "No hardware errors detected"}
+${hardwareError ? `**Hardware Error:** ${hardwareError}` : "No hardware errors detected"}
 
 ### FastAPI Logs (truncated)
 ${truncatedFastapi}
@@ -530,16 +535,16 @@ Full logs have been copied to your clipboard and downloaded as a file. Please pa
 **Time:** ${new Date().toLocaleTimeString()}
 
 ### System Information
-- **Board:** ${systemStatus.boardName}
-- **Hardware Status:** ${systemStatus.hardware_status || "unknown"}
-- **CPU Usage:** ${systemStatus.cpuUsage.toFixed(2)}%
-- **Memory Usage:** ${systemStatus.memoryUsage.toFixed(1)}% (${systemStatus.memoryTotal})
-- **Temperature:** ${systemStatus.temperature.toFixed(1)}°C
-- **Devices:** ${systemStatus.devices.length} device(s)
+- **Board:** ${boardName}
+- **Hardware Status:** ${hardwareStatus || "unknown"}
+- **CPU Usage:** ${systemResources.cpuUsage.toFixed(2)}%
+- **Memory Usage:** ${systemResources.memoryUsage.toFixed(1)}% (${systemResources.memoryTotal})
+- **Temperature:** ${avgTemperature.toFixed(1)}°C
+- **Devices:** ${devices.length} device(s)
 
 ### Error Information
 ${error ? `**System Error:** ${error}` : "No system errors detected"}
-${systemStatus.hardware_error ? `**Hardware Error:** ${systemStatus.hardware_error}` : "No hardware errors detected"}
+${hardwareError ? `**Hardware Error:** ${hardwareError}` : "No hardware errors detected"}
 
 ### FastAPI Logs
 ${fallbackFastapiLogs}
@@ -622,7 +627,7 @@ Add any other context about the problem here.
               <span>TT Studio 2.0.1</span>
               <Github className="h-3.5 w-3.5" />
             </div>
-            {systemStatus.boardName?.toLowerCase().includes("t3k") ? (
+            {boardName?.toLowerCase().includes("t3k") ? (
               <div
                 className="flex items-center gap-2 px-3 py-1.5 bg-TT-purple-accent/10 dark:bg-TT-purple-accent/30 rounded-full cursor-pointer transition-all duration-200 hover:bg-TT-purple-accent/20 dark:hover:bg-TT-purple-accent/40 hover:scale-105"
                 title="Hardware status - Click to learn more"
@@ -635,10 +640,10 @@ Add any other context about the problem here.
               >
                 <HardwareIcon type="loudbox" className="h-4 w-4" />
                 <span className="text-sm font-medium text-TT-purple-accent">
-                  {systemStatus.boardName}
+                  {boardName}
                 </span>
               </div>
-            ) : systemStatus.boardName?.toLowerCase().includes("n300") ? (
+            ) : boardName?.toLowerCase().includes("n300") ? (
               <div
                 className="flex items-center gap-2 px-3 py-1.5 bg-TT-purple-accent/10 dark:bg-TT-purple-accent/30 rounded-full cursor-pointer transition-all duration-200 hover:bg-TT-purple-accent/20 dark:hover:bg-TT-purple-accent/40 hover:scale-105"
                 title="Hardware status - Click to learn more"
@@ -662,14 +667,14 @@ Add any other context about the problem here.
                   />
                 </svg>
                 <span className="text-sm font-medium text-TT-purple-accent">
-                  {systemStatus.boardName}
+                  {boardName}
                 </span>
               </div>
             ) : (
               <div className="flex items-center gap-1.5">
                 <Badge
                   variant={
-                    systemStatus.hardware_status === "error"
+                    hardwareStatus === "error"
                       ? "destructive"
                       : error
                         ? "destructive"
@@ -677,7 +682,7 @@ Add any other context about the problem here.
                   }
                   className={`text-xs ${textColor} cursor-pointer transition-all duration-200 hover:scale-105 hover:bg-opacity-80`}
                   title={
-                    systemStatus.hardware_error ||
+                    hardwareError ||
                     error ||
                     "Hardware status - Click to learn more"
                   }
@@ -685,8 +690,8 @@ Add any other context about the problem here.
                     window.open("https://www.tenstorrent.com/hardware", "_blank");
                   }}
                 >
-                  {systemStatus.boardName}
-                  {systemStatus.hardware_status === "error" && " ⚠️"}
+                  {boardName}
+                  {hardwareStatus === "error" && " ⚠️"}
                 </Badge>
                 {isBoardDetectionIssue && (
                   <TooltipProvider>
@@ -724,10 +729,10 @@ Add any other context about the problem here.
                 )}
               </div>
             )}
-            {(error || systemStatus.hardware_error) && (
+            {(error || hardwareError) && (
               <span
                 className={`text-xs text-red-500`}
-                title={systemStatus.hardware_error || error || "System error"}
+                title={hardwareError || error || "System error"}
               >
                 ⚠️
               </span>
@@ -794,23 +799,23 @@ Add any other context about the problem here.
               SYSTEM RESOURCES USAGE:
             </span>
             <span className={`text-sm ${textColor}`}>
-              RAM: {systemStatus.memoryUsage.toFixed(1)}% (
-              {systemStatus.memoryTotal}) | CPU:{" "}
-              {systemStatus.cpuUsage.toFixed(2)}%
-              {systemStatus.hardware_status === "healthy" && (
-                <> | TEMP: {systemStatus.temperature.toFixed(1)}°C</>
+              RAM: {systemResources.memoryUsage.toFixed(1)}% (
+              {systemResources.memoryTotal}) | CPU:{" "}
+              {systemResources.cpuUsage.toFixed(2)}%
+              {hardwareStatus === "healthy" && (
+                <> | TEMP: {avgTemperature.toFixed(1)}°C</>
               )}
-              {systemStatus.hardware_status === "error" && (
+              {hardwareStatus === "error" && (
                 <> | TT HARDWARE: UNAVAILABLE</>
               )}
-              {systemStatus.hardware_status === "unknown" && (
+              {hardwareStatus === "unknown" && (
                 <> | TT HARDWARE: CHECKING...</>
               )}
             </span>
-            {systemStatus.devices.length > 1 &&
-              systemStatus.hardware_status === "healthy" && (
+            {devices.length > 1 &&
+              hardwareStatus === "healthy" && (
                 <span className={`text-xs ${mutedTextColor}`}>
-                  ({systemStatus.devices.length} devices)
+                  ({devices.length} devices)
                 </span>
               )}
           </div>
diff --git a/app/frontend/src/components/NavBar.tsx b/app/frontend/src/components/NavBar.tsx
index 5c9dda0e..1b970b00 100644
--- a/app/frontend/src/components/NavBar.tsx
+++ b/app/frontend/src/components/NavBar.tsx
@@ -13,6 +13,7 @@ import {
   Image,
   Eye,
   AudioLines,
+  Mic,
   ChevronRight,
   ChevronLeft,
   type LucideIcon,
@@ -46,6 +47,7 @@ import {
   getDestinationFromModelType,
   ModelType,
   getModelTypeFromName,
+  getModelTypeFromBackendType,
 } from "../api/modelsDeployedApis";
 import { useHeroSection } from "../hooks/useHeroSection";
 
@@ -386,7 +388,12 @@ export default function NavBar() {
     if (models.length > 0) {
       const firstModel = models[0];
       if (firstModel.id && firstModel.name) {
-        handleModelNavigationClick(firstModel.id, firstModel.name, navigate);
+        handleModelNavigationClick(
+          firstModel.id,
+          firstModel.name,
+          navigate,
+          firstModel.model_type
+        );
       } else {
         console.error("Model ID or name is undefined");
       }
@@ -467,6 +474,13 @@ export default function NavBar() {
       label: "Logs",
       tooltip: "View system logs",
     },
+    {
+      type: "link",
+      to: "/voice-pipeline",
+      icon: Mic,
+      label: "Voice Pipeline",
+      tooltip: "End-to-end voice demo (Whisper → LLM → TTS)",
+    },
   ];
 
   // Define model-based navigation items (shown only when isDeployedEnabled is true)
@@ -484,7 +498,9 @@ export default function NavBar() {
       if (models.length > 0) {
         // Show navigation items for each deployed model
         return models.map((model) => {
-          const modelType = getModelTypeFromName(model.name);
+          const modelType = model.model_type
+            ? getModelTypeFromBackendType(model.model_type)
+            : getModelTypeFromName(model.name);
           console.log(`Model: ${model.name}, Type: ${modelType}`);
           return {
             type: "button",
@@ -545,7 +561,9 @@ export default function NavBar() {
       // In TT-Studio mode, show only deployed models
       console.log("TT-Studio mode - creating navigation for deployed models");
       return models.map((model) => {
-        const modelType = getModelTypeFromName(model.name);
+        const modelType = model.model_type
+          ? getModelTypeFromBackendType(model.model_type)
+          : getModelTypeFromName(model.name);
         console.log(`TT-Studio Model: ${model.name}, Type: ${modelType}`);
         return {
           type: "button",
diff --git a/app/frontend/src/components/ResetIcon.tsx b/app/frontend/src/components/ResetIcon.tsx
index 8497ea06..9d27e3e2 100644
--- a/app/frontend/src/components/ResetIcon.tsx
+++ b/app/frontend/src/components/ResetIcon.tsx
@@ -1,11 +1,19 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
 
-import React, { useState, useEffect } from "react";
+import React, { useState } from "react";
 import axios from "axios";
-import { Cpu, CheckCircle, AlertTriangle } from "lucide-react";
+import {
+  Cpu,
+  CheckCircle,
+  XCircle,
+  AlertTriangle,
+  Loader2,
+  Trash2,
+  RotateCcw,
+  ChevronDown,
+} from "lucide-react";
 import { Spinner } from "./ui/spinner";
-import { customToast } from "./CustomToaster";
 import { useTheme } from "../hooks/useTheme";
 import { Button } from "./ui/button";
 import {
@@ -15,369 +23,542 @@ import {
   DialogHeader,
   DialogTitle,
   DialogTrigger,
-  DialogDescription,
 } from "./ui/dialog";
-import {
-  Accordion,
-  AccordionContent,
-  AccordionItem,
-  AccordionTrigger,
-} from "./ui/accordion";
 import { ScrollArea } from "./ui/scroll-area";
 import { fetchModels, deleteModel } from "../api/modelsDeployedApis";
 import { useModels } from "../hooks/useModels";
+import { useDeviceState } from "../hooks/useDeviceState";
 import BoardBadge from "./BoardBadge";
 
+type ResetStep = "deleting" | "resetting" | "done" | "failed" | null;
+
 interface ResetIconProps {
   onReset?: () => void;
 }
 
-// Board info interface
-interface BoardInfo {
-  type: string;
-  name: string;
+// ── Shared step-row (mirrors DeleteModelDialog): one progress row whose colours, status icon and caption follow `state` ──
+function StepRow({
+  number,
+  icon,
+  label,
+  sublabel,
+  state,
+}: {
+  number: number; // shown in the numbered circle while the row is "pending"
+  icon: React.ReactNode; // rendered inline, immediately before the label text
+  label: string;
+  sublabel?: string; // shown under the label only while the row is "active"
+  state: "pending" | "active" | "done" | "skipped"; // selects row colours, status icon and caption
+}) {
+  return (
+    <div
+      className={`flex items-start gap-3 p-3 rounded-lg border transition-all duration-300 ${
+        state === "active"
+          ? "bg-blue-900/30 border-blue-500/40"
+          : state === "done"
+            ? "bg-green-900/20 border-green-600/30"
+            : state === "skipped"
+              ? "bg-stone-800/30 border-stone-700/30"
+              : "bg-stone-800/50 border-stone-700/40"
+      }`}
+    >
+      <div className="w-7 h-7 flex items-center justify-center shrink-0 mt-0.5">
+        {state === "active" ? (
+          <Loader2 className="w-5 h-5 text-blue-400 animate-spin" />
+        ) : state === "done" ? (
+          <CheckCircle className="w-5 h-5 text-green-400" />
+        ) : state === "skipped" ? (
+          <CheckCircle className="w-5 h-5 text-stone-500" />
+        ) : (
+          <div className="w-6 h-6 rounded-full bg-stone-600 flex items-center justify-center text-xs font-bold text-stone-300">
+            {number}
+          </div>
+        )}
+      </div>
+      <div className="flex-1 min-w-0">
+        <div
+          className={`font-medium text-sm inline-flex items-center gap-1.5 ${
+            state === "pending" || state === "skipped"
+              ? "text-stone-400"
+              : "text-white"
+          }`}
+        >
+          {icon}
+          {label}
+        </div>
+        {sublabel && state === "active" && (
+          <div className="text-xs text-blue-300 mt-1">{sublabel}</div>
+        )}
+        {state === "done" && (
+          <div className="text-xs text-green-400 mt-0.5">Completed</div>
+        )}
+        {state === "skipped" && (
+          <div className="text-xs text-stone-500 mt-0.5">
+            No models deployed — skipped
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
+
+// ── Board status banner: alert for BAD_STATE / NOT_PRESENT, confirmation for HEALTHY with a known board type, otherwise nothing ──
+function BoardStatusBanner({
+  state,
+  boardType,
+}: {
+  state: string; // device state name; only "BAD_STATE", "NOT_PRESENT" and "HEALTHY" produce a banner
+  boardType: string; // the value "unknown" suppresses the healthy banner
+}) {
+  if (state === "BAD_STATE") {
+    return (
+      <div className="flex items-start gap-3 p-3 bg-orange-900/30 border border-orange-500/40 rounded-lg text-orange-200 text-sm">
+        <AlertTriangle className="h-4 w-4 text-orange-400 mt-0.5 shrink-0" />
+        <div>
+          <strong className="text-orange-300">Board unresponsive</strong>
+          <p className="mt-0.5 text-orange-200/80">
+            The board is present but not responding. A reset is strongly
+            recommended.
+          </p>
+        </div>
+      </div>
+    );
+  }
+  if (state === "NOT_PRESENT") {
+    return (
+      <div className="flex items-start gap-3 p-3 bg-red-900/30 border border-red-500/40 rounded-lg text-red-200 text-sm">
+        <AlertTriangle className="h-4 w-4 text-red-400 mt-0.5 shrink-0" />
+        <div>
+          <strong className="text-red-300">No device detected</strong>
+          <p className="mt-0.5 text-red-200/80">
+            <code className="bg-red-900/50 px-1 rounded">/dev/tenstorrent</code>{" "}
+            not found. Check your hardware connection.
+          </p>
+        </div>
+      </div>
+    );
+  }
+  if (state === "HEALTHY" && boardType !== "unknown") {
+    return (
+      <div className="flex items-center gap-2 p-3 bg-green-900/20 border border-green-600/30 rounded-lg text-green-200 text-sm">
+        <CheckCircle className="h-4 w-4 text-green-400 shrink-0" />
+        <span>
+          Board is <strong className="text-green-300">healthy</strong> — reset
+          is available if needed.
+        </span>
+      </div>
+    );
+  }
+  return null; // any other state, or HEALTHY with an "unknown" board type, renders nothing
 }
 
+// ── Main component ────────────────────────────────────────────────────────────
 const ResetIcon: React.FC<ResetIconProps> = ({ onReset }) => {
   const { theme } = useTheme();
-  const { refreshModels } = useModels();
-  const [isLoading, setIsLoading] = useState(false);
-  const [isCompleted, setIsCompleted] = useState(false);
+  const { models, refreshModels } = useModels();
+  const { deviceState, refresh: refreshDeviceState } = useDeviceState();
+
   const [isDialogOpen, setIsDialogOpen] = useState(false);
+  const [resetStep, setResetStep] = useState<ResetStep>(null);
   const [errorMessage, setErrorMessage] = useState<string | null>(null);
+  const [cmdOutput, setCmdOutput] = useState<string | null>(null);
+  const [showOutput, setShowOutput] = useState(false);
   const [resetHistory, setResetHistory] = useState<Date[]>([]);
-  const [fullOutput, setFullOutput] = useState<string | null>(null);
-  const [boardInfo, setBoardInfo] = useState<BoardInfo | null>(null);
-  const [boardLoading, setBoardLoading] = useState(false);
-
-  // Fetch board information when dialog opens
-  useEffect(() => {
-    if (isDialogOpen && !boardInfo) {
-      fetchBoardInfo();
-    }
-  }, [isDialogOpen]);
 
-  const fetchBoardInfo = async () => {
-    setBoardLoading(true);
-    try {
-      const response = await axios.get<{ type: string; name: string }>(
-        "/docker-api/board-info/"
-      );
-      setBoardInfo(response.data);
-    } catch (error) {
-      console.error("Error fetching board info:", error);
-      // Set default values if detection fails
-      setBoardInfo({ type: "unknown", name: "Unknown Board" });
-    } finally {
-      setBoardLoading(false);
-    }
-  };
+  const isLoading =
+    resetStep === "deleting" || resetStep === "resetting";
+  const isCompleted = resetStep === "done";
+  const isFailed = resetStep === "failed";
 
-  const iconColor = theme === "dark" ? "text-zinc-200" : "text-black";
-  const hoverIconColor =
-    theme === "dark" ? "hover:text-zinc-300" : "hover:text-gray-700";
-  const buttonBackgroundColor = theme === "dark" ? "bg-zinc-900" : "bg-white";
-  const hoverButtonBackgroundColor =
-    theme === "dark" ? "hover:bg-zinc-700" : "hover:bg-gray-200";
+  const boardType = deviceState?.board_type ?? "unknown";
+  const deviceStateName = deviceState?.state ?? "UNKNOWN";
+  const isBadState = deviceStateName === "BAD_STATE";
+  const isNotPresent = deviceStateName === "NOT_PRESENT";
+  const isResettingContext = deviceStateName === "RESETTING";
+  const deployedCount = models.length;
+
+  // Step states for the progress rows
+  const step1State: "pending" | "active" | "done" | "skipped" =
+    resetStep === "deleting"
+      ? "active"
+      : resetStep === "resetting" || resetStep === "done" || resetStep === "failed"
+        ? deployedCount === 0
+          ? "skipped"
+          : "done"
+        : "pending";
+
+  const step2State: "pending" | "active" | "done" | "skipped" =
+    resetStep === "resetting"
+      ? "active"
+      : resetStep === "done"
+        ? "done"
+        : "pending";
+
+  // ── Reset execution ─────────────────────────────────────────────────────────
+  // Runs the two-step reset shown in the dialog:
+  //   1. delete every model returned by fetchModels(), then call
+  //      refreshModels();
+  //   2. POST /docker-api/reset_board/ and collect its streamed output.
+  // A thrown error, or a failure marker in that output, ends in the "failed"
+  // step with `errorMessage` set; otherwise the step becomes "done".
+  const executeReset = async () => {
+    setErrorMessage(null);
+    setCmdOutput(null);
+    setShowOutput(false);
 
-  // Function to delete all deployed models
-  const deleteAllModels = async (): Promise<void> => {
     try {
-      const models = await fetchModels(); // Fetch all deployed models
-      console.log("Models to delete:", models);
-      for (const model of models) {
-        await customToast.promise(deleteModel(model.id), {
-          loading: `Deleting Model ID: ${model.id.substring(0, 4)}...`,
-          success: `Model ID: ${model.id.substring(0, 4)} deleted successfully.`,
-          error: `Failed to delete Model ID: ${model.id.substring(0, 4)}.`,
-        });
+      // Step 1: delete deployed models
+      setResetStep("deleting");
+      const currentModels = await fetchModels();
+      for (const model of currentModels) {
+        await deleteModel(model.id);
       }
-
-      // Refresh the ModelsContext to sync with backend
       await refreshModels();
-    } catch (error) {
-      console.error("Error deleting models:", error);
-      throw new Error("Failed to delete all models.");
-    }
-  };
 
-  const resetBoardAsync = async (): Promise<void> => {
-    const response = await axios.post<Blob>("/docker-api/reset_board/", null, {
-      responseType: "blob",
-    });
-
-    const reader = response.data.stream().getReader();
-    const decoder = new TextDecoder();
-    let output = "";
-    let success = true;
-    const statusCode = response.status;
-
-    while (true) {
-      const { done, value } = await reader.read();
-      if (done) break;
-
-      const chunk = decoder.decode(value, { stream: true });
-      output += chunk;
-
-      // Check for failure in each chunk
-      if (
-        chunk.includes("Command failed") ||
-        chunk.includes("No Tenstorrent devices detected") ||
-        chunk.includes("Exiting") ||
-        chunk.includes("Error")
-      ) {
-        success = false;
-      }
-    }
+      // Step 2: run board reset
+      setResetStep("resetting");
+      const response = await axios.post<Blob>("/docker-api/reset_board/", null, {
+        responseType: "blob",
+      });
 
-    const finalChunk = decoder.decode();
-    if (finalChunk) {
-      output += finalChunk;
-      if (
-        finalChunk.includes("Command failed") ||
-        finalChunk.includes("No Tenstorrent devices detected") ||
-        finalChunk.includes("Exiting") ||
-        finalChunk.includes("Error")
-      ) {
-        success = false;
-      }
-    }
+      const reader = response.data.stream().getReader();
+      const decoder = new TextDecoder();
+      let output = "";
+      let success = true;
 
-    const styledOutput = success
-      ? `
-        <span style="color: green;">Board Reset Successfully</span>
-        -----------------------
-        <pre style="color: yellow; white-space: pre-wrap;">${output}</pre>
-      `
-      : `
-        <span style="color: red;">Board Reset Failed</span>
-        -----------------------
-        <pre style="color: yellow; white-space: pre-wrap;">${output}</pre>
-      `;
-
-    setFullOutput(styledOutput);
-
-    if (!success) {
-      if (statusCode === 501) {
-        throw new Error(
-          "No Tenstorrent devices detected. Please check your hardware connection and try again."
-        );
-      } else {
-        // Parse the error message from the output
-        const errorLines = output
-          .split("\n")
-          .filter(
-            (line) =>
-              line.includes("tt-smi reset failed") ||
-              line.includes("Please check if:") ||
-              line.includes("1.") ||
-              line.includes("2.") ||
-              line.includes("3.") ||
-              line.includes("4.")
-          );
-        if (errorLines.length > 0) {
-          throw new Error(errorLines.join("\n"));
-        } else {
-          throw new Error(
-            "Board reset failed. Please check the command output for details."
-          );
+      // eslint-disable-next-line no-constant-condition
+      while (true) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        output += decoder.decode(value, { stream: true });
+      }
+      // Flush whatever the streaming decoder is still buffering.
+      output += decoder.decode();
+
+      // Scan the whole transcript, not each chunk: a read boundary can split
+      // a marker ("Command fa" + "iled") and a per-chunk check would miss it.
+      const failureMarkers = [
+        "Command failed",
+        "No Tenstorrent devices detected",
+        "Error",
+      ];
+      if (failureMarkers.some((marker) => output.includes(marker))) {
+        success = false;
+      }
-    }
-
-    setIsCompleted(true);
-    setResetHistory((prevHistory) => [...prevHistory, new Date()]);
-    setTimeout(() => setIsCompleted(false), 5000);
-  };
-
-  const resetBoard = async (): Promise<void> => {
-    setIsLoading(true);
-    setIsCompleted(false);
-    setErrorMessage(null);
-    setIsDialogOpen(false);
 
-    try {
-      await deleteAllModels();
+      setCmdOutput(output);
 
-      await customToast.promise(resetBoardAsync(), {
-        loading: "Resetting board...",
-        success: "Board reset successfully!",
-        error: "Failed to reset board.",
-      });
-
-      if (onReset) {
-        console.log("Calling onReset prop function");
-        onReset();
-      }
-    } catch (error) {
-      console.error("Error resetting board:", error);
-
-      if (error instanceof Error) {
-        const errorOutput = `
-          <span style="color: red;">Error Resetting Board</span>
-          -----------------------
-          <pre style="color: red; white-space: pre-wrap;">${error.message}</pre>
-        `;
-        setFullOutput(errorOutput);
-        setErrorMessage(error.message);
-      } else {
-        setErrorMessage("An unknown error occurred");
+      if (!success) {
+        throw new Error(
+          response.status === 501
+            ? "No Tenstorrent devices detected. Check hardware connection."
+            : "Board reset failed. See command output for details."
+        );
       }
 
-      setIsDialogOpen(true);
-    } finally {
-      setIsLoading(false);
+      setResetStep("done");
+      setResetHistory((prev) => [...prev, new Date()]);
+      refreshDeviceState();
+      if (onReset) onReset();
+    } catch (err) {
+      setErrorMessage(
+        err instanceof Error ? err.message : "An unknown error occurred."
+      );
+      setResetStep("failed");
     }
   };
 
-  const handleDialogOpenChange = (isOpen: boolean) => {
-    setIsDialogOpen(isOpen);
-    if (isOpen) {
+  const handleOpen = () => {
+    setIsDialogOpen(true);
+    // Only reset state when there's nothing in progress — otherwise re-show current progress
+    if (!isLoading) {
+      setResetStep(null);
       setErrorMessage(null);
+      setCmdOutput(null);
+      setShowOutput(false);
     }
   };
 
+  const handleClose = () => {
+    setIsDialogOpen(false);
+    // Do NOT reset state — any in-progress reset continues in the background.
+    // State is only cleared on the next fresh open (see handleOpen above).
+  };
+
+  // ── Navbar trigger button ───────────────────────────────────────────────────
+  const iconColor = theme === "dark" ? "text-zinc-200" : "text-black";
+  const hoverIconColor =
+    theme === "dark" ? "hover:text-zinc-300" : "hover:text-gray-700";
+  const btnBg = theme === "dark" ? "bg-zinc-900" : "bg-white";
+  const btnHover =
+    theme === "dark" ? "hover:bg-zinc-700" : "hover:bg-gray-200";
+
   return (
-    <Dialog open={isDialogOpen} onOpenChange={handleDialogOpenChange}>
+    <Dialog
+      open={isDialogOpen}
+      onOpenChange={(open) => (open ? handleOpen() : handleClose())}
+    >
       <DialogTrigger asChild>
         <Button
           variant="navbar"
           size="icon"
-          className={`relative inline-flex items-center justify-center p-2 rounded-full transition-all duration-300 ease-in-out ${buttonBackgroundColor} ${hoverButtonBackgroundColor}`}
-          onClick={() => setIsDialogOpen(true)}
+          className={`relative inline-flex items-center justify-center p-2 rounded-full transition-all duration-300 ease-in-out ${btnBg} ${btnHover}`}
+          onClick={handleOpen}
         >
           {isLoading ? (
             <Spinner />
           ) : isCompleted ? (
-            <CheckCircle className={`w-5 h-5 ${iconColor} ${hoverIconColor}`} />
+            <CheckCircle className={`w-5 h-5 text-green-500`} />
           ) : (
-            <Cpu className={`w-5 h-5 ${iconColor} ${hoverIconColor}`} />
+            <>
+              <Cpu className={`w-5 h-5 ${iconColor} ${hoverIconColor}`} />
+              {/* Red dot if board is unhealthy */}
+              {(isBadState || isNotPresent) && (
+                <span className="absolute top-1 right-1 w-2 h-2 rounded-full bg-red-500" />
+              )}
+            </>
           )}
         </Button>
       </DialogTrigger>
+
       <DialogContent
-        className={`sm:max-w-md p-6 rounded-lg shadow-lg ${
-          theme === "dark" ? "bg-zinc-900 text-white" : "bg-white text-black"
-        }`}
+        className="sm:max-w-md p-6 rounded-xl shadow-2xl bg-stone-900 text-white border border-stone-700 backdrop-blur-md"
       >
+        {/* ── HEADER ── */}
         <DialogHeader>
-          <div className="flex items-center justify-between mb-4">
-            <div className="flex items-center">
-              <AlertTriangle className="h-8 w-8 text-yellow-500 mr-2" />
-              <DialogTitle className="text-lg font-semibold">
-                Reset Card
-              </DialogTitle>
-            </div>
-            {boardInfo && boardInfo.type !== "unknown" && (
-              <BoardBadge boardName={boardInfo.type} />
-            )}
-            {boardLoading && (
-              <div className="flex items-center gap-2 px-3 py-1.5 bg-gray-100 dark:bg-gray-800 rounded-full">
-                <Spinner />
-                <span className="text-sm text-gray-600 dark:text-gray-400">
-                  Detecting...
-                </span>
+          <div className="flex items-center justify-between mb-1">
+            <div className="flex items-center gap-3">
+              {isLoading ? (
+                <div className="w-9 h-9 rounded-full bg-blue-900/50 flex items-center justify-center">
+                  <Loader2 className="h-5 w-5 text-blue-400 animate-spin" />
+                </div>
+              ) : isCompleted ? (
+                <div className="w-9 h-9 rounded-full bg-green-900/50 flex items-center justify-center">
+                  <CheckCircle className="h-5 w-5 text-green-400" />
+                </div>
+              ) : isFailed ? (
+                <div className="w-9 h-9 rounded-full bg-red-900/50 flex items-center justify-center">
+                  <XCircle className="h-5 w-5 text-red-400" />
+                </div>
+              ) : (
+                <div className="w-9 h-9 rounded-full bg-yellow-900/50 flex items-center justify-center">
+                  <RotateCcw className="h-5 w-5 text-yellow-400" />
+                </div>
+              )}
+              <div>
+                <DialogTitle className="text-base font-semibold text-white leading-tight">
+                  {isLoading
+                    ? resetStep === "deleting"
+                      ? "Removing deployed models…"
+                      : "Resetting board…"
+                    : isCompleted
+                      ? "Reset complete"
+                      : isFailed
+                        ? "Reset failed"
+                        : "Reset Card"}
+                </DialogTitle>
+                {isLoading && (
+                  <p className="text-xs text-stone-400 mt-0.5">
+                    Step {resetStep === "deleting" ? "1" : "2"} of 2 — do not
+                    close this window
+                  </p>
+                )}
               </div>
+            </div>
+            {/* Board badge — only when idle */}
+            {!isLoading && !isCompleted && !isFailed && boardType !== "unknown" && (
+              <BoardBadge boardName={boardType} />
             )}
           </div>
-          <DialogDescription className="text-left">
-            Are you sure you want to reset the card?
-          </DialogDescription>
         </DialogHeader>
-        {boardInfo && boardInfo.type === "unknown" && (
-          <div className="mb-4 p-4 bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300 rounded-md flex items-start">
-            <AlertTriangle className="h-5 w-5 text-red-700 dark:text-red-300 mr-2 mt-1 shrink-0" />
-            <div>
-              <div className="font-bold mb-1">
-                No Tenstorrent device detected
-              </div>
-              <div className="text-sm">
-                Device <code>/dev/tenstorrent</code> not found. Please check
-                your hardware connection and ensure the device is properly
-                installed.
+
+        <div className="space-y-3 mt-3">
+          {/* ── IDLE: board status + step overview ── */}
+          {!isLoading && !isCompleted && !isFailed && (
+            <>
+              <BoardStatusBanner
+                state={deviceStateName}
+                boardType={boardType}
+              />
+
+              {isResettingContext && (
+                <div className="flex items-center gap-3 p-3 bg-blue-900/30 border border-blue-500/40 rounded-lg text-blue-200 text-sm">
+                  <Loader2 className="h-4 w-4 text-blue-400 animate-spin shrink-0" />
+                  <span>Board is already resetting…</span>
+                </div>
+              )}
+
+              {/* Step overview */}
+              <StepRow
+                number={1}
+                icon={<Trash2 className="w-3.5 h-3.5" />}
+                label={
+                  deployedCount > 0
+                    ? `Stop ${deployedCount} deployed model${deployedCount > 1 ? "s" : ""}`
+                    : "Stop deployed models"
+                }
+                state="pending"
+              />
+              <StepRow
+                number={2}
+                icon={<RotateCcw className="w-3.5 h-3.5" />}
+                label="Reset the board (tt-smi -r)"
+                state="pending"
+              />
+
+              {/* Warning */}
+              <div className="flex items-start gap-2 p-3 bg-red-950/40 border border-red-500/25 rounded-lg text-red-200 text-sm">
+                <AlertTriangle className="h-4 w-4 text-red-400 mt-0.5 shrink-0" />
+                <span>
+                  <strong className="text-red-300">Warning:</strong> This will
+                  interrupt any ongoing processes on the card.
+                  {resetHistory.length > 0 && (
+                    <span className="block mt-1 text-red-300/70">
+                      Last reset:{" "}
+                      {resetHistory[resetHistory.length - 1].toLocaleTimeString()}
+                    </span>
+                  )}
+                </span>
               </div>
-            </div>
-          </div>
-        )}
-        <div
-          className={`mb-4 ${theme === "dark" ? "text-gray-400" : "text-gray-500"}`}
-        >
-          <div className="border-l-4 border-red-600 pl-2">
-            <div className="font-bold">
-              Warning! This action will stop all deployed models and might
-              interrupt ongoing processes.
-            </div>
-            {resetHistory.length > 0 && (
-              <div className="mt-2">
-                Note: This card was reset in the last 5 minutes. Frequent resets
-                may cause issues. Please wait before resetting again.
+            </>
+          )}
+
+          {/* ── LOADING: step progress ── */}
+          {isLoading && (
+            <>
+              <StepRow
+                number={1}
+                icon={<Trash2 className="w-3.5 h-3.5" />}
+                label={
+                  deployedCount > 0
+                    ? `Stop ${deployedCount} deployed model${deployedCount > 1 ? "s" : ""}`
+                    : "Stop deployed models"
+                }
+                sublabel="Sending stop signal to all containers…"
+                state={step1State}
+              />
+              <StepRow
+                number={2}
+                icon={<RotateCcw className="w-3.5 h-3.5" />}
+                label="Reset the board"
+                sublabel="Running tt-smi -r, this may take 10–30 seconds…"
+                state={step2State}
+              />
+            </>
+          )}
+
+          {/* ── COMPLETED ── */}
+          {isCompleted && (
+            <>
+              <StepRow
+                number={1}
+                icon={<Trash2 className="w-3.5 h-3.5" />}
+                label="Deployed models removed"
+                state={deployedCount === 0 ? "skipped" : "done"}
+              />
+              <StepRow
+                number={2}
+                icon={<RotateCcw className="w-3.5 h-3.5" />}
+                label="Board reset"
+                state="done"
+              />
+              {cmdOutput && (
+                <button
+                  type="button"
+                  onClick={() => setShowOutput((v) => !v)}
+                  className="flex items-center gap-1 text-xs text-stone-400 hover:text-stone-200 transition-colors"
+                >
+                  <ChevronDown
+                    className={`w-3.5 h-3.5 transition-transform ${showOutput ? "rotate-180" : ""}`}
+                  />
+                  {showOutput ? "Hide" : "Show"} command output
+                </button>
+              )}
+              {showOutput && cmdOutput && (
+                <ScrollArea className="h-36 rounded-lg border border-stone-700">
+                  <pre className="p-3 text-xs text-green-400 whitespace-pre-wrap font-mono bg-stone-950">
+                    {cmdOutput}
+                  </pre>
+                </ScrollArea>
+              )}
+            </>
+          )}
+
+          {/* ── FAILED ── */}
+          {isFailed && (
+            <>
+              <div className="flex items-start gap-3 p-3 bg-red-900/30 border border-red-500/40 rounded-lg">
+                <XCircle className="h-5 w-5 text-red-400 mt-0.5 shrink-0" />
+                <div>
+                  <p className="text-sm font-medium text-red-200">
+                    {errorMessage}
+                  </p>
+                  {cmdOutput && (
+                    <button
+                      type="button"
+                      onClick={() => setShowOutput((v) => !v)}
+                      className="flex items-center gap-1 text-xs text-stone-400 hover:text-stone-200 mt-2 transition-colors"
+                    >
+                      <ChevronDown
+                        className={`w-3.5 h-3.5 transition-transform ${showOutput ? "rotate-180" : ""}`}
+                      />
+                      {showOutput ? "Hide" : "Show"} command output
+                    </button>
+                  )}
+                </div>
               </div>
-            )}
-          </div>
+              {showOutput && cmdOutput && (
+                <ScrollArea className="h-36 rounded-lg border border-stone-700">
+                  <pre className="p-3 text-xs text-red-300 whitespace-pre-wrap font-mono bg-stone-950">
+                    {cmdOutput}
+                  </pre>
+                </ScrollArea>
+              )}
+            </>
+          )}
         </div>
-        {errorMessage && (
-          <div className="mt-4 p-4 bg-red-100 dark:bg-red-900/30 text-red-700 dark:text-red-300 rounded-md">
-            <div className="flex items-start">
-              <AlertTriangle className="h-5 w-5 text-red-700 dark:text-red-300 mr-2 mt-1 shrink-0" />
-              <div className="flex-1">
-                <div className="font-medium mb-2">Error:</div>
-                <pre className="whitespace-pre-wrap text-sm">
-                  {errorMessage}
-                </pre>
-              </div>
-            </div>
-          </div>
-        )}
-        <Accordion type="single" collapsible className="mt-4">
-          <AccordionItem value="history">
-            <AccordionTrigger className="text-md font-semibold">
-              Reset History
-            </AccordionTrigger>
-            <AccordionContent>
-              <ul className="list-disc pl-5 mt-2 text-sm">
-                {resetHistory.length > 0 ? (
-                  resetHistory.map((resetTime, index) => (
-                    <li key={index}>{resetTime.toLocaleString()}</li>
-                  ))
+
+        {/* ── FOOTER ── */}
+        <DialogFooter className="mt-5 flex justify-end gap-2">
+          {(isCompleted || isFailed) ? (
+            <Button
+              variant="outline"
+              onClick={handleClose}
+              className="border-stone-600 text-stone-300 hover:bg-stone-800"
+            >
+              Close
+            </Button>
+          ) : (
+            <>
+              <Button
+                variant="outline"
+                onClick={handleClose}
+                className="border-stone-600 text-stone-300 hover:bg-stone-800"
+              >
+                {isLoading ? "Minimize" : "Cancel"}
+              </Button>
+              <Button
+                onClick={executeReset}
+                disabled={isLoading || isResettingContext || isNotPresent}
+                className={`min-w-[120px] border ${
+                  isBadState
+                    ? "bg-orange-600 hover:bg-orange-700 border-orange-500/40 text-white"
+                    : "bg-red-600 hover:bg-red-700 border-red-500/30 text-white"
+                }`}
+              >
+                {isLoading ? (
+                  <span className="flex items-center gap-2">
+                    <Loader2 className="w-4 h-4 animate-spin" />
+                    Processing…
+                  </span>
+                ) : isBadState ? (
+                  "Reset (Recommended)"
                 ) : (
-                  <li>No resets yet.</li>
+                  "Reset Card"
                 )}
-              </ul>
-            </AccordionContent>
-          </AccordionItem>
-          {fullOutput && (
-            <AccordionItem value="output">
-              <AccordionTrigger className="text-md font-semibold">
-                Command Output
-              </AccordionTrigger>
-              <AccordionContent>
-                <ScrollArea className="h-48 w-full overflow-auto rounded-md border">
-                  <div
-                    className="text-sm mt-2 px-2 py-1 whitespace-pre-wrap bg-zinc-800 text-green-500 rounded-md"
-                    dangerouslySetInnerHTML={{ __html: fullOutput }}
-                  />
-                </ScrollArea>
-              </AccordionContent>
-            </AccordionItem>
+              </Button>
+            </>
           )}
-        </Accordion>
-        <DialogFooter className="mt-4 flex justify-end space-x-2">
-          <Button
-            type="button"
-            variant="outline"
-            onClick={() => setIsDialogOpen(false)}
-            className={`${theme === "dark" ? "text-white" : "text-black"}`}
-          >
-            Cancel
-          </Button>
-          <Button
-            type="button"
-            variant="outline"
-            className="bg-red-600 text-white hover:bg-red-700"
-            onClick={resetBoard}
-          >
-            Yes, Reset
-          </Button>
         </DialogFooter>
       </DialogContent>
     </Dialog>
diff --git a/app/frontend/src/components/SelectionSteps.tsx b/app/frontend/src/components/SelectionSteps.tsx
index 07f8211d..e230e2f2 100644
--- a/app/frontend/src/components/SelectionSteps.tsx
+++ b/app/frontend/src/components/SelectionSteps.tsx
@@ -22,6 +22,8 @@ export interface Model {
   compatible_boards: string[]; // List of boards this model can run on
   model_type: string; // Type of model (e.g., CHAT, IMAGE_GENERATION, etc.)
   current_board: string; // The detected board type
+  status?: "EXPERIMENTAL" | "FUNCTIONAL" | "COMPLETE" | null;
+  display_model_type?: string;
 }
 
 export default function StepperDemo() {
@@ -40,6 +42,7 @@ export default function StepperDemo() {
   };
 
   const [selectedModel, setSelectedModel] = useState<string | null>(null);
+  const [selectedDeviceId, setSelectedDeviceId] = useState<number>(0);
   const [loading, setLoading] = useState(false);
   const [formError, setFormError] = useState(false);
   const [isAutoDeploying, setIsAutoDeploying] = useState(false);
@@ -72,9 +75,11 @@ export default function StepperDemo() {
       console.log("Found model for auto-deploy:", model);
 
       // Deploy with default weights
+      const deviceIdParam = parseInt(searchParams.get("device-id") ?? "0", 10);
       const deployPayload = {
         model_id: model.id,
         weights_id: "", // Empty string for default weights
+        device_id: isNaN(deviceIdParam) ? 0 : deviceIdParam,
       };
 
       console.log("Auto-deploy payload:", deployPayload);
@@ -137,6 +142,7 @@ export default function StepperDemo() {
     const payload = JSON.stringify({
       model_id,
       weights_id,
+      device_id: selectedDeviceId,
     });
 
     console.log("📦 Deploying with default weights:", { model_id, weights_id });
@@ -213,6 +219,7 @@ export default function StepperDemo() {
                     console.log("🔄 setSelectedModel called with:", modelId);
                     setSelectedModel(modelId);
                   }}
+                  setSelectedDeviceId={setSelectedDeviceId}
                   setFormError={setFormError}
                   autoDeployModel={autoDeployModel}
                   isAutoDeploying={isAutoDeploying}
diff --git a/app/frontend/src/components/chatui/runInference.ts b/app/frontend/src/components/chatui/runInference.ts
index 251c7cd0..09dffb2d 100644
--- a/app/frontend/src/components/chatui/runInference.ts
+++ b/app/frontend/src/components/chatui/runInference.ts
@@ -331,8 +331,8 @@ export const runInference = async (
                   metricsTracker.recordUsage(usage);
                 }
 
-                // Handle generated text content
-                const content = jsonData.choices[0]?.delta?.content || "";
+                // Handle generated text content (chat completions use delta.content, text completions use text)
+                const content = jsonData.choices[0]?.delta?.content ?? jsonData.choices[0]?.text ?? "";
                 if (content) {
                   // Record first token arrival
                   metricsTracker.recordFirstToken();
diff --git a/app/frontend/src/components/models/DeleteModelDialog.tsx b/app/frontend/src/components/models/DeleteModelDialog.tsx
index 887436f2..e8948286 100644
--- a/app/frontend/src/components/models/DeleteModelDialog.tsx
+++ b/app/frontend/src/components/models/DeleteModelDialog.tsx
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC
 
-// React import not needed for modern JSX transform
+import type { ReactNode } from "react";
+import { AlertTriangle, CheckCircle, Loader2, Trash2, RotateCcw } from "lucide-react";
 import {
   Dialog,
   DialogContent,
@@ -10,70 +11,176 @@ import {
   DialogTitle,
 } from "../ui/dialog";
 import { Button } from "../ui/button";
-import { AlertTriangle } from "lucide-react";
+
+export type DeleteStep = "deleting" | "resetting" | null;
 
 interface Props {
   open: boolean;
   modelId: string;
   isLoading: boolean;
+  deleteStep: DeleteStep;
   onConfirm: () => void;
   onCancel: () => void;
 }
 
+function StepRow({
+  number,
+  icon,
+  label,
+  sublabel,
+  state,
+}: {
+  number: number;
+  icon: ReactNode;
+  label: string;
+  sublabel?: string;
+  state: "pending" | "active" | "done";
+}) {
+  return (
+    <div
+      className={`flex items-start gap-3 p-3 rounded-lg border transition-all duration-300 ${
+        state === "active"
+          ? "bg-blue-900/30 border-blue-500/40"
+          : state === "done"
+            ? "bg-green-900/20 border-green-600/30"
+            : "bg-stone-800/50 border-stone-700/40"
+      }`}
+    >
+      <div className="w-7 h-7 flex items-center justify-center shrink-0 mt-0.5">
+        {state === "active" ? (
+          <Loader2 className="w-5 h-5 text-blue-400 animate-spin" />
+        ) : state === "done" ? (
+          <CheckCircle className="w-5 h-5 text-green-400" />
+        ) : (
+          <div className="w-6 h-6 rounded-full bg-stone-600 flex items-center justify-center text-xs font-bold text-stone-300">
+            {number}
+          </div>
+        )}
+      </div>
+      <div className="flex-1 min-w-0">
+        <div
+          className={`font-medium text-sm ${
+            state === "pending" ? "text-stone-400" : "text-white"
+          }`}
+        >
+          <span className="inline-flex items-center gap-1.5">
+            {icon}
+            {label}
+          </span>
+        </div>
+        {sublabel && state === "active" && (
+          <div className="text-xs text-blue-300 mt-1">{sublabel}</div>
+        )}
+        {state === "done" && (
+          <div className="text-xs text-green-400 mt-0.5">Completed</div>
+        )}
+      </div>
+    </div>
+  );
+}
+
 export default function DeleteModelDialog({
   open,
-  modelId: _modelId, // Marked as intentionally unused for now
+  modelId: _modelId,
   isLoading,
+  deleteStep,
   onConfirm,
   onCancel,
 }: Props) {
+  const step1State =
+    deleteStep === "deleting"
+      ? "active"
+      : deleteStep === "resetting"
+        ? "done"
+        : "pending";
+
+  const step2State =
+    deleteStep === "resetting" ? "active" : "pending";
+
   return (
-    <Dialog open={open} onOpenChange={(v) => !v && onCancel()}>
-      <DialogContent className="sm:max-w-md p-6 rounded-xl shadow-2xl bg-stone-900/95 text-white border-2 border-yellow-500/50 backdrop-blur-md">
+    <Dialog open={open} onOpenChange={(v) => !v && !isLoading && onCancel()}>
+      <DialogContent className="sm:max-w-md p-6 rounded-xl shadow-2xl bg-stone-900 text-white border border-stone-700 backdrop-blur-md">
         <DialogHeader>
-          <div className="flex items-center justify-between mb-4">
-            <div className="flex items-center">
-              <AlertTriangle className="h-8 w-8 text-yellow-500 mr-2" />
-              <DialogTitle className="text-lg font-semibold text-white">
-                Delete Model & Reset Card
+          <div className="flex items-center gap-3 mb-1">
+            {isLoading ? (
+              <div className="w-9 h-9 rounded-full bg-blue-900/50 flex items-center justify-center">
+                <Loader2 className="h-5 w-5 text-blue-400 animate-spin" />
+              </div>
+            ) : (
+              <div className="w-9 h-9 rounded-full bg-yellow-900/50 flex items-center justify-center">
+                <AlertTriangle className="h-5 w-5 text-yellow-400" />
+              </div>
+            )}
+            <div>
+              <DialogTitle className="text-base font-semibold text-white leading-tight">
+                {isLoading
+                  ? deleteStep === "deleting"
+                    ? "Removing model…"
+                    : "Resetting board…"
+                  : "Delete Model & Reset Card"}
               </DialogTitle>
+              {isLoading && (
+                <p className="text-xs text-stone-400 mt-0.5">
+                  Step {deleteStep === "deleting" ? "1" : "2"} of 2 — do not close this window
+                </p>
+              )}
             </div>
           </div>
         </DialogHeader>
-        <div className="mb-4 p-4 bg-yellow-900/30 text-yellow-100 rounded-lg border border-yellow-500/30 backdrop-blur-sm flex items-start">
-          <AlertTriangle className="h-5 w-5 text-yellow-400 mr-2 mt-1 shrink-0" />
-          <div>
-            <div className="font-bold mb-1 text-yellow-100">
-              Warning! This action will stop and remove the model, then reset
-              the card.
-            </div>
-            <div className="text-sm text-yellow-200">
-              Deleting a model will attempt to stop and remove the model
-              container.
-              <br />
-              After deletion, the card will automatically be reset using{" "}
-              <code>tt-smi reset</code>.
-              <br />
-              <span className="font-bold text-yellow-300">
-                This may interrupt any ongoing processes on the card.
-              </span>
-            </div>
-          </div>
+
+        <div className="space-y-2 mt-2">
+          {/* Step 1 */}
+          <StepRow
+            number={1}
+            icon={<Trash2 className="w-3.5 h-3.5" />}
+            label="Stop & remove model container"
+            sublabel="Sending stop signal to the container…"
+            state={step1State}
+          />
+
+          {/* Step 2 */}
+          <StepRow
+            number={2}
+            icon={<RotateCcw className="w-3.5 h-3.5" />}
+            label="Reset the board"
+            sublabel="Running tt-smi -r, this may take 10–30 seconds…"
+            state={step2State}
+          />
         </div>
-        <DialogFooter className="mt-4 flex justify-end space-x-2">
+
+        {/* Warning — only shown when idle */}
+        {!isLoading && (
+          <div className="mt-4 flex items-start gap-2 p-3 bg-red-950/40 rounded-lg border border-red-500/25 text-red-200 text-sm">
+            <AlertTriangle className="h-4 w-4 text-red-400 mt-0.5 shrink-0" />
+            <span>
+              <strong className="text-red-300">Warning:</strong> This will
+              interrupt any ongoing processes on the card and cannot be undone.
+            </span>
+          </div>
+        )}
+
+        <DialogFooter className="mt-5 flex justify-end gap-2">
           <Button
+            variant="outline"
             onClick={onCancel}
             disabled={isLoading}
-            className="hover:shadow-lg hover:shadow-stone-200/20 transition-all duration-300 hover:-translate-y-0.5 active:translate-y-0 rounded-lg"
+            className="border-stone-600 text-stone-300 hover:bg-stone-800"
           >
             Cancel
           </Button>
           <Button
             onClick={onConfirm}
-            className="bg-red-600 text-white hover:bg-red-700 hover:shadow-lg hover:shadow-red-500/30 transition-all duration-300 hover:-translate-y-0.5 active:translate-y-0 rounded-lg border border-red-500/30"
             disabled={isLoading}
+            className="bg-red-600 text-white hover:bg-red-700 border border-red-500/30 min-w-[130px]"
           >
-            {isLoading ? "Processing..." : `Yes, Delete & Reset`}
+            {isLoading ? (
+              <span className="flex items-center gap-2">
+                <Loader2 className="w-4 h-4 animate-spin" />
+                Processing…
+              </span>
+            ) : (
+              "Delete & Reset"
+            )}
           </Button>
         </DialogFooter>
       </DialogContent>
diff --git a/app/frontend/src/components/models/ModelsDeployedCard.tsx b/app/frontend/src/components/models/ModelsDeployedCard.tsx
index 2f68ca20..b9e437c7 100644
--- a/app/frontend/src/components/models/ModelsDeployedCard.tsx
+++ b/app/frontend/src/components/models/ModelsDeployedCard.tsx
@@ -30,7 +30,7 @@ import type {
 } from "../../types/models";
 import ModelsToolbar from "./ModelsToolbar.tsx";
 import ModelsTable from "./ModelsTable.tsx";
-import DeleteModelDialog from "./DeleteModelDialog.tsx";
+import DeleteModelDialog, { type DeleteStep } from "./DeleteModelDialog.tsx";
 import LogStreamDialog from "./Logs/LogStreamDialog.tsx";
 import { useNavigate } from "react-router-dom";
 import { useTablePrefs } from "../../hooks/useTablePrefs";
@@ -131,6 +131,7 @@ export default function ModelsDeployedCard(): JSX.Element {
   const [showDeleteModal, setShowDeleteModal] = useState(false);
   const [deleteTargetId, setDeleteTargetId] = useState<string | null>(null);
   const [isProcessingDelete, setIsProcessingDelete] = useState(false);
+  const [deleteStep, setDeleteStep] = useState<DeleteStep>(null);
 
   useEffect(() => {
     loadModels();
@@ -150,30 +151,28 @@ export default function ModelsDeployedCard(): JSX.Element {
     setIsProcessingDelete(true);
     const truncatedModelId = deleteTargetId.substring(0, 4);
     try {
+      // Step 1: stop & remove the model (backend also runs tt-smi -r internally)
+      setDeleteStep("deleting");
       await customToast.promise(deleteModel(deleteTargetId), {
-        loading: `Attempting to delete Model ID: ${truncatedModelId}...`,
-        success: `Model ID: ${truncatedModelId} has been deleted.`,
-        error: `Failed to delete Model ID: ${truncatedModelId}.`,
+        loading: `Stopping model ${truncatedModelId}…`,
+        success: `Model ${truncatedModelId} stopped.`,
+        error: `Failed to stop model ${truncatedModelId}.`,
       });
-      // Simulate resetCard same as original placeholder
-      await customToast.promise(
-        new Promise((resolve) => window.setTimeout(resolve, 2000)),
-        {
-          loading: "Resetting card (tt-smi reset)...",
-          success: "Card reset successfully!",
-          error: "Failed to reset card.",
-        }
-      );
+
+      // Step 2: the board reset already ran inside the stop API call above; hold the "resetting" step for 2 s while cleanup settles
+      setDeleteStep("resetting");
+      await new Promise((resolve) => window.setTimeout(resolve, 2000));
+
       await refreshModels();
       triggerHardwareRefresh();
       setShowDeleteModal(false);
       setDeleteTargetId(null);
-      // Slight delay then refresh health
       window.setTimeout(() => {
         refreshAllHealth();
       }, 1000);
     } finally {
       setIsProcessingDelete(false);
+      setDeleteStep(null);
     }
   }, [deleteTargetId, refreshModels, triggerHardwareRefresh, refreshAllHealth]);
 
@@ -353,8 +352,9 @@ export default function ModelsDeployedCard(): JSX.Element {
         open={showDeleteModal}
         modelId={deleteTargetId || ""}
         isLoading={isProcessingDelete}
+        deleteStep={deleteStep}
         onConfirm={handleConfirmDelete}
-        onCancel={() => setShowDeleteModal(false)}
+        onCancel={() => !isProcessingDelete && setShowDeleteModal(false)}
       />
     </ElevatedCard>
   );
diff --git a/app/frontend/src/components/pipeline/VoicePipelineDemo.tsx b/app/frontend/src/components/pipeline/VoicePipelineDemo.tsx
new file mode 100644
index 00000000..fcdf11dc
--- /dev/null
+++ b/app/frontend/src/components/pipeline/VoicePipelineDemo.tsx
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+import { useEffect, useRef, useState } from "react";
+import { Mic, Square, Volume2, CheckCircle, Loader2, Circle } from "lucide-react";
+import { Button } from "../ui/button";
+import {
+  Select,
+  SelectContent,
+  SelectItem,
+  SelectTrigger,
+  SelectValue,
+} from "../ui/select";
+import { runVoicePipeline } from "../../api/modelsDeployedApis";
+import { customToast } from "../CustomToaster";
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+interface DeployedModelInfo {
+  id: string; // key of the entry in the /models-api/deployed/ response
+  modelName: string; // model_impl.model_name, else hf_model_id, else "Unknown"
+  model_type?: string; // model_impl.model_type, when the entry has one
+}
+
+type PipelineStage = "idle" | "recording" | "stt" | "llm" | "tts" | "done";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// Fetches /models-api/deployed/ and keeps entries whose model_type matches;
+// resolves to [] on any HTTP or network failure.
+async function fetchDeployedByType(
+  modelType: string
+): Promise<DeployedModelInfo[]> {
+  try {
+    const response = await fetch("/models-api/deployed/");
+    if (!response.ok) return [];
+    const deployed: Record<string, any> = await response.json();
+    const matches: DeployedModelInfo[] = [];
+    for (const [id, info] of Object.entries(deployed)) {
+      const impl = info.model_impl;
+      if (impl?.model_type !== modelType) continue;
+      const modelName = impl?.model_name || impl?.hf_model_id || "Unknown";
+      matches.push({ id, modelName, model_type: impl?.model_type });
+    }
+    return matches;
+  } catch {
+    return [];
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Stage indicator
+// ---------------------------------------------------------------------------
+
+const STAGES: { key: PipelineStage; label: string }[] = [
+  { key: "recording", label: "Mic" },
+  { key: "stt", label: "Whisper" },
+  { key: "llm", label: "LLM" },
+  { key: "tts", label: "TTS" },
+]; // markers rendered by StageIndicator, in array order
+
+const STAGE_ORDER: Record<PipelineStage, number> = {
+  idle: -1, // below every entry in STAGES, so nothing is marked done
+  recording: 0,
+  stt: 1,
+  llm: 2,
+  tts: 3,
+  done: 4, // above every entry in STAGES, so all are marked done
+};
+
+/**
+ * Renders one marker per STAGES entry: a check once `current` has moved past
+ * it, a spinner while it is the current stage, otherwise an empty circle.
+ * Every marker after the first is preceded by a connector bar.
+ */
+function StageIndicator({ current }: { current: PipelineStage }) {
+  const currentOrder = STAGE_ORDER[current];
+
+  const markers = STAGES.map(({ key, label }, index) => {
+    const isDone = currentOrder > STAGE_ORDER[key];
+    let icon = <Circle className="w-5 h-5 text-gray-400" />;
+    if (isDone) {
+      icon = <CheckCircle className="w-5 h-5 text-green-500" />;
+    } else if (current === key) {
+      icon = (
+        <Loader2 className="w-5 h-5 text-TT-purple-accent animate-spin" />
+      );
+    }
+    const barColor = isDone ? "bg-green-500" : "bg-gray-300 dark:bg-gray-600";
+    return (
+      <div key={key} className="flex items-center gap-2">
+        {index > 0 && <div className={`h-0.5 w-8 ${barColor}`} />}
+        <div className="flex flex-col items-center gap-1">
+          {icon}
+          <span className="text-xs text-gray-500 dark:text-gray-400">
+            {label}
+          </span>
+        </div>
+      </div>
+    );
+  });
+
+  return <div className="flex items-center gap-2">{markers}</div>;
+}
+
+// ---------------------------------------------------------------------------
+// Main component
+// ---------------------------------------------------------------------------
+
+/**
+ * Demo page: record mic audio, run it through STT → LLM → optional TTS.
+ */
+export default function VoicePipelineDemo() {
+  // Model dropdowns
+  const [sttModels, setSttModels] = useState<DeployedModelInfo[]>([]);
+  const [llmModels, setLlmModels] = useState<DeployedModelInfo[]>([]);
+  const [ttsModels, setTtsModels] = useState<DeployedModelInfo[]>([]);
+
+  const [whisperDeployId, setWhisperDeployId] = useState("");
+  const [llmDeployId, setLlmDeployId] = useState("");
+  const [ttsDeployId, setTtsDeployId] = useState(""); // "" means skip TTS
+  const NO_TTS = "__none__"; // Select items may not use "" as their value
+
+  // Recording
+  const [isRecording, setIsRecording] = useState(false);
+  const mediaRecorderRef = useRef<MediaRecorder | null>(null);
+  const chunksRef = useRef<Blob[]>([]);
+
+  // Pipeline state
+  const [stage, setStage] = useState<PipelineStage>("idle");
+  const [transcript, setTranscript] = useState("");
+  const [llmResponse, setLlmResponse] = useState("");
+  const [audioUrl, setAudioUrl] = useState<string | null>(null);
+
+  // Fetch deployed models on mount; release the microphone on unmount
+  useEffect(() => {
+    Promise.all([
+      fetchDeployedByType("speech_recognition"),
+      fetchDeployedByType("chat"),
+      fetchDeployedByType("tts"),
+    ]).then(([stt, llm, tts]) => {
+      setSttModels(stt);
+      setLlmModels(llm);
+      setTtsModels(tts);
+      if (stt.length > 0) setWhisperDeployId(stt[0].id);
+      if (llm.length > 0) setLlmDeployId(llm[0].id);
+      if (tts.length > 0) setTtsDeployId(tts[0].id);
+    });
+    return () => {
+      mediaRecorderRef.current?.stream.getTracks().forEach((t) => t.stop());
+    };
+  }, []);
+
+  const startRecording = async () => {
+    try {
+      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+      const mr = new MediaRecorder(stream);
+      chunksRef.current = [];
+      mr.ondataavailable = (e) => chunksRef.current.push(e.data);
+      mr.start();
+      mediaRecorderRef.current = mr;
+      setIsRecording(true);
+      setStage("recording");
+      setTranscript("");
+      setLlmResponse("");
+      setAudioUrl(null);
+    } catch {
+      customToast.error("Microphone access denied");
+    }
+  };
+
+  const stopRecording = () => {
+    const mr = mediaRecorderRef.current;
+    if (!mr) return;
+    mr.onstop = async () => {
+      const blob = new Blob(chunksRef.current, { type: "audio/webm" });
+      const file = new File([blob], "recording.webm", { type: "audio/webm" });
+      await runPipeline(file);
+    };
+    mr.stop();
+    mr.stream.getTracks().forEach((t) => t.stop());
+    setIsRecording(false);
+  };
+
+  const runPipeline = async (audioFile: File) => {
+    if (!whisperDeployId || !llmDeployId) {
+      customToast.error("Please select STT and LLM models");
+      setStage("idle");
+      return;
+    }
+
+    setStage("stt");
+    try {
+      await runVoicePipeline(
+        {
+          audioFile,
+          whisperDeployId,
+          llmDeployId,
+          ttsDeployId: ttsDeployId || undefined,
+        },
+        // onTranscript
+        (text) => {
+          setTranscript(text);
+          setStage("llm");
+        },
+        // onLlmChunk
+        (chunk) => {
+          setLlmResponse((prev) => prev + chunk);
+        },
+        // onAudio — playback starts via the <audio autoPlay> element below
+        (url) => {
+          setAudioUrl(url);
+          setStage("tts");
+        },
+        // onError
+        (failedStage, message) => {
+          customToast.error(`Pipeline error (${failedStage}): ${message}`);
+          setStage("idle");
+        },
+        // onDone
+        () => {
+          setStage("done");
+        }
+      );
+    } catch (err) {
+      // A thrown/rejected error must not leave the UI stuck in "Processing…"
+      const reason = err instanceof Error ? err.message : String(err);
+      customToast.error(`Pipeline error: ${reason}`);
+      setStage("idle");
+    }
+  };
+
+  return (
+    <div className="flex flex-col gap-6 max-w-3xl mx-auto px-4 py-8">
+      <h1 className="text-2xl font-bold text-gray-900 dark:text-white">
+        Voice Pipeline Demo
+      </h1>
+      <p className="text-sm text-gray-500 dark:text-gray-400">
+        Mic → Whisper STT → LLM → TTS → Speaker
+      </p>
+
+      {/* Model selectors */}
+      <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
+        <div className="flex flex-col gap-1">
+          <label className="text-xs font-semibold text-gray-600 dark:text-gray-300">
+            STT (Whisper)
+          </label>
+          <Select value={whisperDeployId} onValueChange={setWhisperDeployId}>
+            <SelectTrigger>
+              <SelectValue
+                placeholder={
+                  sttModels.length === 0 ? "No STT deployed" : "Select STT"
+                }
+              />
+            </SelectTrigger>
+            <SelectContent>
+              {sttModels.map((m) => (
+                <SelectItem key={m.id} value={m.id}>
+                  {m.modelName}
+                </SelectItem>
+              ))}
+            </SelectContent>
+          </Select>
+        </div>
+
+        <div className="flex flex-col gap-1">
+          <label className="text-xs font-semibold text-gray-600 dark:text-gray-300">
+            LLM
+          </label>
+          <Select value={llmDeployId} onValueChange={setLlmDeployId}>
+            <SelectTrigger>
+              <SelectValue
+                placeholder={
+                  llmModels.length === 0 ? "No LLM deployed" : "Select LLM"
+                }
+              />
+            </SelectTrigger>
+            <SelectContent>
+              {llmModels.map((m) => (
+                <SelectItem key={m.id} value={m.id}>
+                  {m.modelName}
+                </SelectItem>
+              ))}
+            </SelectContent>
+          </Select>
+        </div>
+
+        <div className="flex flex-col gap-1">
+          <label className="text-xs font-semibold text-gray-600 dark:text-gray-300">
+            TTS (optional)
+          </label>
+          <Select
+            value={ttsDeployId}
+            onValueChange={(v) => setTtsDeployId(v === NO_TTS ? "" : v)}
+          >
+            <SelectTrigger>
+              <SelectValue placeholder="None (skip TTS)" />
+            </SelectTrigger>
+            <SelectContent>
+              <SelectItem value={NO_TTS}>None</SelectItem>
+              {ttsModels.map((m) => (
+                <SelectItem key={m.id} value={m.id}>
+                  {m.modelName}
+                </SelectItem>
+              ))}
+            </SelectContent>
+          </Select>
+        </div>
+      </div>
+
+      {/* Stage indicator */}
+      <div className="flex justify-center py-2">
+        <StageIndicator current={stage} />
+      </div>
+
+      {/* Record button */}
+      <div className="flex justify-center">
+        {isRecording ? (
+          <Button
+            variant="destructive"
+            size="lg"
+            className="flex items-center gap-2 px-8"
+            onClick={stopRecording}
+          >
+            <Square className="w-5 h-5" />
+            Stop Recording
+          </Button>
+        ) : (
+          <Button
+            size="lg"
+            className="flex items-center gap-2 px-8 bg-TT-purple-accent hover:bg-TT-purple text-white"
+            onClick={startRecording}
+            disabled={stage !== "idle" && stage !== "done"}
+          >
+            <Mic className="w-5 h-5" />
+            {stage === "idle" || stage === "done"
+              ? "Start Recording"
+              : "Processing…"}
+          </Button>
+        )}
+      </div>
+
+      {/* Outputs */}
+      {transcript && (
+        <div className="rounded-lg border border-gray-200 dark:border-gray-700 p-4 bg-white dark:bg-gray-900">
+          <p className="text-xs font-semibold text-gray-500 dark:text-gray-400 mb-1">
+            Transcript
+          </p>
+          <p className="text-sm text-gray-800 dark:text-gray-100">
+            {transcript}
+          </p>
+        </div>
+      )}
+
+      {llmResponse && (
+        <div className="rounded-lg border border-gray-200 dark:border-gray-700 p-4 bg-white dark:bg-gray-900">
+          <p className="text-xs font-semibold text-gray-500 dark:text-gray-400 mb-1">
+            LLM Response
+          </p>
+          <p className="text-sm text-gray-800 dark:text-gray-100 whitespace-pre-wrap">
+            {llmResponse}
+          </p>
+        </div>
+      )}
+
+      {audioUrl && (
+        <div className="rounded-lg border border-gray-200 dark:border-gray-700 p-4 bg-white dark:bg-gray-900 flex items-center gap-4">
+          <Volume2 className="w-5 h-5 text-TT-purple-accent" />
+          <audio controls autoPlay src={audioUrl} className="flex-1" />
+        </div>
+      )}
+    </div>
+  );
+}
diff --git a/app/frontend/src/contexts/DeviceStateContext.ts b/app/frontend/src/contexts/DeviceStateContext.ts
new file mode 100644
index 00000000..a9768597
--- /dev/null
+++ b/app/frontend/src/contexts/DeviceStateContext.ts
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+import { createContext } from "react";
+
+export type DeviceState =
+  | "HEALTHY"
+  | "BAD_STATE"
+  | "RESETTING"
+  | "NOT_PRESENT"
+  | "UNKNOWN";
+
+export interface DeviceInfo {
+  index: number;
+  board_type: string;
+  bus_id: string;
+  temperature: number; // NOTE(review): unit not stated in this file — confirm with backend
+  power: number; // NOTE(review): unit not stated in this file — confirm with backend
+  voltage: number; // NOTE(review): unit not stated in this file — confirm with backend
+}
+
+export interface DeviceStateData { // parsed body of GET /board-api/device-state/
+  state: DeviceState;
+  board_type: string;
+  board_name: string;
+  devices: DeviceInfo[];
+  last_updated: string; // NOTE(review): presumably a timestamp — confirm the format
+  reset_suggested: boolean;
+}
+
+export interface DeviceStateContextType {
+  deviceState: DeviceStateData | null; // null until a poll has succeeded
+  loading: boolean; // true until the first poll attempt finishes
+  error: string | null; // message from the latest failed poll; null after a success
+  /** Immediately re-fetch device state and reschedule polling. */
+  refresh: () => void;
+}
+
+export const DeviceStateContext = createContext<
+  DeviceStateContextType | undefined
+>(undefined); // undefined default lets useDeviceState detect a missing provider
diff --git a/app/frontend/src/contexts/ModelsContext.ts b/app/frontend/src/contexts/ModelsContext.ts
index 57dd2202..c7c64e62 100644
--- a/app/frontend/src/contexts/ModelsContext.ts
+++ b/app/frontend/src/contexts/ModelsContext.ts
@@ -11,6 +11,7 @@ export interface Model {
   status: string;
   health: string;
   ports: string;
+  model_type?: string;
 }
 
 export interface ModelsContextType {
diff --git a/app/frontend/src/hooks/useDeviceState.ts b/app/frontend/src/hooks/useDeviceState.ts
new file mode 100644
index 00000000..af0eee89
--- /dev/null
+++ b/app/frontend/src/hooks/useDeviceState.ts
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+import { useContext } from "react";
+import { DeviceStateContext } from "../contexts/DeviceStateContext";
+
+// Returns the DeviceStateContext value. Throws if the value is undefined,
+// which is the context's default when no DeviceStateProvider is mounted.
+export const useDeviceState = () => {
+  const value = useContext(DeviceStateContext);
+  if (value !== undefined) return value;
+  throw new Error("useDeviceState must be used within a DeviceStateProvider");
+};
diff --git a/app/frontend/src/pages/VoicePipelinePage.tsx b/app/frontend/src/pages/VoicePipelinePage.tsx
new file mode 100644
index 00000000..de5c2b54
--- /dev/null
+++ b/app/frontend/src/pages/VoicePipelinePage.tsx
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+import VoicePipelineDemo from "../components/pipeline/VoicePipelineDemo";
+
+// Route-level page that renders the voice pipeline demo component.
+const VoicePipelinePage = () => <VoicePipelineDemo />;
+export default VoicePipelinePage;
diff --git a/app/frontend/src/providers/DeviceStateContext.tsx b/app/frontend/src/providers/DeviceStateContext.tsx
new file mode 100644
index 00000000..9da7d048
--- /dev/null
+++ b/app/frontend/src/providers/DeviceStateContext.tsx
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: © 2026 Tenstorrent AI ULC
+
+import React, { useState, useCallback, useEffect, useRef } from "react";
+import {
+  DeviceStateContext,
+  type DeviceStateData,
+} from "../contexts/DeviceStateContext";
+
+/**
+ * Adaptive poll intervals in milliseconds, keyed by device state.
+ * Fast polling during recovery states so the UI updates promptly.
+ */
+const POLL_INTERVALS: Record<string, number> = {
+  HEALTHY: 30_000,
+  BAD_STATE: 5_000,
+  RESETTING: 2_000,
+  NOT_PRESENT: 30_000,
+  UNKNOWN: 10_000,
+};
+
+/** Polls /board-api/device-state/ and shares the result via DeviceStateContext. */
+export const DeviceStateProvider: React.FC<{ children: React.ReactNode }> = ({
+  children,
+}) => {
+  const [deviceState, setDeviceState] = useState<DeviceStateData | null>(null);
+  const [loading, setLoading] = useState(true);
+  const [error, setError] = useState<string | null>(null);
+
+  // Last reported state, kept in a ref so scheduleNext never reads a stale value.
+  const stateRef = useRef<string>("UNKNOWN");
+  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  // pollRef lets us call poll() from the refresh callback without circular deps.
+  const pollRef = useRef<() => Promise<void>>(async () => {});
+
+  const scheduleNext = useCallback(() => {
+    if (timerRef.current) clearTimeout(timerRef.current);
+    const interval = POLL_INTERVALS[stateRef.current] ?? 10_000;
+    timerRef.current = setTimeout(() => pollRef.current(), interval);
+  }, []);
+
+  useEffect(() => {
+    let active = true; // false after unmount: an in-flight poll must not re-arm
+    const poll = async () => {
+      try {
+        const response = await fetch("/board-api/device-state/");
+        if (!response.ok)
+          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+        const data: DeviceStateData = await response.json();
+        stateRef.current = data.state;
+        setDeviceState(data);
+        setError(null);
+      } catch (err) {
+        setError(err instanceof Error ? err.message : "Unknown error");
+      } finally {
+        setLoading(false);
+        if (active) scheduleNext();
+      }
+    };
+
+    pollRef.current = poll;
+    poll();
+    return () => {
+      active = false;
+      if (timerRef.current) clearTimeout(timerRef.current);
+    };
+  }, [scheduleNext]);
+
+  const refresh = useCallback(() => {
+    if (timerRef.current) clearTimeout(timerRef.current);
+    pollRef.current();
+  }, []);
+
+  return (
+    <DeviceStateContext.Provider value={{ deviceState, loading, error, refresh }}>
+      {children}
+    </DeviceStateContext.Provider>
+  );
+};
diff --git a/app/frontend/src/providers/ModelsContext.tsx b/app/frontend/src/providers/ModelsContext.tsx
index 0c02fef9..ce2ce1bf 100644
--- a/app/frontend/src/providers/ModelsContext.tsx
+++ b/app/frontend/src/providers/ModelsContext.tsx
@@ -39,6 +39,7 @@ export const ModelsProvider: React.FC<{ children: React.ReactNode }> = ({
             status: dockerModel?.status || "deployed",
             health: dockerModel?.health || "unknown",
             ports: dockerModel?.ports || "No ports",
+            model_type: deployedModel.model_type,
           };
         });
 
diff --git a/app/frontend/src/routes/index.tsx b/app/frontend/src/routes/index.tsx
index 7627de6f..0bd5b45e 100644
--- a/app/frontend/src/routes/index.tsx
+++ b/app/frontend/src/routes/index.tsx
@@ -4,6 +4,7 @@
 import { BrowserRouter as Router, Routes, Route } from "react-router-dom";
 import { RefreshProvider } from "../providers/RefreshContext";
 import { ModelsProvider } from "../providers/ModelsContext";
+import { DeviceStateProvider } from "../providers/DeviceStateContext";
 import { getRoutes } from "./route-config";
 import { MainLayout } from "../layouts/MainLayout";
 
@@ -18,23 +19,25 @@ const AppRouter = () => {
   );
 
   return (
-    <RefreshProvider>
-      <ModelsProvider>
-        <Router>
-          <Routes>
-            {routes
-              .filter((route) => route.condition !== false)
-              .map((route) => (
-                <Route
-                  key={route.path}
-                  path={route.path}
-                  element={<MainLayout>{route.element}</MainLayout>}
-                />
-              ))}
-          </Routes>
-        </Router>
-      </ModelsProvider>
-    </RefreshProvider>
+    <DeviceStateProvider>
+      <RefreshProvider>
+        <ModelsProvider>
+          <Router>
+            <Routes>
+              {routes
+                .filter((route) => route.condition !== false)
+                .map((route) => (
+                  <Route
+                    key={route.path}
+                    path={route.path}
+                    element={<MainLayout>{route.element}</MainLayout>}
+                  />
+                ))}
+            </Routes>
+          </Router>
+        </ModelsProvider>
+      </RefreshProvider>
+    </DeviceStateProvider>
   );
 };
 
diff --git a/app/frontend/src/routes/route-config.tsx b/app/frontend/src/routes/route-config.tsx
index 7225f053..7f88ea73 100644
--- a/app/frontend/src/routes/route-config.tsx
+++ b/app/frontend/src/routes/route-config.tsx
@@ -52,6 +52,7 @@ import ImageGenPage from "../pages/ImageGenPage";
 import AudioDetectionPage from "../pages/AudioDetectionPage";
 import ApiInfoPage from "../pages/ApiInfoPage";
 import DeploymentHistoryPage from "../pages/DeploymentHistoryPage";
+import VoicePipelinePage from "../pages/VoicePipelinePage";
 
 // Define route configuration type
 export interface RouteConfig {
@@ -123,6 +124,11 @@ export const getRoutes = (): RouteConfig[] => {
       element: <DeploymentHistoryPage />,
       condition: true,
     },
+    {
+      path: "/voice-pipeline",
+      element: <VoicePipelinePage />,
+      condition: true,
+    },
     {
       // catch all for all other routes
       path: "*",
diff --git a/run.py b/run.py
index 4b22f218..7a1f1777 100644
--- a/run.py
+++ b/run.py
@@ -1248,27 +1248,28 @@ def wait_for_all_services(skip_fastapi=False, is_deployed_mode=False):
         print("\n⚠️  Some services may not be fully ready, but main app may still be accessible.")
     return all_healthy
 
-def wait_for_frontend_and_open_browser(host="localhost", port=3000, timeout=60, auto_deploy_model=None):
+def wait_for_frontend_and_open_browser(host="localhost", port=3000, timeout=60, auto_deploy_model=None, device_id=0):
     """
     Wait for frontend service to be healthy before opening browser.
-    
+
     Args:
         host: Frontend host
         port: Frontend port
         timeout: Timeout in seconds
         auto_deploy_model: Model name to auto-deploy (optional)
-    
+        device_id: Chip slot index for auto-deploy (default 0)
+
     Returns:
         bool: True if browser opened successfully, False otherwise
     """
     base_url = f"http://{host}:{port}/"
-    
+
     # Add auto-deploy parameter if specified
     if auto_deploy_model:
         from urllib.parse import urlencode
-        params = urlencode({"auto-deploy": auto_deploy_model})
+        params = urlencode({"auto-deploy": auto_deploy_model, "device-id": device_id})
         frontend_url = f"{base_url}?{params}"
-        print(f"\n🤖 Auto-deploying model: {auto_deploy_model}")
+        print(f"\n🤖 Auto-deploying model: {auto_deploy_model} on chip {device_id}")
     else:
         frontend_url = base_url
     
@@ -2220,6 +2221,33 @@ def handle_remove_readonly(func, path, exc):
         print(f"   See: https://github.com/tenstorrent/tt-inference-server/releases")
         return False
 
+def _sync_model_catalog():
+    """Run the model-catalog sync script in a subprocess and print the outcome as status lines."""
+    script_path = os.path.join(TT_STUDIO_ROOT, "app", "backend", "shared_config", "sync_models_from_inference_server.py")
+    if not os.path.exists(script_path):
+        print(f"{C_YELLOW}⚠️  Model catalog sync script not found at {script_path}, skipping.{C_RESET}")
+        return
+    print(f"\n{C_CYAN}🔄 Syncing model catalog from artifact...{C_RESET}")
+    try:
+        proc = subprocess.run(
+            [sys.executable, script_path], capture_output=True, text=True, timeout=30
+        )
+        if proc.returncode != 0:
+            print(f"{C_YELLOW}⚠️  Model catalog sync exited with code {proc.returncode}:{C_RESET}")
+            err_text = proc.stderr.strip()
+            if err_text:
+                print(err_text[:500])
+            return
+        print(f"{C_GREEN}✅ Model catalog synced.{C_RESET}")
+        # Echo the script's stdout, indented under the status line
+        for out_line in proc.stdout.strip().splitlines():
+            print(f"   {out_line}")
+        print(f"{C_YELLOW}💡 Reminder: commit app/backend/shared_config/models_from_inference_server.json")
+        print(f"   so CI/CD Docker image builds use the updated catalog.{C_RESET}")
+    except Exception as e:
+        print(f"{C_YELLOW}⚠️  Model catalog sync failed: {e}{C_RESET}")
+
+
 def setup_fastapi_environment():
     """Set up the inference-api FastAPI environment."""
     print(f"🔧 Setting up inference-api environment...")
@@ -3485,6 +3513,8 @@ def main():
                    help="🔍 Check for missing SPDX license headers without adding them")
         parser.add_argument("--auto-deploy", type=str, metavar="MODEL_NAME",
                    help="🤖 Automatically deploy the specified model after startup (e.g., 'Llama-3.2-1B-Instruct')")
+        parser.add_argument("--device-id", type=int, default=0, metavar="CHIP_ID",
+                   help="🔌 Chip slot index (0-7) to use when auto-deploying a model (default: 0)")
         parser.add_argument("--fix-docker", action="store_true",
                    help="🔧 Automatically fix Docker service and permission issues")
         parser.add_argument("--easy", action="store_true",
@@ -3796,6 +3826,8 @@ def main():
                 if not setup_tt_inference_server():
                     print(f"{C_RED}⛔ Failed to setup TT Inference Server. Continuing without FastAPI server.{C_RESET}")
                 else:
+                    # Sync model catalog from the newly downloaded artifact
+                    _sync_model_catalog()
                     # Setup FastAPI environment
                     if not setup_fastapi_environment():
                         print(f"{C_RED}⛔ Failed to setup FastAPI environment. Continuing without FastAPI server.{C_RESET}")
@@ -3899,12 +3931,14 @@ def main():
             host, port, timeout = get_frontend_config()
             
             # Use the new function that reuses existing infrastructure
-            if not wait_for_frontend_and_open_browser(host, port, timeout, args.auto_deploy):
-                auto_deploy_param = f"?auto-deploy={args.auto_deploy}" if args.auto_deploy else ""
+            device_id_val = getattr(args, "device_id", 0)
+            if not wait_for_frontend_and_open_browser(host, port, timeout, args.auto_deploy, device_id=device_id_val):
+                auto_deploy_param = f"?auto-deploy={args.auto_deploy}&device-id={device_id_val}" if args.auto_deploy else ""
                 print(f"{C_YELLOW}⚠️  Browser opening failed. Please manually navigate to http://{host}:{port}{auto_deploy_param}{C_RESET}")
         else:
             host, port, _ = get_frontend_config()
-            auto_deploy_param = f"?auto-deploy={args.auto_deploy}" if args.auto_deploy else ""
+            device_id_val = getattr(args, "device_id", 0)
+            auto_deploy_param = f"?auto-deploy={args.auto_deploy}&device-id={device_id_val}" if args.auto_deploy else ""
             print(f"{C_BLUE}🌐 Automatic browser opening disabled. Access TT-Studio at: {C_CYAN}http://{host}:{port}{auto_deploy_param}{C_RESET}")
         
         # If in dev mode, show logs similar to startup.sh
diff --git a/tt-inference-server b/tt-inference-server
new file mode 160000
index 00000000..ac1892b7
--- /dev/null
+++ b/tt-inference-server
@@ -0,0 +1 @@
+Subproject commit ac1892b7e69f08e7020031dab3f9a30a0dcbe269