diff --git a/.gitignore b/.gitignore
index 6ef5822c8..0635abd98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,10 @@
 cscope.*
 /bazel-*
 *.pyc
+
+# Helm chart dependencies cache
+**/Chart.lock
+**/charts/*.tgz
+
+# Helm chart output directory
+ai/ai-starter-kit/out
\ No newline at end of file
diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile
new file mode 100644
index 000000000..40717cb20
--- /dev/null
+++ b/ai/ai-starter-kit/Makefile
@@ -0,0 +1,70 @@
+.PHONY: check_hf_token check_OCI_target package_helm lint dep_update install install_gke start uninstall push_helm install_gke_gpu start_gpu destroy validate_jupyterhub validate_ray
+
+check_hf_token:
+ifndef HF_TOKEN
+	$(error HF_TOKEN is not set)
+endif
+
+check_OCI_target:
+ifndef OCI_HELM_TARGET
+	$(error OCI_HELM_TARGET is not set)
+endif
+
+package_helm:
+	helm package helm-chart/ai-starter-kit/ --destination out/
+
+push_helm: check_OCI_target
+	helm push out/ai-starter-kit* oci://$$OCI_HELM_TARGET
+
+lint:
+	helm lint helm-chart/ai-starter-kit
+
+dep_update:
+	helm dependency update helm-chart/ai-starter-kit
+
+install: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml
+
+install_gke: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml
+
+install_gke_gpu: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke-gpu.yaml
+
+start:
+	mkdir -p /tmp/models-cache
+	minikube start --cpus 4 --memory 15000 --mount --mount-string="/tmp/models-cache:/tmp/models-cache"
+
+start_gpu:
+	mkdir -p $$HOME/models-cache
+	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:$$HOME/models-cache"
+
+uninstall:
+	helm uninstall ai-starter-kit
+	kubectl delete pod jupyter-user
+	kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir
+
+destroy:
+	minikube delete
+
+validate_jupyterhub:
+	kubectl get pods; \
+	kubectl wait --for=condition=Ready pods -l 'component!=continuous-image-puller' --timeout=1800s; \
+	kubectl get pods; \
+	kubectl get services; \
+	kubectl port-forward service/ai-starter-kit-jupyterhub-proxy-public 8081:80 & \
+	PID=$$!; \
+	echo "Port-forward PID=$${PID}"; \
+	sleep 5s; \
+	python3 ./ci/test_hub.py "127.0.0.1:8081"; \
+	kill $$PID
+
+validate_ray:
+	kubectl wait --for=condition=Ready pods -l 'app.kubernetes.io/created-by=kuberay-operator' --timeout=1800s; \
+	kubectl get pods; \
+	kubectl get services; \
+	kubectl port-forward service/ai-starter-kit-kuberay-head-svc 8265:8265 & \
+	PID=$$!; \
+	sleep 10s; \
+	ray job submit --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"; \
+	kill $$PID
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml new file mode 100644 index 000000000..9bf77a3b5 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml @@ -0,0 +1,45 @@ +apiVersion: v2 +name: ai-starter-kit +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" + + +dependencies: + - name: kuberay-operator + condition: ray-cluster.enabled + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - condition: ray-cluster.enabled + name: ray-cluster + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - name: jupyterhub + version: "4.2.0" + repository: "https://hub.jupyter.org/helm-chart/" + - name: mlflow + version: "0.12.0" + repository: "https://community-charts.github.io/helm-charts" + - name: ollama + condition: ollama.enabled + version: "1.27.0" + repository: "https://helm.otwld.com" diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md new file mode 100644 index 000000000..741d27007 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -0,0 +1,291 @@ +# AI Starter Kit + +A comprehensive Helm chart for deploying a complete AI/ML development environment on Kubernetes. This starter kit provides a ready-to-use platform with JupyterHub notebooks, model serving capabilities, and experiment tracking - perfect for teams starting their AI journey or prototyping AI applications. 
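+
+The individual components are pulled in as Helm chart dependencies (see `Chart.yaml`). If you want to inspect exactly what will be deployed before installing anything, one option is to render the manifests locally; this is a minimal sketch run from the chart directory, mirroring the install commands used throughout this README:
+
+```bash
+# Fetch the dependency charts (KubeRay, JupyterHub, MLflow, Ollama) declared in Chart.yaml
+helm dependency update .
+
+# Render the manifests locally without installing anything
+helm template ai-starter-kit . \
+  --set huggingface.token="YOUR_HF_TOKEN" \
+  -f values.yaml
+```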
+ +## Purpose + +The AI Starter Kit simplifies the deployment of AI infrastructure by providing: + +- **JupyterHub**: Multi-user notebook environment with pre-configured AI/ML libraries +- **Model Serving**: Support for both Ollama and Ramalama model servers +- **MLflow**: Experiment tracking and model management +- **GPU Support**: Configurations for GPU acceleration on GKE and macOS +- **Model Caching**: Persistent storage for efficient model management +- **Example Notebooks**: Pre-loaded notebooks to get you started immediately + +## Prerequisites + +### General Requirements +- Kubernetes cluster (minikube, GKE) +- Helm 3.x installed +- kubectl configured to access your cluster +- Hugging Face token for accessing models + +### Platform-Specific Requirements + +#### Minikube (Local Development) +- Docker Desktop or similar container runtime +- Minimum 4 CPU cores and 16GB RAM available +- 40GB+ free disk space + +#### GKE (Google Kubernetes Engine) +- Google Cloud CLI (`gcloud`) installed and configured +- Appropriate GCP permissions to create clusters + +#### macOS with GPU (Apple Silicon) +- macOS with Apple Silicon (M1/M2/M3/M4) +- minikube with krunkit driver +- 16GB+ RAM recommended + +## Installation + +### Quick Start (Minikube) + +1. **Start minikube with persistent storage:** +```bash +minikube start --cpus 4 --memory 15000 \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install the chart:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml +``` + +3. **Access JupyterHub:** +```bash +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +``` +Navigate to http://localhost:8080 and login with any username and password `sneakypass` + +### GKE Deployment + +1. **Create a GKE Autopilot cluster:** +```bash +export REGION=us-central1 +export CLUSTER_NAME="ai-starter-cluster" +export PROJECT_ID=$(gcloud config get project) + +gcloud container clusters create-auto ${CLUSTER_NAME} \ + --project=${PROJECT_ID} \ + --region=${REGION} \ + --release-channel=rapid \ + --labels=created-by=ai-on-gke,guide=ai-starter-kit +``` + +2. **Get cluster credentials:** +```bash +gcloud container clusters get-credentials ${CLUSTER_NAME} --location=${REGION} +``` + +3. **Install the chart with GKE-specific values:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-gke.yaml +``` + +### GKE with GPU (Ollama) + +For GPU-accelerated model serving with Ollama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ollama-gpu.yaml +``` + +### GKE with GPU (Ramalama) + +For GPU-accelerated model serving with Ramalama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ramalama-gpu.yaml +``` + +### macOS with Apple Silicon GPU + +1. **Start minikube with krunkit driver:** +```bash +minikube start --driver krunkit \ + --cpus 8 --memory 16000 --disk-size 40000mb \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install with macOS GPU support:** +```bash +helm install ai-starter-kit . 
\ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-macos.yaml +``` + +## Configuration + +### Key Configuration Options + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `huggingface.token` | HuggingFace token for models | `"YOUR_HF_TOKEN"` | +| `ollama.enabled` | Enable Ollama model server | `true` | +| `ramalama.enabled` | Enable Ramalama model server | `true` | +| `modelsCachePvc.size` | Size of model cache storage | `10Gi` | +| `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` | +| `mlflow.enabled` | Enable MLflow tracking server | `true` | + +### Storage Configuration + +The chart supports different storage configurations: + +- **Local Development**: Uses hostPath volumes with minikube mount +- **GKE**: Uses standard GKE storage classes (`standard-rwo`, `standard-rwx`) +- **Custom**: Configure via `modelsCachePvc.storageClassName` + +### Model Servers + +#### Ollama +Ollama is enabled by default and provides: +- Easy model management +- REST API for inference +- Support for popular models (Llama, Gemma, Qwen, etc.) +- GPU acceleration support + +#### Ramalama +Ramalama provides: +- Alternative model serving solution +- Support for CUDA and Metal (macOS) acceleration +- Lightweight deployment option + +You can run either Ollama or Ramalama, but not both simultaneously. Toggle using: +```yaml +ollama: + enabled: true/false +ramalama: + enabled: true/false +``` + +## Usage + +### Accessing Services + +#### JupyterHub +```bash +# Port forward to access JupyterHub +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +# Access at: http://localhost:8080 +# Default password: sneakypass +``` + +#### MLflow +```bash +# Port forward to access MLflow UI +kubectl port-forward svc/ai-starter-kit-mlflow 5000:5000 +# Access at: http://localhost:5000 +``` + +#### Ollama/Ramalama API +```bash +# For Ollama +kubectl port-forward svc/ai-starter-kit-ollama 11434:11434 + +# For Ramalama +kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080 +``` + +### Pre-loaded Example Notebooks + +The JupyterHub environment comes with pre-loaded example notebooks: +- `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI. +- `multi-agent-ollama.ipynb`: Multi-agent workflow demonstration using Ollama. +- `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison. +- `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models. + +These notebooks are automatically copied to your workspace on first login. + +## Architecture + +The AI Starter Kit consists of: + +1. **JupyterHub**: Multi-user notebook server with persistent storage +2. **Model Serving**: Choice of Ollama or Ramalama for LLM inference +3. **MLflow**: Experiment tracking and model registry +4. **Persistent Storage**: Shared model cache to avoid redundant downloads +5. 
**Init Containers**: Automated setup of models and notebooks + +## Cleanup + +### Uninstall the chart +```bash +helm uninstall ai-starter-kit +``` + +### Delete persistent volumes (optional) +```bash +kubectl delete pvc ai-starter-kit-models-cache-pvc +kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir +``` + +### Delete GKE cluster +```bash +gcloud container clusters delete ${CLUSTER_NAME} --region=${REGION} +``` + +### Stop minikube +```bash +minikube stop +minikube delete # To completely remove the cluster +``` + +## Troubleshooting + +### Common Issues + +#### Pods stuck in Pending state +- Check available resources: `kubectl describe pod ` +- Increase cluster resources or reduce resource requests + +#### Model download failures +- Verify Hugging Face token is set correctly +- Check internet connectivity from pods +- Increase init container timeout in values + +#### GPU not detected +- Verify GPU nodes are available: `kubectl get nodes -o wide` +- Check GPU driver installation +- Ensure correct node selectors and tolerations + +#### Storage issues +- Verify PVC is bound: `kubectl get pvc` +- Check storage class availability: `kubectl get storageclass` +- Ensure sufficient disk space + +### Debug Commands +```bash +# Check pod status +kubectl get pods -n default + +# View pod logs +kubectl logs -f + +# Describe pod for events +kubectl describe pod + +# Check resource usage +kubectl top nodes +kubectl top pods +``` + +## Resources + +- [JupyterHub Documentation](https://jupyterhub.readthedocs.io/) +- [MLflow Documentation](https://mlflow.org/docs/latest/index.html) +- [Ollama Documentation](https://ollama.ai/docs) +- [Kubernetes Documentation](https://kubernetes.io/docs/) +- [Helm Documentation](https://helm.sh/docs/) \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb new file mode 100644 index 000000000..0834cf6c3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e9e3dd59-b4d9-4de5-a6aa-a72d1480ac77", + "metadata": {}, + "outputs": [], + "source": [ + "from ollama import Client\n", + "\n", + "client = Client(\n", + " host='http://ai-starter-kit-ollama:11434',\n", + " headers={'x-some-header': 'some-value'}\n", + ")\n", + "\n", + "def get_response(prompt):\n", + " response = client.chat(model='gemma3', messages=[\n", + " {\n", + " 'role': 'user',\n", + " 'content': prompt,\n", + " },\n", + " ])\n", + " return response.message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd1513d4-18c5-46d7-8260-f90be004d315", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.7.3'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } 
finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if 
(skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.holoviz.org/panel/1.7.5/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.7.3.min.js\", \"https://cdn.holoviz.org/panel/1.7.5/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) {\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && 
root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n let retries = 0;\n const open = () => {\n if (comm.active) {\n comm.open();\n } else if (retries > 3) {\n console.warn('Comm target never activated')\n } else {\n retries += 1\n setTimeout(open, 500)\n }\n }\n if (comm.active) {\n comm.open();\n } else {\n setTimeout(open, 500)\n }\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || 
{comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n })\n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 
'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "b6fd14e0-f8d2-46e7-9c4d-722893d04d7e" + } + }, + "output_type": "display_data" + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Column\n", + " [0] TextInput(placeholder='Enter text here…')\n", + " [1] Row\n", + " [0] Button(name='Chat!')\n", + " [2] ParamFunction(function, _pane=Column, defer_load=False, height=300, loading_indicator=True, sizing_mode='fixed', width=300)" + ] + }, + "execution_count": 2, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2854d6b0-689d-4dc0-8861-1834489708e9" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "import panel as pn # GUI\n", + "pn.extension()\n", + "\n", + "panels = [] # collect display \n", + "context = [ ] # accumulate messages\n", + "\n", + "\n", + "def collect_messages(_):\n", + " prompt = inp.value_input\n", + " inp.value = ''\n", + " if (not prompt):\n", + " return pn.Column(*panels)\n", + "\n", + " response = get_response(prompt)\n", + " context.append({'role':'user', 'content':f\"{prompt}\"})\n", + " context.append({'role':'assistant', 'content':f\"{response}\"})\n", + " panels.append(\n", + " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n", + " panels.append(\n", + " pn.Row('Assistant:', pn.pane.Markdown(response, width=600)))\n", + " \n", + " return pn.Column(*panels)\n", + "\n", + "\n", + "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n", + "button_conversation = pn.widgets.Button(name=\"Chat!\")\n", + "interactive_conversation = pn.bind(collect_messages, button_conversation)\n", + "dashboard = pn.Column(\n", + " inp,\n", + " pn.Row(button_conversation),\n", + " pn.panel(interactive_conversation, loading_indicator=True, height=300, width=300),\n", + ")\n", + "\n", + "dashboard" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py new file mode 100644 index 000000000..69529726b --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py @@ -0,0 +1,22 @@ +import sys +from huggingface_hub import snapshot_download + +# --- Model Download --- +if __name__ == "__main__": + # List your desired Hugging Face model names here + model_names = [ + "Qwen/Qwen3-Embedding-0.6B", + ] + + for model_name in model_names: + print(f"--- Downloading {model_name} ---") + try: + if len(sys.argv) > 1: + snapshot_download(repo_id=model_name, cache_dir=sys.argv[0]) + else: + snapshot_download(repo_id=model_name) + print(f"Successfully cached {model_name}") + except Exception as e: + print(f"Failed to download {model_name}. Error: {e}") + + print("--- Model download process finished. 
---") diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb new file mode 100644 index 000000000..a25cfbe2a --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb @@ -0,0 +1,525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install Ollama and verify environment\n", + "\n", + "Installs Ollama for local model serving, sets up environment variables, and verifies the installation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install ollama requests --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['OLLAMA_HOST'] = os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "OLLAMA_HOST = os.environ['OLLAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"Ollama Host:\", OLLAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/version\", timeout=5)\n", + " print(\"Ollama version:\", r.json())\n", + "except Exception as e:\n", + " print(\"Note: Ollama service not running. Starting it in next cell...\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start Ollama service and pull model\n", + "\n", + "Starts the Ollama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess, time, requests, os\n", + "\n", + "OLLAMA_HOST = os.environ.get('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ollama():\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if not check_ollama() and OLLAMA_HOST.startswith(\"http://ai-starter-kit-ollama\"):\n", + " print(\"Starting Ollama service...\")\n", + " try:\n", + " subprocess.Popen([\"ollama\", \"serve\"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + " time.sleep(3)\n", + " except Exception as e:\n", + " print(f\"Could not start Ollama automatically: {e}\")\n", + " print(\"Please start Ollama manually with: ollama serve\")\n", + "\n", + "if check_ollama():\n", + " print(\"Ollama service is running\")\n", + " \n", + " print(f\"\\nPulling model {MODEL_NAME}...\")\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\")\n", + " models = r.json().get('models', [])\n", + " model_exists = any(m.get('name') == MODEL_NAME for m in models)\n", + " \n", + " if not model_exists:\n", + " pull_data = {\"name\": MODEL_NAME}\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/pull\", json=pull_data, stream=True)\n", + " for line in r.iter_lines():\n", + " if line:\n", + " try:\n", + " status = json.loads(line)\n", + " if 'status' in status:\n", + " print(f\" {status['status']}\", end='\\r')\n", + " except:\n", + " pass\n", + " print(f\"\\nModel {MODEL_NAME} pulled successfully\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " except Exception as e:\n", + " print(f\"Error pulling model: {e}\")\n", + "else:\n", + " print(\"Warning: Ollama service is not running\")\n", + " print(\"Please ensure Ollama is installed and running\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple FastAPI server that wraps Ollama with an OpenAI-compatible API, including MLflow tracking." 
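+    ,
+    "\n",
+    "\n",
+    "Once the wrapper is running, any OpenAI-style client can point at it on port 8000. As a quick sanity check from a terminal in this Jupyter session (a sketch only; the endpoint path, port, and model name are the ones configured in this cell), you can send a single chat completion with `curl`:\n",
+    "\n",
+    "```bash\n",
+    "curl -s http://localhost:8000/v1/chat/completions \\\n",
+    "  -H 'Content-Type: application/json' \\\n",
+    "  -d '{\"model\": \"qwen2.5:1.5b\", \"messages\": [{\"role\": \"user\", \"content\": \"Say hello\"}]}'\n",
+    "```"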
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests, threading\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ollama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "OLLAMA_HOST = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " # Call Ollama API\n", + " ollama_payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/chat\", json=ollama_payload, timeout=120)\n", + " r.raise_for_status()\n", + " ollama_response = r.json()\n", + " \n", + " content = ollama_response.get(\"message\", {}).get(\"content\", \"\")\n", + " prompt_tokens = len(\" \".join(m.get(\"content\", \"\") for m in messages).split())\n", + " completion_tokens = len(content.split())\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens_approx\": prompt_tokens,\n", + " \"completion_tokens_approx\": completion_tokens,\n", + " \"total_tokens_approx\": prompt_tokens + completion_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": prompt_tokens + completion_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ollama_wrapper.py', 'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "!pkill -f ollama_wrapper.py 
2>/dev/null || true\n", + "\n", + "env_vars = f\"\"\"\n", + "export OLLAMA_HOST=\"{os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')}\"\n", + "export MODEL_NAME=\"qwen2.5:1.5b\"\n", + "export MLFLOW_TRACKING_URI=\"{os.getenv('MLFLOW_TRACKING_URI', 'http://ai-starter-kit-mlflow:5000')}\"\n", + "\"\"\"\n", + "\n", + "!echo '{env_vars}' > /tmp/env_vars.sh\n", + "!bash -c 'source /tmp/env_vars.sh && nohup python /tmp/ollama_wrapper.py > /tmp/wrapper.log 2>&1 &'\n", + "\n", + "print(\"Starting API wrapper...\")\n", + "for i in range(30):\n", + " time.sleep(1)\n", + " try:\n", + " r = requests.get(\"http://localhost:8000/v1/healthz\", timeout=1)\n", + " if r.status_code == 200:\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: http://localhost:8000/v1\")\n", + " print(f\"Health: http://localhost:8000/v1/healthz\")\n", + " print(f\"Chat: http://localhost:8000/v1/chat/completions\")\n", + " break\n", + " except:\n", + " if i % 5 == 0:\n", + " print(f\" Waiting for API to start... ({i}s)\")\n", + " continue\n", + "else:\n", + " print(\"\\nAPI wrapper failed to start. Checking logs:\")\n", + " !tail -20 /tmp/wrapper.log\n", + " print(\"\\nYou can still use direct Ollama API in the next cells.\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/api/tags\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", len(r.json().get('models', [])))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " if USE_WRAPPER:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ]\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. 
Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " endpoint = f\"{BASE_URL}/api/chat\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " if USE_WRAPPER:\n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " else:\n", + " response = r.json()\n", + " content = response.get(\"message\", {}).get(\"content\", \"\")\n", + " usage = {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))}\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "OLLAMA_DIRECT = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " if use_wrapper:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " try:\n", + " r = requests.post(f\"{OLLAMA_DIRECT}/api/chat\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json().get(\"message\", {}).get(\"content\", \"\")\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with Ollama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct Ollama API\\n\")\n", + "\n", + "print(\"1. RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. 
Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n2. WRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n3. CRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ollama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb new file mode 100644 index 000000000..07aff13cc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb @@ -0,0 +1,466 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install RamaLama and verify environment\n", + "\n", + "Installs RamaLama for local model serving, sets up environment variables, and verifies the installation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests --disable-pip-version-check\n", + "\n", + "import os, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['RAMALAMA_HOST'] = 'http://ai-starter-kit-ramalama:8080'\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "RAMALAMA_HOST = os.environ['RAMALAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"RamaLama Host:\", RAMALAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=5)\n", + " print(\"RamaLama models:\", r.json())\n", + "except Exception as e:\n", + " print(f\"Error connecting to RamaLama: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start RamaLama service and pull model\n", + "\n", + "Starts the RamaLama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import requests, os, json\n", + "\n", + "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ramalama():\n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if check_ramalama():\n", + " print(\"RamaLama service is running\")\n", + " \n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", + " models = r.json().get('data', [])\n", + " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", + " if model_exists:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", + " except Exception as e:\n", + " print(f\"Error checking model: {e}\")\n", + "else:\n", + " print(\"Warning: RamaLama service is not running\")\n", + " print(\"Please ensure the deployment is healthy\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple FastAPI server that wraps RamaLama with an OpenAI-compatible API, including MLflow tracking. Since RamaLama already provides OpenAI compatibility, this acts as a proxy with logging." 
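+    ,
+    "\n",
+    "\n",
+    "Because the RamaLama service itself already exposes the OpenAI-style endpoints, you can also query it directly and skip the wrapper (you just lose the MLflow logging). A sketch of a direct request, using the in-cluster service name and port this chart uses:\n",
+    "\n",
+    "```bash\n",
+    "curl -s http://ai-starter-kit-ramalama:8080/v1/chat/completions \\\n",
+    "  -H 'Content-Type: application/json' \\\n",
+    "  -d '{\"model\": \"qwen2.5:1.5b\", \"messages\": [{\"role\": \"user\", \"content\": \"Say hello\"}]}'\n",
+    "```"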
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, threading, time, json\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ramalama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", json=payload, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " \n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " prompt_tokens = usage.get(\"prompt_tokens\", len(\" \".join(m.get(\"content\", \"\") for m in messages).split()))\n", + " completion_tokens = usage.get(\"completion_tokens\", len(content.split()))\n", + " total_tokens = prompt_tokens + completion_tokens\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "def run_api():\n", + " 
subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", + "\n", + "import subprocess\n", + "api_process = subprocess.Popen(\n", + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", + " env={**os.environ, \n", + " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", + " \"MODEL_NAME\": MODEL_NAME,\n", + " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", + " stdout=subprocess.DEVNULL,\n", + " stderr=subprocess.DEVNULL\n", + ")\n", + "\n", + "time.sleep(3)\n", + "\n", + "API_URL = \"http://localhost:8000\"\n", + "try:\n", + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", + " print(f\"Health: {API_URL}/v1/healthz\")\n", + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", + "except Exception as e:\n", + " print(f\"Warning: API wrapper not responding: {e}\")\n", + " print(\"You may need to run the wrapper manually\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/v1/models\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", r.json().get('data', []))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))})\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "RAMALAMA_DIRECT = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " if use_wrapper:\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " endpoint = f\"{RAMALAMA_DIRECT}/v1/chat/completions\"\n", + " try:\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " return response[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with RamaLama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct RamaLama API\\n\")\n", + "\n", + "print(\"RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nWRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nCRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." 
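As an alternative to the per-run printout in Cell 6, `mlflow.search_runs` returns a pandas DataFrame, which is often more convenient inside Jupyter. A short sketch, assuming the same tracking URI and experiment name the notebook uses; column names follow MLflow's `metrics.*` / `params.*` convention.

```python
import os
import mlflow

mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://ai-starter-kit-mlflow:5000"))
exp = mlflow.set_experiment("ramalama-llm")  # created on first use if missing

df = mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=10,
)

# Keep only the columns the notebook prints per run.
cols = [c for c in ["run_id", "status", "metrics.duration_ms",
                    "params.temperature", "params.max_tokens"] if c in df.columns]
print(df[cols] if not df.empty else "No runs logged yet.")
```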
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ramalama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb new file mode 100644 index 000000000..23189a639 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Initialize Ray endpoints and verify dashboard\n", + "\n", + "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads an Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check." 
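The address parsing that Cell 1 performs can be read more easily as a standalone function: strip an optional `ray://` scheme and any port, keeping just the hostname. `parse_head_host` is a hypothetical name used only for illustration; the fallback service name matches the chart's KubeRay head service.

```python
def parse_head_host(raw_addr: str, default: str = "ai-starter-kit-kuberay-head-svc") -> str:
    """Return the bare hostname from a RAY_ADDRESS-style value."""
    if raw_addr.startswith("ray://"):
        return raw_addr.split("://", 1)[1].split(":", 1)[0]
    return raw_addr.split(":", 1)[0] or default

# Examples of what the rule yields:
assert parse_head_host("ray://ai-starter-kit-kuberay-head-svc:10001") == "ai-starter-kit-kuberay-head-svc"
assert parse_head_host("my-head:6379") == "my-head"
assert parse_head_host("") == "ai-starter-kit-kuberay-head-svc"
```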
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests==2.* --disable-pip-version-check\n", + "\n", + "import os, textwrap, base64, time, json, requests\n", + "from string import Template\n", + "\n", + "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", + "if raw_addr.startswith(\"ray://\"):\n", + " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", + "else:\n", + " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", + "\n", + "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", + "HF_TOKEN = \"\"\n", + "if os.path.exists(HF_TOKEN_PATH):\n", + " try:\n", + " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", + " except Exception:\n", + " HF_TOKEN = \"\"\n", + "\n", + "print(\"Head host:\", HEAD_HOST)\n", + "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", + "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", + "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", + "\n", + "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", + "\n", + "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
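Cell 2 drives the Ray Jobs REST API directly (POST `/api/jobs/` to submit, GET `/api/jobs/<id>` to poll), so the submit-and-wait pattern is worth seeing in isolation. A minimal sketch follows; the dashboard URL matches the chart's head service, and both helper names are illustrative.

```python
import time
from typing import Optional
import requests

DASH_URL = "http://ai-starter-kit-kuberay-head-svc:8265"

def submit_job(entrypoint: str, runtime_env: Optional[dict] = None) -> str:
    """Submit an entrypoint to the Ray dashboard and return the job_id."""
    payload = {"entrypoint": entrypoint, "runtime_env": runtime_env or {}}
    r = requests.post(f"{DASH_URL}/api/jobs/", json=payload, timeout=60)
    r.raise_for_status()
    return r.json()["job_id"]

def wait_for_job(job_id: str, timeout_s: int = 300) -> str:
    """Poll the job status until it reaches a terminal state or times out."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        status = requests.get(f"{DASH_URL}/api/jobs/{job_id}", timeout=10).json()["status"]
        if status in ("SUCCEEDED", "FAILED", "STOPPED"):
            return status
        time.sleep(2)
    return "TIMEOUT"

if __name__ == "__main__":
    jid = submit_job("python -c \"print('hello from ray job')\"")
    print(jid, wait_for_job(jid))
```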
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, time, requests\n", + "\n", + "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", + "\n", + "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", + "\n", + "serve_py = textwrap.dedent(\"\"\"\n", + " from fastapi import FastAPI\n", + " from ray import serve\n", + " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", + " app = FastAPI()\n", + "\n", + " @serve.deployment(name=\"smoke\", num_replicas=1)\n", + " @serve.ingress(app)\n", + " class Smoke:\n", + " @app.get(\"/healthz\")\n", + " async def health(self): return {\"ok\": True}\n", + "\n", + " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", + " print(\"READY: smoke\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "b64 = base64.b64encode(serve_py.encode()).decode()\n", + "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", + "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", + "job_id = submit[\"job_id\"]\n", + "print(\"Job:\", job_id)\n", + "\n", + "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", + "for i in range(60):\n", + " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", + " try:\n", + " r = requests.get(svc, timeout=2)\n", + " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", + " if r.status_code == 200:\n", + " print(\"Smoke OK\")\n", + " break\n", + " except Exception as e:\n", + " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", + "\n", + "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
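The job script in Cell 3 chooses which GGUF file to download by walking a quantization preference list over the repo's file listing. Here is that selection logic condensed into one function; the repo id and preference order mirror the notebook's defaults, and `pick_gguf` is an illustrative name.

```python
from huggingface_hub import HfApi

def pick_gguf(repo_id: str = "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
              prefs: tuple = ("q4_k_m", "q4_0", "q3_k_m", "q2_k")) -> str:
    """Return the first .gguf file matching the preferred quantization order."""
    files = [f for f in HfApi().list_repo_files(repo_id) if f.lower().endswith(".gguf")]
    if not files:
        raise RuntimeError(f"No .gguf files found in {repo_id}")
    for pref in prefs:
        for f in files:
            if pref in f.lower():
                return f
    return files[0]

if __name__ == "__main__":
    target = pick_gguf()
    # The job script then fetches this file with huggingface_hub.hf_hub_download.
    print("Would download:", target)
```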
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, requests\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "DASH_URL = f\"http://{HEAD}:8265\"\n", + "SERVE_PORT = 8000\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "runtime_env = {\n", + " \"pip\": [\n", + " \"fastapi==0.110.0\",\n", + " \"uvicorn==0.23.2\",\n", + " \"huggingface_hub==0.25.2\",\n", + " \"llama-cpp-python==0.3.16\", \n", + " \"hf_transfer==0.1.6\",\n", + " \"mlflow==2.14.3\", \n", + " ],\n", + " \"env_vars\": {\n", + " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", + " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", + " \"SERVE_PORT\": str(SERVE_PORT),\n", + "\n", + " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", + " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", + "\n", + " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", + " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", + " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", + "\n", + " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", + " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", + " \"GPU_LAYERS\": \"0\", \n", + " \n", + " \"PIP_PREFER_BINARY\": \"1\",\n", + " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", + "\n", + " \"HF_HOME\": \"/tmp/hf-cache\",\n", + " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", + "\n", + " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", + " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", + " },\n", + "}\n", + "\n", + "serve_py = textwrap.dedent(f\"\"\"\n", + "import os, time, multiprocessing, uuid\n", + "from typing import List, Dict, Any\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "from huggingface_hub import HfApi, hf_hub_download\n", + "from ray import serve\n", + "from llama_cpp import Llama\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", + " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", + " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", + " USE_MLFLOW = True\n", + "except Exception as _e:\n", + " USE_MLFLOW = False\n", + "\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", + "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", + "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", + "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", + "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", + "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", + "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", + "\n", + "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", + "app = FastAPI()\n", + "\n", + "def pick_one_file(repo_id: str, prefs):\n", + " api = HfApi()\n", + " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", + " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", + " if not ggufs:\n", + " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", + " for pref in prefs:\n", + " for f in ggufs:\n", + " if pref.lower() in 
f.lower():\n", + " return f\n", + " return ggufs[0]\n", + "\n", + "def pick_chat_format(repo: str, fname: str) -> str:\n", + " return \"qwen\"\n", + "\n", + "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", + "@serve.ingress(app)\n", + "class OpenAICompatLlama:\n", + " def __init__(self, repo_id: str = MODEL_REPO):\n", + " target = pick_one_file(repo_id, GGUF_PREFS)\n", + " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", + " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", + "\n", + " gguf_path = hf_hub_download(\n", + " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", + " local_dir=local_dir, local_dir_use_symlinks=False,\n", + " force_download=False, resume_download=True\n", + " )\n", + " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", + "\n", + " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", + " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", + "\n", + " self.model_file = os.path.basename(gguf_path)\n", + " self.model_repo = repo_id\n", + " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", + " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", + "\n", + " self.llm = Llama(\n", + " model_path=gguf_path,\n", + " n_ctx=CTX_LEN,\n", + " n_threads=n_threads,\n", + " n_batch=256, \n", + " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", + " chat_format=chat_format,\n", + " verbose=False\n", + " )\n", + " print(\"[ready] model loaded\", flush=True)\n", + "\n", + " @app.get(\"/healthz\")\n", + " async def health(self):\n", + " return {{\"status\":\"ok\"}}\n", + "\n", + " @app.post(\"/chat/completions\")\n", + " async def chat_completions(self, request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + "\n", + " messages = body.get(\"messages\", [])\n", + " temperature = float(body.get(\"temperature\", 0.2))\n", + " req_max = body.get(\"max_tokens\", None)\n", + " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", + "\n", + " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", + " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", + " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", + "\n", + " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", + " created = int(time.time())\n", + " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", + "\n", + " try:\n", + " result = self.llm.create_chat_completion(\n", + " messages=messages,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_k=50,\n", + " top_p=0.9,\n", + " repeat_penalty=1.1,\n", + " stop=stop_words,\n", + " )\n", + " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", + " usage_raw = result.get(\"usage\") or {{}}\n", + " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", + " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", + " err = None\n", + " except Exception as e:\n", + " out_text = \"\"\n", + " p_tokens = c_tokens = 0\n", + " err = str(e)\n", + "\n", + " if USE_MLFLOW:\n", + " try:\n", + " dur_ms = int((time.time()-t0) * 1000)\n", + " with mlflow.start_run(run_name=\"chat\"):\n", + " mlflow.set_tags({{\n", + " \"model_repo\": self.model_repo,\n", + " \"model_file\": self.model_file,\n", + " \"framework\": \"llama-cpp-python\",\n", + " }})\n", + " 
mlflow.log_params({{\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"ctx\": CTX_LEN,\n", + " }})\n", + " if not (p_tokens and c_tokens):\n", + " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", + " c_tokens = c_tokens or max(0, len(out_text.split()))\n", + " mlflow.log_metrics({{\n", + " \"duration_ms\": dur_ms,\n", + " \"prompt_tokens_approx\": p_tokens,\n", + " \"completion_tokens_approx\": c_tokens,\n", + " \"total_tokens_approx\": p_tokens + c_tokens,\n", + " }})\n", + " except Exception:\n", + " pass\n", + "\n", + " if err:\n", + " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", + "\n", + " usage = {{\n", + " \"prompt_tokens\": p_tokens,\n", + " \"completion_tokens\": c_tokens,\n", + " \"total_tokens\": p_tokens + c_tokens,\n", + " }}\n", + " return {{\n", + " \"id\": rid,\n", + " \"object\": \"chat.completion\",\n", + " \"created\": created,\n", + " \"model\": model_name,\n", + " \"choices\": [\n", + " {{\n", + " \"index\": 0,\n", + " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", + " \"finish_reason\": \"stop\"\n", + " }}\n", + " ],\n", + " \"usage\": usage\n", + " }}\n", + "\n", + "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", + "print(\"READY\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "payload = base64.b64encode(serve_py.encode()).decode()\n", + "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", + "\n", + "job = requests.post(\n", + " f\"{DASH_URL}/api/jobs/\",\n", + " json={\n", + " \"entrypoint\": entrypoint,\n", + " \"runtime_env\": runtime_env,\n", + " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", + " },\n", + " timeout=45\n", + ").json()\n", + "\n", + "print(\"Job:\", job.get(\"job_id\"))\n", + "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", + "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def health():\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": float(temperature),\n", + " \"max_tokens\": int(max_tokens),\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise, complete sentences.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " }\n", + " if stop:\n", + " body[\"stop\"] = stop\n", + "\n", + " t0 = time.time()\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = r.json().get(\"usage\", {})\n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", out)\n", + " return out\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent (Autogen) pipeline\n", + "\n", + "Installs Autogen, configures OpenAIWrapper to hit Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "# Try to use autogen if available, otherwise use direct implementation\n", + "USE_AUTOGEN = False\n", + "\n", + "try:\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Using autogen for multi-agent workflow\")\n", + "except ImportError:\n", + " try:\n", + " print(\"Installing autogen dependencies...\")\n", + " !pip install -q pyautogen~=0.2.35 python-dotenv tiktoken \"numpy<2,>=1.17.0\" --disable-pip-version-check 2>/dev/null\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Autogen installed successfully\")\n", + " except:\n", + " print(\"Using direct implementation (autogen not available)\")\n", + " USE_AUTOGEN = False\n", + "\n", + "if USE_AUTOGEN:\n", + " config_list = [\n", + " {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"base_url\": BASE_URL,\n", + " \"api_key\": \"local\",\n", + " \"price\": [0.0, 0.0],\n", + " }\n", + " ]\n", + " \n", + " llm = autogen.OpenAIWrapper(config_list=config_list)\n", + " \n", + " try:\n", + " r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n", + " print(\"Warmup:\", r.choices[0].message.content)\n", + " except Exception as e:\n", + " print(\"Warmup skipped:\", e)\n", + " \n", + " user_proxy = UserProxyAgent(\n", + " name=\"UserProxy\",\n", + " system_message=\"You are the human 
admin. Initiate the task.\",\n", + " code_execution_config=False,\n", + " human_input_mode=\"NEVER\",\n", + " )\n", + " \n", + " researcher = AssistantAgent(\n", + " name=\"Researcher\",\n", + " system_message=(\n", + " \"You are a researcher. Gather concise, verified facts on the topic. \"\n", + " \"Return 3-4 bullet points. Keep under 100 words total.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 120},\n", + " )\n", + " \n", + " writer = AssistantAgent(\n", + " name=\"Writer\",\n", + " system_message=(\n", + " \"You are a writer. Using the Researcher's notes, produce a clear report under 160 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", + " )\n", + " \n", + " critic = AssistantAgent(\n", + " name=\"Critic\",\n", + " system_message=(\n", + " \"You are a critic. Review the Writer's report for accuracy and clarity. \"\n", + " \"Present the final polished text under 140 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 120},\n", + " )\n", + " \n", + " def run_sequential(task):\n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (with autogen)\")\n", + " print(\"=\" * 60)\n", + " \n", + " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", + " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " print(research_notes)\n", + " \n", + " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", + " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", + " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " print(report)\n", + " \n", + " critic_prompt = f\"Review this report:\\n{report}\"\n", + " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", + " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " print(final_text)\n", + " return final_text\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report.\"\n", + " final_output = run_sequential(task)\n", + " \n", + "else:\n", + " print(\"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (direct implementation)\")\n", + " print(\"=\" * 60)\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + " \n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + " research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140)\n", + " print(research_notes)\n", + " time.sleep(1) \n", + " \n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " writer_prompt = \"You are a technical writer. 
Based on the following notes, write a brief report.\"\n", + " writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + " report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220)\n", + " print(report)\n", + " time.sleep(1)\n", + " \n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + " critic_task = f\"Review and improve this report:\\n{report}\"\n", + " final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160)\n", + " print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", + "\n", + "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id, \n", + " order_by=[\"attributes.start_time DESC\"], \n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb new file mode 100644 index 000000000..dae93a357 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e4a6ac7c-5c73-42a9-8b74-420788321543", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting ray==2.41.0\n", + " Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting click>=7.0 (from ray==2.41.0)\n", + " Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting filelock (from ray==2.41.0)\n", + " Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (4.22.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray==2.41.0)\n", + " Downloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: packaging in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (23.2)\n", + "Collecting protobuf!=3.19.5,>=3.15.3 (from ray==2.41.0)\n", + " Downloading 
protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)\n", + "Requirement already satisfied: pyyaml in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (6.0.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.4.1)\n", + "Requirement already satisfied: requests in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (2.31.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (23.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.18.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.1.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2024.2.2)\n", + "Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl (67.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading click-8.2.1-py3-none-any.whl (102 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (429 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.0/430.0 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl (322 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.0/322.0 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading filelock-3.19.1-py3-none-any.whl (15 kB)\n", + "Installing collected packages: protobuf, msgpack, filelock, click, ray\n", + "\u001b[33m WARNING: The scripts ray, rllib, serve and tune are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0mSuccessfully installed click-8.2.1 filelock-3.19.1 msgpack-1.1.1 protobuf-6.32.0 ray-2.41.0\n" + ] + } + ], + "source": [ + "!pip install ray==2.41.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": 
"788f1517-251c-4171-af7d-f4c7a5073d71", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting numpy\n", + " Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.1/62.1 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting mlflow\n", + " Downloading mlflow-3.3.1-py3-none-any.whl.metadata (30 kB)\n", + "Collecting tensorflow\n", + " Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", + "Requirement already satisfied: ray[client,default,serve] in ./models-cache/lib/python3.11/site-packages (2.41.0)\n", + "Collecting mlflow-skinny==3.3.1 (from mlflow)\n", + " Downloading mlflow_skinny-3.3.1-py3-none-any.whl.metadata (31 kB)\n", + "Collecting mlflow-tracing==3.3.1 (from mlflow)\n", + " Downloading mlflow_tracing-3.3.1-py3-none-any.whl.metadata (19 kB)\n", + "Collecting Flask<4 (from mlflow)\n", + " Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)\n", + "Requirement already satisfied: alembic!=1.10.0,<2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (1.13.1)\n", + "Collecting cryptography<46,>=43.0.0 (from mlflow)\n", + " Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: docker<8,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (7.1.0)\n", + "Collecting graphene<4 (from mlflow)\n", + " Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)\n", + "Collecting gunicorn<24 (from mlflow)\n", + " Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting matplotlib<4 (from mlflow)\n", + " Downloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting pandas<3 (from mlflow)\n", + " Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pyarrow<22,>=4.0.0 (from mlflow)\n", + " Downloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", + "Collecting scikit-learn<2 (from mlflow)\n", + " Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting scipy<2 (from mlflow)\n", + " Downloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sqlalchemy<3,>=1.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (2.0.30)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-6.2.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: click<9,>=7.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (8.2.1)\n", + "Collecting 
cloudpickle<4 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading databricks_sdk-0.64.0-py3-none-any.whl.metadata (39 kB)\n", + "Collecting fastapi<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)\n", + "Collecting gitpython<4,>=3.1.9 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)\n", + "Collecting importlib_metadata!=4.7.0,<9,>=3.7.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting opentelemetry-api<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_sdk-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: packaging<26 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (23.2)\n", + "Requirement already satisfied: protobuf<7,>=3.12.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.32.0)\n", + "Requirement already satisfied: pydantic<3,>=1.10.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.7.3)\n", + "Requirement already satisfied: pyyaml<7,>=5.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.0.1)\n", + "Requirement already satisfied: requests<3,>=2.17.3 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.31.0)\n", + "Collecting sqlparse<1,>=0.4.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading sqlparse-0.5.3-py3-none-any.whl.metadata (3.9 kB)\n", + "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (4.12.2)\n", + "Collecting uvicorn<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting absl-py>=1.0.0 (from tensorflow)\n", + " Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting astunparse>=1.6.0 (from tensorflow)\n", + " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting flatbuffers>=24.3.25 (from tensorflow)\n", + " Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)\n", + "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)\n", + " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting google_pasta>=0.1.1 (from tensorflow)\n", + " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", + "Collecting libclang>=13.0.0 (from tensorflow)\n", + " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", + "Collecting opt_einsum>=2.3.2 (from tensorflow)\n", + " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Requirement already satisfied: setuptools in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (69.5.1)\n", + "Requirement already satisfied: six>=1.12.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (1.16.0)\n", + "Collecting termcolor>=1.1.0 (from tensorflow)\n", + " Downloading 
termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)\n", + "Collecting wrapt>=1.11.0 (from tensorflow)\n", + " Downloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.4 kB)\n", + "Collecting grpcio<2.0,>=1.24.3 (from tensorflow)\n", + " Downloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", + "Collecting tensorboard~=2.20.0 (from tensorflow)\n", + " Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Collecting keras>=3.10.0 (from tensorflow)\n", + " Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)\n", + "Collecting h5py>=3.11.0 (from tensorflow)\n", + " Downloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)\n", + "Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)\n", + " Downloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.9 kB)\n", + "Requirement already satisfied: filelock in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (3.19.1)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (4.22.0)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (1.1.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.4.1)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (3.9.5)\n", + "Collecting aiohttp-cors (from ray[client,default,serve])\n", + " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", + "Collecting colorful (from ray[client,default,serve])\n", + " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", + "Collecting opencensus (from ray[client,default,serve])\n", + " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (0.20.0)\n", + "Collecting smart-open (from ray[client,default,serve])\n", + " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n", + " Downloading virtualenv-20.34.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting py-spy>=0.2.0 (from ray[client,default,serve])\n", + " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (510 bytes)\n", + "Collecting starlette (from ray[client,default,serve])\n", + " Downloading starlette-0.47.3-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting watchfiles (from ray[client,default,serve])\n", + " Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.0.5)\n", + "Requirement already 
satisfied: yarl<2.0,>=1.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.9.4)\n", + "Requirement already satisfied: Mako in /opt/bitnami/miniconda/lib/python3.11/site-packages (from alembic!=1.10.0,<2->mlflow) (1.3.5)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", + "Requirement already satisfied: cffi>=1.14 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cryptography<46,>=43.0.0->mlflow) (1.16.0)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from docker<8,>=4.0.0->mlflow) (2.1.0)\n", + "Collecting blinker>=1.9.0 (from Flask<4->mlflow)\n", + " Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting itsdangerous>=2.2.0 (from Flask<4->mlflow)\n", + " Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)\n", + "Requirement already satisfied: jinja2>=3.1.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (3.1.4)\n", + "Requirement already satisfied: markupsafe>=2.1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (2.1.5)\n", + "Collecting werkzeug>=3.1.0 (from Flask<4->mlflow)\n", + " Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n", + "Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)\n", + "Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: python-dateutil<3,>=2.7.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from graphene<4->mlflow) (2.9.0.post0)\n", + "Collecting rich (from keras>=3.10.0->tensorflow)\n", + " Downloading rich-14.1.0-py3-none-any.whl.metadata (18 kB)\n", + "Collecting namex (from keras>=3.10.0->tensorflow)\n", + " Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)\n", + "Collecting optree (from keras>=3.10.0->tensorflow)\n", + " Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (33 kB)\n", + "Collecting contourpy>=1.0.1 (from matplotlib<4->mlflow)\n", + " Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", + "Collecting cycler>=0.10 (from matplotlib<4->mlflow)\n", + " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", + "Collecting fonttools>=4.22.0 (from matplotlib<4->mlflow)\n", + " Downloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (108 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib<4->mlflow)\n", + " Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)\n", + "Collecting pillow>=8 (from matplotlib<4->mlflow)\n", + " Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)\n", + "Collecting pyparsing>=2.3.1 (from matplotlib<4->mlflow)\n", + " Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n", + "Collecting pytz>=2020.1 (from pandas<3->mlflow)\n", + " Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting 
tzdata>=2022.7 (from pandas<3->mlflow)\n", + " Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (2.18.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2024.2.2)\n", + "Collecting joblib>=1.2.0 (from scikit-learn<2->mlflow)\n", + " Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting threadpoolctl>=3.1.0 (from scikit-learn<2->mlflow)\n", + " Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from sqlalchemy<3,>=1.4.0->mlflow) (3.0.3)\n", + "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from starlette->ray[client,default,serve]) (4.4.0)\n", + "Collecting markdown>=2.6.8 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading markdown-3.8.2-py3-none-any.whl.metadata (5.1 kB)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n", + " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (3.10.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.18.1)\n", + "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n", + " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n", + " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.14.0)\n", + "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading 
httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", + "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: pycparser in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cffi>=1.14->cryptography<46,>=43.0.0->mlflow) (2.21)\n", + "Collecting google-auth~=2.0 (from databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting gitdb<5,>=4.0.1 (from gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", + "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting zipp>=3.20 (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting opentelemetry-semantic-conventions==0.57b0 (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting markdown-it-py>=2.2.0 (from rich->keras>=3.10.0->tensorflow)\n", + " Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from rich->keras>=3.10.0->tensorflow) (2.18.0)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow)\n", + " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting pyasn1<0.7.0,>=0.6.1 (from 
pyasn1-modules>=0.2.1->google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", + "Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.9/16.9 MB\u001b[0m \u001b[31m119.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow-3.3.1-py3-none-any.whl (26.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.4/26.4 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow_skinny-3.3.1-py3-none-any.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m104.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mlflow_tracing-3.3.1-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.6/620.6 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading absl_py-2.3.1-py3-none-any.whl (135 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", + "Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m121.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading fastapi-0.116.1-py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flask-3.1.2-py3-none-any.whl (103 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.3/103.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)\n", + "Downloading gast-0.6.0-py3-none-any.whl (21 kB)\n", + "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.9/114.9 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m114.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading 
gunicorn-23.0.0-py3-none-any.whl (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.0/85.0 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m128.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading keras-3.11.3-py3-none-any.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.7/8.7 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m138.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m116.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (42.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.8/42.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m144.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.4/35.4 MB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading starlette-0.47.3-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.0/73.0 
kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)\n", + "Downloading virtualenv-20.34.0-py3-none-any.whl (6.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m125.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (82 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.4/82.4 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", + "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.5/201.5 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m128.2/128.2 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.9/61.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading blinker-1.9.0-py3-none-any.whl (8.5 kB)\n", + "Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)\n", + "Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m355.2/355.2 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", + "Downloading databricks_sdk-0.64.0-py3-none-any.whl (703 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m703.4/703.4 kB\u001b[0m \u001b[31m52.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m469.0/469.0 kB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m118.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading gitpython-3.1.45-py3-none-any.whl (208 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.2/208.2 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.8/160.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_core-3.2.6-py3-none-any.whl (203 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.4/203.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)\n", + "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading importlib_metadata-8.7.0-py3-none-any.whl (27 kB)\n", + "Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n", + "Downloading joblib-1.5.1-py3-none-any.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m80.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown-3.8.2-py3-none-any.whl (106 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.8/106.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n", + "Downloading opentelemetry_api-1.36.0-py3-none-any.whl (65 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.6/65.6 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_sdk-1.36.0-py3-none-any.whl (119 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.0/120.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.6/201.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m136.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading pyparsing-3.2.3-py3-none-any.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", + "Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.2/509.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sqlparse-0.5.3-py3-none-any.whl (44 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n", + "Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m347.8/347.8 kB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvicorn-0.35.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m135.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading namex-0.1.0-py3-none-any.whl (5.9 kB)\n", + "Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (402 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.0/402.0 kB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rich-14.1.0-py3-none-any.whl (243 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.4/243.4 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gitdb-4.0.12-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.1/216.1 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cachetools-5.5.2-py3-none-any.whl (10 kB)\n", + "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.5/294.5 kB\u001b[0m \u001b[31m39.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.3/87.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading zipp-3.23.0-py3-none-any.whl (10 kB)\n", + "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rsa-4.9.1-py3-none-any.whl (34 kB)\n", + "Downloading smmap-5.0.2-py3-none-any.whl (24 kB)\n", + "Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.1/83.1 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pytz, py-spy, opencensus-context, namex, libclang, flatbuffers, distlib, colorful, zipp, wrapt, werkzeug, websockets, virtualenv, uvloop, uvicorn, tzdata, threadpoolctl, termcolor, tensorboard-data-server, sqlparse, smmap, python-dotenv, pyparsing, pyasn1, pyarrow, proto-plus, pillow, optree, opt_einsum, numpy, mdurl, markdown, kiwisolver, joblib, itsdangerous, httptools, gunicorn, grpcio, graphql-core, googleapis-common-protos, google_pasta, gast, fonttools, cycler, cloudpickle, cachetools, blinker, astunparse, absl-py, watchfiles, tensorboard, starlette, smart-open, scipy, rsa, pyasn1-modules, pandas, ml_dtypes, markdown-it-py, importlib_metadata, h5py, graphql-relay, gitdb, Flask, cryptography, contourpy, scikit-learn, rich, opentelemetry-api, matplotlib, graphene, google-auth, gitpython, fastapi, aiohttp-cors, opentelemetry-semantic-conventions, keras, google-api-core, databricks-sdk, tensorflow, opentelemetry-sdk, opencensus, mlflow-tracing, mlflow-skinny, mlflow\n", + "\u001b[33m WARNING: The script websockets is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script virtualenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script uvicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script sqlformat is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script dotenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts f2py and numpy-config are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown_py is installed in '/tmp/models-cache/bin' 
which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script gunicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts fonttools, pyftmerge, pyftsubset and ttx are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script watchfiles is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script tensorboard is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts pyrsa-decrypt, pyrsa-encrypt, pyrsa-keygen, pyrsa-priv2pub, pyrsa-sign and pyrsa-verify are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown-it is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script flask is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script fastapi is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts import_pb_to_tensorboard, saved_model_cli, tensorboard, tf_upgrade_v2, tflite_convert and toco are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "pyopenssl 24.1.0 requires cryptography<43,>=41.0.5, but you have cryptography 45.0.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Flask-3.1.2 absl-py-2.3.1 aiohttp-cors-0.8.1 astunparse-1.6.3 blinker-1.9.0 cachetools-5.5.2 cloudpickle-3.1.1 colorful-0.5.7 contourpy-1.3.3 cryptography-45.0.6 cycler-0.12.1 databricks-sdk-0.64.0 distlib-0.4.0 fastapi-0.116.1 flatbuffers-25.2.10 fonttools-4.59.1 gast-0.6.0 gitdb-4.0.12 gitpython-3.1.45 google-api-core-2.25.1 google-auth-2.40.3 google_pasta-0.2.0 googleapis-common-protos-1.70.0 graphene-3.4.3 graphql-core-3.2.6 graphql-relay-3.2.0 grpcio-1.74.0 gunicorn-23.0.0 h5py-3.14.0 httptools-0.6.4 importlib_metadata-8.7.0 itsdangerous-2.2.0 joblib-1.5.1 keras-3.11.3 kiwisolver-1.4.9 libclang-18.1.1 markdown-3.8.2 markdown-it-py-4.0.0 matplotlib-3.10.5 mdurl-0.1.2 ml_dtypes-0.5.3 mlflow-3.3.1 mlflow-skinny-3.3.1 mlflow-tracing-3.3.1 namex-0.1.0 numpy-2.3.2 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-api-1.36.0 opentelemetry-sdk-1.36.0 opentelemetry-semantic-conventions-0.57b0 opt_einsum-3.4.0 optree-0.17.0 pandas-2.3.2 pillow-11.3.0 proto-plus-1.26.1 py-spy-0.4.1 pyarrow-21.0.0 pyasn1-0.6.1 pyasn1-modules-0.4.2 pyparsing-3.2.3 python-dotenv-1.1.1 pytz-2025.2 rich-14.1.0 rsa-4.9.1 scikit-learn-1.7.1 scipy-1.16.1 smart-open-7.3.0.post1 smmap-5.0.2 sqlparse-0.5.3 starlette-0.47.3 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 threadpoolctl-3.6.0 tzdata-2025.2 uvicorn-0.35.0 uvloop-0.21.0 virtualenv-20.34.0 watchfiles-1.1.0 websockets-15.0.1 werkzeug-3.1.3 wrapt-1.17.3 zipp-3.23.0\n" + ] + } + ], + "source": [ + "!pip install numpy mlflow tensorflow \"ray[serve,default,client]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e8deec5c-6239-4087-8a4d-27c091e9fc3c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-27 12:00:23.577265: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025-08-27 12:00:23.626853: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2025-08-27 12:00:25.157402: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025/08/27 12:00:27 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_TensorFlow' does not exist. Creating a new experiment.\n", + "2025/08/27 12:00:27 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n", + "2025/08/27 12:00:27 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. 
Error: Failed to initialize: Bad git executable.\n", + "The git executable must be specified in one of the following ways:\n", + " - be included in your $PATH\n", + " - be set via $GIT_PYTHON_GIT_EXECUTABLE\n", + " - explicitly set via git.refresh()\n", + "\n", + "All git commands will error until this is rectified.\n", + "\n", + "This initial message can be silenced or aggravated in the future by setting the\n", + "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n", + " - quiet|q|silence|s|silent|none|n|0: for no message or exception\n", + " - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n", + " - error|e|exception|raise|r|2: for a raised exception\n", + "\n", + "Example:\n", + " export GIT_PYTHON_REFRESH=quiet\n", + "\n", + "/tmp/models-cache/lib/python3.11/site-packages/keras/src/layers/core/dense.py:92: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n", + "2025-08-27 12:00:29.352582: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏃 View run classy-wren-479 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/7ca28b8521a049dc8a014d4235909db3\n", + "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n" + ] + } + ], + "source": [ + "import mlflow\n", + "import mlflow.tensorflow\n", + "import numpy as np\n", + "\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "\n", + "# -------------------\n", + "# Prepare Data\n", + "# -------------------\n", + "data = load_diabetes()\n", + "X = data.data\n", + "y = data.target.reshape(-1, 1)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "# -------------------\n", + "# Define Model\n", + "# -------------------\n", + "def create_model(input_dim):\n", + " model = keras.Sequential([\n", + " layers.Dense(64, activation=\"relu\", input_shape=(input_dim,)),\n", + " layers.Dense(32, activation=\"relu\"),\n", + " layers.Dense(1) # regression output\n", + " ])\n", + " model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mse\"])\n", + " return model\n", + "\n", + "input_dim = X_train.shape[1]\n", + "epochs = 50\n", + "batch_size = 32\n", + "\n", + "mlflow.set_experiment(\"Diabetes_Prediction_TensorFlow\")\n", + "\n", + "with mlflow.start_run():\n", + " mlflow.log_param(\"epochs\", epochs)\n", + " mlflow.log_param(\"batch_size\", batch_size)\n", + " mlflow.log_param(\"optimizer\", \"adam\")\n", + " mlflow.log_param(\"loss_fn\", \"mse\")\n", + " mlflow.log_param(\"input_features\", input_dim)\n", + "\n", + " model = create_model(input_dim)\n", + "\n", + " # Train\n", + " history = model.fit(\n", + " X_train, y_train,\n", + " validation_data=(X_test, y_test),\n", + " epochs=epochs,\n", + " batch_size=batch_size,\n", + " verbose=0\n", + " )\n", + "\n", + " # Evaluation\n", + " loss, mse = model.evaluate(X_test, y_test, verbose=0)\n", + " rmse = 
np.sqrt(mse)\n", + "\n", + " mlflow.log_metric(\"mse\", mse)\n", + " mlflow.log_metric(\"rmse\", rmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "556ae0b2-6fa6-4271-9e7d-553cd7056aab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/bitnami/miniconda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2025-08-27 12:00:35,162\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2025-08-27 12:00:35,747\tINFO worker.py:1514 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", + "2025-08-27 12:00:35,748\tINFO client_builder.py:244 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", + "SIGTERM handler is not set because current thread is not the main thread.\n", + "2025-08-27 12:00:40,043\tWARNING utils.py:1591 -- Python patch version mismatch: The cluster was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.11\n", + "This process on Ray Client was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.9\n", + "\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "INFO 2025-08-27 12:00:49,116 serve 124 -- Started Serve in namespace \"serve\".\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739, 
ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. 
Use `ray status` for more details.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Model loaded successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Model loaded successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2025-08-27 12:02:08,716 serve 124 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n", + "INFO 2025-08-27 12:02:08,718 serve 124 -- Deployed app 'default' successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step3.2.212)\u001b[0m \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 -- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 
-- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import mlflow.tensorflow\n", + "import tensorflow as tf\n", + "from starlette.requests import Request\n", + "from typing import Dict\n", + "\n", + "from ray import serve\n", + "import ray\n", + "\n", + "\n", + "@serve.deployment(\n", + " ray_actor_options={\n", + " \"runtime_env\": {\n", + " \"pip\": [\"tensorflow\"]\n", + " },\n", + " }\n", + ")\n", + "class TensorFlowMLflowDeployment:\n", + " def __init__(self):\n", + " print(\"Loading model...\")\n", + " self.model = model\n", + " print(\"Model loaded successfully.\")\n", + "\n", + " async def __call__(self, input_data) -> Dict:\n", + " try:\n", + " if isinstance(input_data, Request):\n", + " data = await input_data.json()\n", + " else:\n", + " data = input_data\n", + " features = data.get(\"features\", None)\n", + " if features is None:\n", + " return {\"error\": \"Missing 'features' in request\"}\n", + " X = np.array(features).reshape(1, -1)\n", + "\n", + " # Make prediction with TensorFlow model\n", + " prediction = self.model.predict(X).flatten().tolist()\n", + "\n", + " return {\"prediction\": prediction}\n", + " except Exception as e:\n", + " return {\"error\": str(e)}\n", + "\n", + "\n", + "# Bind and deploy\n", + "app = TensorFlowMLflowDeployment.bind()\n", + "handle = serve.run(app, route_prefix=\"/predict\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e264af73-6634-412b-9cbc-86b79c18e775", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prediction': [179.46218872070312]}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_data = {\"features\": [0.0380759, 0.0506801, 0.0616962, 0.0218724, -0.0442235, -0.0348208, -0.0434008, -0.00259226, 0.0199084, -0.0176461]}\n", + "response = handle.remote(json_data)\n", + "await response" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt new file mode 100644 index 000000000..ec119b4e1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt @@ -0,0 +1,10 @@ +transformers +torch +tensorflow +huggingface_hub +numpy +ipywidgets +mlflow==2.19.0 +ollama +panel +ray==2.41.0 diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb new file mode 100644 index 
000000000..19bc4bdb6 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.7645573019981384, 0.14142529666423798], [0.13549786806106567, 0.5999557375907898]]\n" + ] + } + ], + "source": [ + "# Requires transformers>=4.51.0\n", + "\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "from torch import Tensor\n", + "from transformers import AutoTokenizer, AutoModel\n", + "\n", + "\n", + "def last_token_pool(last_hidden_states: Tensor,\n", + " attention_mask: Tensor) -> Tensor:\n", + " left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n", + " if left_padding:\n", + " return last_hidden_states[:, -1]\n", + " else:\n", + " sequence_lengths = attention_mask.sum(dim=1) - 1\n", + " batch_size = last_hidden_states.shape[0]\n", + " return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n", + "\n", + "\n", + "def get_detailed_instruct(task_description: str, query: str) -> str:\n", + " return f'Instruct: {task_description}\\nQuery:{query}'\n", + "\n", + "# Each query must come with a one-sentence instruction that describes the task\n", + "task = 'Given a web search query, retrieve relevant passages that answer the query'\n", + "\n", + "queries = [\n", + " get_detailed_instruct(task, 'What is the capital of China?'),\n", + " get_detailed_instruct(task, 'Explain gravity')\n", + "]\n", + "# No need to add instruction for retrieval documents\n", + "documents = [\n", + " \"The capital of China is Beijing.\",\n", + " \"Gravity is a force that attracts two bodies towards each other. 
It gives weight to physical objects and is responsible for the movement of planets around the sun.\"\n", + "]\n", + "input_texts = queries + documents\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')\n", + "model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')\n", + "\n", + "# We recommend enabling flash_attention_2 for better acceleration and memory saving.\n", + "# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16).cuda()\n", + "\n", + "max_length = 8192\n", + "\n", + "# Tokenize the input texts\n", + "batch_dict = tokenizer(\n", + " input_texts,\n", + " padding=True,\n", + " truncation=True,\n", + " max_length=max_length,\n", + " return_tensors=\"pt\",\n", + ")\n", + "batch_dict.to(model.device)\n", + "outputs = model(**batch_dict)\n", + "embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])\n", + "\n", + "# normalize embeddings\n", + "embeddings = F.normalize(embeddings, p=2, dim=1)\n", + "scores = (embeddings[:2] @ embeddings[2:].T)\n", + "print(scores.tolist())\n", + "# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml new file mode 100644 index 000000000..d985d3ba8 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml @@ -0,0 +1,46 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-ray + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + ray.io/node-type: head + ports: + - protocol: TCP + port: 8265 + - protocol: TCP + port: 8000 + - protocol: TCP + port: 10001 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-mlflow + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + app.kubernetes.io/name: mlflow + app.kubernetes.io/instance: ai-starter-kit + ports: + - protocol: TCP + port: 5000 \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt new file mode 100644 index 000000000..4e33a20ed --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt @@ -0,0 +1 @@ +AI Starter Kit installed. 
Enjoy \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl new file mode 100644 index 000000000..cf0c5e081 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ai-starter-kit.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ai-starter-kit.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ai-starter-kit.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ai-starter-kit.labels" -}} +helm.sh/chart: {{ include "ai-starter-kit.chart" . }} +{{ include "ai-starter-kit.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ai-starter-kit.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ai-starter-kit.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ai-starter-kit.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ai-starter-kit.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml new file mode 100644 index 000000000..e03429ee9 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml @@ -0,0 +1,18 @@ +--- +{{- /* +Create a single ConfigMap with all initialization files for the jupyterhub singleuser pod. +This ConfigMap is mounted as a volume. 
+*/ -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: ai-starter-kit-init-files + labels: + app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + helm.sh/chart: "{{ $.Chart.Name }}-{{ $.Chart.Version }}" +data: +{{- range $path, $bytes := .Files.Glob "files/*" }} + {{ base $path | quote }}: |- +{{ $bytes | toString | nindent 4 }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml new file mode 100644 index 000000000..3e387f5ce --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml @@ -0,0 +1,65 @@ +{{- if .Values.genericDevicePlugin.enabled }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-generic-device-plugin + namespace: kube-system + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: generic-device-plugin +spec: + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: generic-device-plugin + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: generic-device-plugin + spec: + priorityClassName: system-node-critical + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" + containers: + - image: {{ .Values.genericDevicePlugin.image.repository | default "squat/generic-device-plugin" }}:{{ .Values.genericDevicePlugin.image.tag | default "latest" }} + imagePullPolicy: {{ .Values.genericDevicePlugin.image.pullPolicy | default "IfNotPresent" }} + name: generic-device-plugin + args: + - --device + - | + name: dri + groups: + - count: {{ .Values.genericDevicePlugin.device.count | default 4 }} + paths: + - path: /dev/dri + resources: + requests: + cpu: {{ .Values.genericDevicePlugin.resources.requests.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.requests.memory | default "10Mi" }} + limits: + cpu: {{ .Values.genericDevicePlugin.resources.limits.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.limits.memory | default "20Mi" }} + ports: + - containerPort: 8080 + name: http + securityContext: + privileged: true + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev + mountPath: /dev + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + updateStrategy: + type: RollingUpdate +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml new file mode 100644 index 000000000..308b0a94a --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.huggingface.token }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-hf-token-secret + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +type: Opaque +stringData: + token: {{ .Values.huggingface.token }} +{{- end }} \ No newline at end of file diff --git 
a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml new file mode 100644 index 000000000..0797b93e3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml @@ -0,0 +1,16 @@ +{{- if .Values.localPersistence.enabled }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: {{ .Release.Name }}-models-cache-pv + labels: + type: local +spec: + storageClassName: manual + capacity: + storage: {{ .Values.modelsCachePvc.size }} + accessModes: + - ReadWriteOnce + hostPath: + path: "{{ .Values.localPersistence.hostPath }}" +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml new file mode 100644 index 000000000..99179f9f1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCacheOnlyPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-only-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCacheOnlyPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCacheOnlyPvc.storageClassName (eq .Values.modelsCacheOnlyPvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCacheOnlyPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCacheOnlyPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml new file mode 100644 index 000000000..cb4ae5b1d --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rayPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-ray-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.rayPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. 
+ */}} + {{- if or .Values.rayPvc.storageClassName (eq .Values.rayPvc.storageClassName "") }} + storageClassName: {{ .Values.rayPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.rayPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml new file mode 100644 index 000000000..36ba98fdc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCachePvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCachePvc.size }} +{{- end -}} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml new file mode 100644 index 000000000..5650c35c4 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml @@ -0,0 +1,58 @@ +{{- if .Values.ramalama.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + replicas: 1 + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ramalama + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ramalama + spec: + {{- if .Values.ramalama.nodeSelector }} + nodeSelector: + {{- toYaml .Values.ramalama.nodeSelector | nindent 8 }} + {{- end }} + containers: + - name: ramalama + image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}" + imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }} + {{- if .Values.ramalama.command }} + command: + {{- toYaml .Values.ramalama.command | nindent 10 }} + {{- end }} + ports: + - containerPort: 8080 + protocol: TCP + {{- if .Values.ramalama.resources }} + resources: + {{- toYaml .Values.ramalama.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http + selector: + {{- include "ai-starter-kit.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: ramalama +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml new file mode 100644 index 000000000..3155ea642 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -0,0 +1,199 @@ +jupyterhub: + nameOverride: "jupyterhub" + # This value has to be null so that the release name is applied to this chart's resources. + # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/b4b51301ac886511c643cc5d428b15ff38006bee/jupyterhub/values.yaml#L1 + fullnameOverride: + + singleuser: + networkPolicy: + enabled: false + defaultUrl: "/lab/tree/welcome.ipynb" + image: + name: jupyterhub/k8s-singleuser-sample + tag: "4.2.0" + initContainers: + # This init container makes sure that the home folder we mount has the correct owner + - name: chown-home-mount-dir + image: jupyterhub/k8s-singleuser-sample:4.2.0 + securityContext: + runAsUser: 0 + command: ["chown", "jovyan", "/home/jovyan"] + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + + - name: model-initializer + image: jupyterhub/k8s-singleuser-sample:4.2.0 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + command: + - /bin/sh + - -c + - | + set -e + pip install -r /tmp/requirements.txt + + python /tmp/download_models.py + + # populate workspace with initial notebook files + for f in /tmp/*.ipynb; do + if [ -f "$f" ]; then + # Use cp -n to not overwrite existing files. + cp -n "$f" /home/jovyan/ + fi + done + volumeMounts: + # This 'home' volume is created by the helm chart's 'homeMountPath' option. + # We mount it in the init containers too, so all downloads and installations are persisted in this mounted home folder.
+ - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + - name: init-files + mountPath: /tmp + readOnly: true + + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 20Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + # This environment variable list has its own format: https://z2jh.jupyter.org/en/latest/resources/reference.html#singleuser-extraenv + extraEnv: + HF_TOKEN: + name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + RAY_ADDRESS: "ray://ai-starter-kit-kuberay-head-svc:10001" + MLFLOW_TRACKING_URI: "http://ai-starter-kit-mlflow:5000" + hub: + networkPolicy: + enabled: false + db: + type: sqlite-pvc + pvc: + annotations: + # Without this, Helm will not keep the PVC after uninstallation + # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/3718 + helm.sh/resource-policy: keep + extraConfig: + 00-dummy-authenticator: | + c.DummyAuthenticator.password = "sneakypass" + 01-spawner-timeouts: | + c.KubeSpawner.start_timeout = 1800 + proxy: + chp: + networkPolicy: + enabled: false + traefik: + networkPolicy: + enabled: false + +ray-cluster: + enabled: false + image: + tag: "2.41.0-py312-cpu-aarch64" + head: + serviceType: ClusterIP + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + worker: + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + +mlflow: + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + +huggingface: + # Provide your Hugging Face token here to download gated or private models. + # It is recommended to set this via --set or a separate values file, e.g., + # --set huggingface.token=hf_... + token: "" + +rayPvc: + enabled: false + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 100Gi + +modelsCachePvc: + enabled: true + # To use the default StorageClass, set storageClassName to null or omit it. + # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name. + # To create a PVC that doesn't request any StorageClass, set it to an empty string (""). + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 10Gi + +modelsCacheOnlyPvc: + enabled: false + +localPersistence: + # For local development with minikube, this allows persisting the models-cache + # on the host machine, surviving `minikube stop/start`. + # 1. Create a directory on your host: `mkdir -p /tmp/models-cache` + # 2. Start minikube with the mount: `minikube start --mount --mount-string="/tmp/models-cache:/tmp/models-cache"` + # 3. Set enabled to true below, or via `--set localPersistence.enabled=true` + enabled: true + # This path must match the destination path inside the minikube node and the hostPath mount created above. + hostPath: "/tmp/models-cache" + +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + + +ramalama: + enabled: true + command: + - /bin/sh + - -c + - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 + image: + repository: "quay.io/ramalama/ramalama" + tag: "latest" + pullPolicy: IfNotPresent + +genericDevicePlugin: + enabled: false \ No newline at end of file
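Usage sketch: the extraEnv block in values.yaml injects RAY_ADDRESS and MLFLOW_TRACKING_URI into every singleuser notebook pod. The snippet below is a minimal, hedged example (not part of the chart) of how a notebook could consume those values; it assumes ray-cluster.enabled has been switched on, that the ray and mlflow Python packages are available in the singleuser image, and the run name "smoke-test" is purely illustrative.

# Minimal sketch, assuming ray-cluster.enabled=true and ray/mlflow installed in the singleuser image.
import os
import ray
import mlflow

# RAY_ADDRESS points at the KubeRay head service (Ray Client, port 10001), as set in extraEnv above.
ray.init(address=os.environ["RAY_ADDRESS"])
print(ray.cluster_resources())

# MLFLOW_TRACKING_URI points at the bundled MLflow tracking server (port 5000).
mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])
with mlflow.start_run(run_name="smoke-test"):
    mlflow.log_metric("connected", 1.0)

Both libraries also read these environment variables automatically; the explicit calls are only there to make the wiring visible.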