diff --git a/.gitignore b/.gitignore
index 6ef5822c8..0635abd98 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,10 @@
 cscope.*
 /bazel-*
 *.pyc
+
+# Helm chart dependencies cache
+**/Chart.lock
+**/charts/*.tgz
+
+# Helm chart output directory
+ai/ai-starter-kit/out
\ No newline at end of file
diff --git a/ai/ai-starter-kit/Makefile b/ai/ai-starter-kit/Makefile
new file mode 100644
index 000000000..40717cb20
--- /dev/null
+++ b/ai/ai-starter-kit/Makefile
@@ -0,0 +1,70 @@
+.PHONY: check_hf_token check_OCI_target package_helm lint dep_update install install_gke start uninstall push_helm install_gke_gpu start_gpu destroy validate_jupyterhub validate_ray
+
+check_hf_token:
+ifndef HF_TOKEN
+	$(error HF_TOKEN is not set)
+endif
+
+check_OCI_target:
+ifndef OCI_HELM_TARGET
+	$(error OCI_HELM_TARGET is not set)
+endif
+
+package_helm:
+	helm package helm-chart/ai-starter-kit/ --destination out/
+
+push_helm: check_OCI_target
+	helm push out/ai-starter-kit* oci://$$OCI_HELM_TARGET
+
+lint:
+	helm lint helm-chart/ai-starter-kit
+
+dep_update:
+	helm dependency update helm-chart/ai-starter-kit
+
+install: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values.yaml
+
+install_gke: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke.yaml
+
+install_gke_gpu: check_hf_token
+	helm upgrade --install ai-starter-kit helm-chart/ai-starter-kit --set huggingface.token="$$HF_TOKEN" --timeout 10m -f helm-chart/ai-starter-kit/values-gke-gpu.yaml
+
+start:
+	mkdir -p /tmp/models-cache
+	minikube start --cpus 4 --memory 15000 --mount --mount-string="/tmp/models-cache:/tmp/models-cache"
+
+start_gpu:
+	mkdir -p $$HOME/models-cache
+	minikube start --driver krunkit --cpus 4 --memory 15000 --mount --mount-string="$$HOME/models-cache:$$HOME/models-cache"
+
+uninstall:
+	helm uninstall ai-starter-kit
+	kubectl delete pod jupyter-user
+	kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir
+
+destroy:
+	minikube delete
+
+validate_jupyterhub:
+	kubectl get pods; \
+	kubectl wait --for=condition=Ready pods -l 'component!=continuous-image-puller' --timeout=1800s; \
+	kubectl get pods; \
+	kubectl get services; \
+	kubectl port-forward service/ai-starter-kit-jupyterhub-proxy-public 8081:80 & \
+	PID=$$!; \
+	echo "Port-forward PID=$${PID}"; \
+	sleep 5s; \
+	python3 ./ci/test_hub.py "127.0.0.1:8081"; \
+	kill $$PID
+
+validate_ray:
+	kubectl wait --for=condition=Ready pods -l 'app.kubernetes.io/created-by=kuberay-operator' --timeout=1800s; \
+	kubectl get pods; \
+	kubectl get services; \
+	kubectl port-forward service/ai-starter-kit-kuberay-head-svc 8265:8265 & \
+	PID=$$!; \
+	sleep 10s; \
+	ray job submit --address=http://127.0.0.1:8265 -- python -c "import ray; ray.init(); print(ray.cluster_resources())"; \
+	kill $$PID
diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore
new file mode 100644
index 000000000..0e8a0eb36
--- /dev/null
+++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml new file mode 100644 index 000000000..9bf77a3b5 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/Chart.yaml @@ -0,0 +1,45 @@ +apiVersion: v2 +name: ai-starter-kit +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. +appVersion: "0.1.0" + + +dependencies: + - name: kuberay-operator + condition: ray-cluster.enabled + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - condition: ray-cluster.enabled + name: ray-cluster + version: "1.3.0" + repository: "https://ray-project.github.io/kuberay-helm" + - name: jupyterhub + version: "4.2.0" + repository: "https://hub.jupyter.org/helm-chart/" + - name: mlflow + version: "0.12.0" + repository: "https://community-charts.github.io/helm-charts" + - name: ollama + condition: ollama.enabled + version: "1.27.0" + repository: "https://helm.otwld.com" diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md new file mode 100644 index 000000000..741d27007 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/README.md @@ -0,0 +1,291 @@ +# AI Starter Kit + +A comprehensive Helm chart for deploying a complete AI/ML development environment on Kubernetes. This starter kit provides a ready-to-use platform with JupyterHub notebooks, model serving capabilities, and experiment tracking - perfect for teams starting their AI journey or prototyping AI applications. 
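+
+The individual components are pulled in as Helm chart dependencies (see `Chart.yaml`). If you want to inspect exactly what will be deployed before installing anything, one option is to render the manifests locally; this is a minimal sketch run from the chart directory, mirroring the install commands used throughout this README:
+
+```bash
+# Fetch the dependency charts (KubeRay, JupyterHub, MLflow, Ollama) declared in Chart.yaml
+helm dependency update .
+
+# Render the manifests locally without installing anything
+helm template ai-starter-kit . \
+  --set huggingface.token="YOUR_HF_TOKEN" \
+  -f values.yaml
+```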
+ +## Purpose + +The AI Starter Kit simplifies the deployment of AI infrastructure by providing: + +- **JupyterHub**: Multi-user notebook environment with pre-configured AI/ML libraries +- **Model Serving**: Support for both Ollama and Ramalama model servers +- **MLflow**: Experiment tracking and model management +- **GPU Support**: Configurations for GPU acceleration on GKE and macOS +- **Model Caching**: Persistent storage for efficient model management +- **Example Notebooks**: Pre-loaded notebooks to get you started immediately + +## Prerequisites + +### General Requirements +- Kubernetes cluster (minikube, GKE) +- Helm 3.x installed +- kubectl configured to access your cluster +- Hugging Face token for accessing models + +### Platform-Specific Requirements + +#### Minikube (Local Development) +- Docker Desktop or similar container runtime +- Minimum 4 CPU cores and 16GB RAM available +- 40GB+ free disk space + +#### GKE (Google Kubernetes Engine) +- Google Cloud CLI (`gcloud`) installed and configured +- Appropriate GCP permissions to create clusters + +#### macOS with GPU (Apple Silicon) +- macOS with Apple Silicon (M1/M2/M3/M4) +- minikube with krunkit driver +- 16GB+ RAM recommended + +## Installation + +### Quick Start (Minikube) + +1. **Start minikube with persistent storage:** +```bash +minikube start --cpus 4 --memory 15000 \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install the chart:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml +``` + +3. **Access JupyterHub:** +```bash +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +``` +Navigate to http://localhost:8080 and login with any username and password `sneakypass` + +### GKE Deployment + +1. **Create a GKE Autopilot cluster:** +```bash +export REGION=us-central1 +export CLUSTER_NAME="ai-starter-cluster" +export PROJECT_ID=$(gcloud config get project) + +gcloud container clusters create-auto ${CLUSTER_NAME} \ + --project=${PROJECT_ID} \ + --region=${REGION} \ + --release-channel=rapid \ + --labels=created-by=ai-on-gke,guide=ai-starter-kit +``` + +2. **Get cluster credentials:** +```bash +gcloud container clusters get-credentials ${CLUSTER_NAME} --location=${REGION} +``` + +3. **Install the chart with GKE-specific values:** +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-gke.yaml +``` + +### GKE with GPU (Ollama) + +For GPU-accelerated model serving with Ollama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ollama-gpu.yaml +``` + +### GKE with GPU (Ramalama) + +For GPU-accelerated model serving with Ramalama: + +```bash +helm install ai-starter-kit . \ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values-gke.yaml \ + -f values-ramalama-gpu.yaml +``` + +### macOS with Apple Silicon GPU + +1. **Start minikube with krunkit driver:** +```bash +minikube start --driver krunkit \ + --cpus 8 --memory 16000 --disk-size 40000mb \ + --mount --mount-string="/tmp/models-cache:/tmp/models-cache" +``` + +2. **Install with macOS GPU support:** +```bash +helm install ai-starter-kit . 
\ + --set huggingface.token="YOUR_HF_TOKEN" \ + -f values.yaml \ + -f values-macos.yaml +``` + +## Configuration + +### Key Configuration Options + +| Parameter | Description | Default | +|-----------|-------------|---------| +| `huggingface.token` | HuggingFace token for models | `"YOUR_HF_TOKEN"` | +| `ollama.enabled` | Enable Ollama model server | `true` | +| `ramalama.enabled` | Enable Ramalama model server | `true` | +| `modelsCachePvc.size` | Size of model cache storage | `10Gi` | +| `jupyterhub.singleuser.defaultUrl` | Default notebook path | `/lab/tree/welcome.ipynb` | +| `mlflow.enabled` | Enable MLflow tracking server | `true` | + +### Storage Configuration + +The chart supports different storage configurations: + +- **Local Development**: Uses hostPath volumes with minikube mount +- **GKE**: Uses standard GKE storage classes (`standard-rwo`, `standard-rwx`) +- **Custom**: Configure via `modelsCachePvc.storageClassName` + +### Model Servers + +#### Ollama +Ollama is enabled by default and provides: +- Easy model management +- REST API for inference +- Support for popular models (Llama, Gemma, Qwen, etc.) +- GPU acceleration support + +#### Ramalama +Ramalama provides: +- Alternative model serving solution +- Support for CUDA and Metal (macOS) acceleration +- Lightweight deployment option + +You can run either Ollama or Ramalama, but not both simultaneously. Toggle using: +```yaml +ollama: + enabled: true/false +ramalama: + enabled: true/false +``` + +## Usage + +### Accessing Services + +#### JupyterHub +```bash +# Port forward to access JupyterHub +kubectl port-forward svc/ai-starter-kit-jupyterhub-proxy-public 8080:80 +# Access at: http://localhost:8080 +# Default password: sneakypass +``` + +#### MLflow +```bash +# Port forward to access MLflow UI +kubectl port-forward svc/ai-starter-kit-mlflow 5000:5000 +# Access at: http://localhost:5000 +``` + +#### Ollama/Ramalama API +```bash +# For Ollama +kubectl port-forward svc/ai-starter-kit-ollama 11434:11434 + +# For Ramalama +kubectl port-forward svc/ai-starter-kit-ramalama 8080:8080 +``` + +### Pre-loaded Example Notebooks + +The JupyterHub environment comes with pre-loaded example notebooks: +- `chat_bot.ipynb`: Simple chatbot interface using Ollama for conversational AI. +- `multi-agent-ollama.ipynb`: Multi-agent workflow demonstration using Ollama. +- `multi-agent-ramalama.ipynb`: Similar multi-agent workflow using RamaLama runtime for comparison. +- `welcome.ipynb`: Introduction notebook with embedding model examples using Qwen models. + +These notebooks are automatically copied to your workspace on first login. + +## Architecture + +The AI Starter Kit consists of: + +1. **JupyterHub**: Multi-user notebook server with persistent storage +2. **Model Serving**: Choice of Ollama or Ramalama for LLM inference +3. **MLflow**: Experiment tracking and model registry +4. **Persistent Storage**: Shared model cache to avoid redundant downloads +5. 
**Init Containers**: Automated setup of models and notebooks + +## Cleanup + +### Uninstall the chart +```bash +helm uninstall ai-starter-kit +``` + +### Delete persistent volumes (optional) +```bash +kubectl delete pvc ai-starter-kit-models-cache-pvc +kubectl delete pvc ai-starter-kit-jupyterhub-hub-db-dir +``` + +### Delete GKE cluster +```bash +gcloud container clusters delete ${CLUSTER_NAME} --region=${REGION} +``` + +### Stop minikube +```bash +minikube stop +minikube delete # To completely remove the cluster +``` + +## Troubleshooting + +### Common Issues + +#### Pods stuck in Pending state +- Check available resources: `kubectl describe pod ` +- Increase cluster resources or reduce resource requests + +#### Model download failures +- Verify Hugging Face token is set correctly +- Check internet connectivity from pods +- Increase init container timeout in values + +#### GPU not detected +- Verify GPU nodes are available: `kubectl get nodes -o wide` +- Check GPU driver installation +- Ensure correct node selectors and tolerations + +#### Storage issues +- Verify PVC is bound: `kubectl get pvc` +- Check storage class availability: `kubectl get storageclass` +- Ensure sufficient disk space + +### Debug Commands +```bash +# Check pod status +kubectl get pods -n default + +# View pod logs +kubectl logs -f + +# Describe pod for events +kubectl describe pod + +# Check resource usage +kubectl top nodes +kubectl top pods +``` + +## Resources + +- [JupyterHub Documentation](https://jupyterhub.readthedocs.io/) +- [MLflow Documentation](https://mlflow.org/docs/latest/index.html) +- [Ollama Documentation](https://ollama.ai/docs) +- [Kubernetes Documentation](https://kubernetes.io/docs/) +- [Helm Documentation](https://helm.sh/docs/) \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb new file mode 100644 index 000000000..0834cf6c3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/chat_bot.ipynb @@ -0,0 +1,312 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "e9e3dd59-b4d9-4de5-a6aa-a72d1480ac77", + "metadata": {}, + "outputs": [], + "source": [ + "from ollama import Client\n", + "\n", + "client = Client(\n", + " host='http://ai-starter-kit-ollama:11434',\n", + " headers={'x-some-header': 'some-value'}\n", + ")\n", + "\n", + "def get_response(prompt):\n", + " response = client.chat(model='gemma3', messages=[\n", + " {\n", + " 'role': 'user',\n", + " 'content': prompt,\n", + " },\n", + " ])\n", + " return response.message.content" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "dd1513d4-18c5-46d7-8260-f90be004d315", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n const force = true;\n const py_version = '3.7.3'.replace('rc', '-rc.').replace('.dev', '-dev.');\n const reloading = false;\n const Bokeh = root.Bokeh;\n\n // Set a timeout for this load but only if we are not already initializing\n if (typeof (root._bokeh_timeout) === \"undefined\" || (force || !root._bokeh_is_initializing)) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } 
finally {\n delete root._bokeh_onload_callbacks;\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n if (js_exports == null) js_exports = {};\n\n root._bokeh_onload_callbacks.push(callback);\n\n if (root._bokeh_is_loading > 0) {\n // Don't load bokeh if it is still initializing\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n } else if (js_urls.length === 0 && js_modules.length === 0 && Object.keys(js_exports).length === 0) {\n // There is nothing to load\n run_callbacks();\n return null;\n }\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n window._bokeh_on_load = on_load\n\n function on_error(e) {\n const src_el = e.srcElement\n console.error(\"failed to load \" + (src_el.href || src_el.src));\n }\n\n const skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {}, 'shim': {}});\n root._bokeh_is_loading = css_urls.length + 0;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length + Object.keys(js_exports).length;\n }\n\n const existing_stylesheets = []\n const links = document.getElementsByTagName('link')\n for (let i = 0; i < links.length; i++) {\n const link = links[i]\n if (link.href != null) {\n existing_stylesheets.push(link.href)\n }\n }\n for (let i = 0; i < css_urls.length; i++) {\n const url = css_urls[i];\n const escaped = encodeURI(url)\n if (existing_stylesheets.indexOf(escaped) !== -1) {\n on_load()\n continue;\n }\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n } var existing_scripts = []\n const scripts = document.getElementsByTagName('script')\n for (let i = 0; i < scripts.length; i++) {\n var script = scripts[i]\n if (script.src != null) {\n existing_scripts.push(script.src)\n }\n }\n for (let i = 0; i < js_urls.length; i++) {\n const url = js_urls[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n const element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (let i = 0; i < js_modules.length; i++) {\n const url = js_modules[i];\n const escaped = encodeURI(url)\n if (skip.indexOf(escaped) !== -1 || existing_scripts.indexOf(escaped) !== -1) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (const name in js_exports) {\n const url = js_exports[name];\n const escaped = encodeURI(url)\n if 
(skip.indexOf(escaped) >= 0 || root[name] != null) {\n if (!window.requirejs) {\n on_load();\n }\n continue;\n }\n var element = document.createElement('script');\n element.onerror = on_error;\n element.async = false;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n element.textContent = `\n import ${name} from \"${url}\"\n window.${name} = ${name}\n window._bokeh_on_load()\n `\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n const js_urls = [\"https://cdn.holoviz.org/panel/1.7.5/dist/bundled/reactiveesm/es-module-shims@^1.10.0/dist/es-module-shims.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-3.7.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-3.7.3.min.js\", \"https://cdn.holoviz.org/panel/1.7.5/dist/panel.min.js\"];\n const js_modules = [];\n const js_exports = {};\n const css_urls = [];\n const inline_js = [ function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (let i = 0; i < inline_js.length; i++) {\n try {\n inline_js[i].call(root, root.Bokeh);\n } catch(e) {\n if (!reloading) {\n throw e;\n }\n }\n }\n // Cache old bokeh versions\n if (Bokeh != undefined && !reloading) {\n var NewBokeh = root.Bokeh;\n if (Bokeh.versions === undefined) {\n Bokeh.versions = new Map();\n }\n if (NewBokeh.version !== Bokeh.version) {\n Bokeh.versions.set(NewBokeh.version, NewBokeh)\n }\n root.Bokeh = Bokeh;\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n root._bokeh_is_initializing = false\n }\n\n function load_or_wait() {\n // Implement a backoff loop that tries to ensure we do not load multiple\n // versions of Bokeh and its dependencies at the same time.\n // In recent versions we use the root._bokeh_is_initializing flag\n // to determine whether there is an ongoing attempt to initialize\n // bokeh, however for backward compatibility we also try to ensure\n // that we do not start loading a newer (Panel>=1.0 and Bokeh>3) version\n // before older versions are fully initialized.\n if (root._bokeh_is_initializing && Date.now() > root._bokeh_timeout) {\n // If the timeout and bokeh was not successfully loaded we reset\n // everything and try loading again\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_is_initializing = false;\n root._bokeh_onload_callbacks = undefined;\n root._bokeh_is_loading = 0\n console.log(\"Bokeh: BokehJS was loaded multiple times but one version failed to initialize.\");\n load_or_wait();\n } else if (root._bokeh_is_initializing || (typeof root._bokeh_is_initializing === \"undefined\" && root._bokeh_onload_callbacks !== undefined)) {\n setTimeout(load_or_wait, 100);\n } else {\n root._bokeh_is_initializing = true\n root._bokeh_onload_callbacks = []\n const bokeh_loaded = root.Bokeh != null && (root.Bokeh.version === py_version || (root.Bokeh.versions !== undefined && 
root.Bokeh.versions.has(py_version)));\n if (!reloading && !bokeh_loaded) {\n if (root.Bokeh) {\n root.Bokeh = undefined;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n }\n load_libs(css_urls, js_urls, js_modules, js_exports, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n }\n // Give older versions of the autoload script a head-start to ensure\n // they initialize before we start loading newer version.\n setTimeout(load_or_wait, 100)\n}(window));", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n let retries = 0;\n const open = () => {\n if (comm.active) {\n comm.open();\n } else if (retries > 3) {\n console.warn('Comm target never activated')\n } else {\n retries += 1\n setTimeout(open, 500)\n }\n }\n if (comm.active) {\n comm.open();\n } else {\n setTimeout(open, 500)\n }\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || 
{comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n })\n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 
'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n", + "application/vnd.holoviews_load.v0+json": "" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ] + }, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "b6fd14e0-f8d2-46e7-9c4d-722893d04d7e" + } + }, + "output_type": "display_data" + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + "Column\n", + " [0] TextInput(placeholder='Enter text here…')\n", + " [1] Row\n", + " [0] Button(name='Chat!')\n", + " [2] ParamFunction(function, _pane=Column, defer_load=False, height=300, loading_indicator=True, sizing_mode='fixed', width=300)" + ] + }, + "execution_count": 2, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "2854d6b0-689d-4dc0-8861-1834489708e9" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "import panel as pn # GUI\n", + "pn.extension()\n", + "\n", + "panels = [] # collect display \n", + "context = [ ] # accumulate messages\n", + "\n", + "\n", + "def collect_messages(_):\n", + " prompt = inp.value_input\n", + " inp.value = ''\n", + " if (not prompt):\n", + " return pn.Column(*panels)\n", + "\n", + " response = get_response(prompt)\n", + " context.append({'role':'user', 'content':f\"{prompt}\"})\n", + " context.append({'role':'assistant', 'content':f\"{response}\"})\n", + " panels.append(\n", + " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n", + " panels.append(\n", + " pn.Row('Assistant:', pn.pane.Markdown(response, width=600)))\n", + " \n", + " return pn.Column(*panels)\n", + "\n", + "\n", + "inp = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n", + "button_conversation = pn.widgets.Button(name=\"Chat!\")\n", + "interactive_conversation = pn.bind(collect_messages, button_conversation)\n", + "dashboard = pn.Column(\n", + " inp,\n", + " pn.Row(button_conversation),\n", + " pn.panel(interactive_conversation, loading_indicator=True, height=300, width=300),\n", + ")\n", + "\n", + "dashboard" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py new file mode 100644 index 000000000..69529726b --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/download_models.py @@ -0,0 +1,22 @@ +import sys +from huggingface_hub import snapshot_download + +# --- Model Download --- +if __name__ == "__main__": + # List your desired Hugging Face model names here + model_names = [ + "Qwen/Qwen3-Embedding-0.6B", + ] + + for model_name in model_names: + print(f"--- Downloading {model_name} ---") + try: + if len(sys.argv) > 1: + snapshot_download(repo_id=model_name, cache_dir=sys.argv[0]) + else: + snapshot_download(repo_id=model_name) + print(f"Successfully cached {model_name}") + except Exception as e: + print(f"Failed to download {model_name}. Error: {e}") + + print("--- Model download process finished. 
---") diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb new file mode 100644 index 000000000..a25cfbe2a --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ollama.ipynb @@ -0,0 +1,525 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install Ollama and verify environment\n", + "\n", + "Installs Ollama for local model serving, sets up environment variables, and verifies the installation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install ollama requests --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['OLLAMA_HOST'] = os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "OLLAMA_HOST = os.environ['OLLAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"Ollama Host:\", OLLAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/version\", timeout=5)\n", + " print(\"Ollama version:\", r.json())\n", + "except Exception as e:\n", + " print(\"Note: Ollama service not running. Starting it in next cell...\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start Ollama service and pull model\n", + "\n", + "Starts the Ollama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import subprocess, time, requests, os\n", + "\n", + "OLLAMA_HOST = os.environ.get('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ollama():\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if not check_ollama() and OLLAMA_HOST.startswith(\"http://ai-starter-kit-ollama\"):\n", + " print(\"Starting Ollama service...\")\n", + " try:\n", + " subprocess.Popen([\"ollama\", \"serve\"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + " time.sleep(3)\n", + " except Exception as e:\n", + " print(f\"Could not start Ollama automatically: {e}\")\n", + " print(\"Please start Ollama manually with: ollama serve\")\n", + "\n", + "if check_ollama():\n", + " print(\"Ollama service is running\")\n", + " \n", + " print(f\"\\nPulling model {MODEL_NAME}...\")\n", + " try:\n", + " r = requests.get(f\"{OLLAMA_HOST}/api/tags\")\n", + " models = r.json().get('models', [])\n", + " model_exists = any(m.get('name') == MODEL_NAME for m in models)\n", + " \n", + " if not model_exists:\n", + " pull_data = {\"name\": MODEL_NAME}\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/pull\", json=pull_data, stream=True)\n", + " for line in r.iter_lines():\n", + " if line:\n", + " try:\n", + " status = json.loads(line)\n", + " if 'status' in status:\n", + " print(f\" {status['status']}\", end='\\r')\n", + " except:\n", + " pass\n", + " print(f\"\\nModel {MODEL_NAME} pulled successfully\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " except Exception as e:\n", + " print(f\"Error pulling model: {e}\")\n", + "else:\n", + " print(\"Warning: Ollama service is not running\")\n", + " print(\"Please ensure Ollama is installed and running\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple FastAPI server that wraps Ollama with an OpenAI-compatible API, including MLflow tracking." 
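+    ,
+    "\n",
+    "\n",
+    "Once the wrapper is running, any OpenAI-style client can point at it on port 8000. As a quick sanity check from a terminal in this Jupyter session (a sketch only; the endpoint path, port, and model name are the ones configured in this cell), you can send a single chat completion with `curl`:\n",
+    "\n",
+    "```bash\n",
+    "curl -s http://localhost:8000/v1/chat/completions \\\n",
+    "  -H 'Content-Type: application/json' \\\n",
+    "  -d '{\"model\": \"qwen2.5:1.5b\", \"messages\": [{\"role\": \"user\", \"content\": \"Say hello\"}]}'\n",
+    "```"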
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, subprocess, time, json, requests, threading\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ollama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "OLLAMA_HOST = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " # Call Ollama API\n", + " ollama_payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{OLLAMA_HOST}/api/chat\", json=ollama_payload, timeout=120)\n", + " r.raise_for_status()\n", + " ollama_response = r.json()\n", + " \n", + " content = ollama_response.get(\"message\", {}).get(\"content\", \"\")\n", + " prompt_tokens = len(\" \".join(m.get(\"content\", \"\") for m in messages).split())\n", + " completion_tokens = len(content.split())\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens_approx\": prompt_tokens,\n", + " \"completion_tokens_approx\": completion_tokens,\n", + " \"total_tokens_approx\": prompt_tokens + completion_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": prompt_tokens + completion_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ollama_wrapper.py', 'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "!pkill -f ollama_wrapper.py 
2>/dev/null || true\n", + "\n", + "env_vars = f\"\"\"\n", + "export OLLAMA_HOST=\"{os.getenv('OLLAMA_HOST', 'http://ai-starter-kit-ollama:11434')}\"\n", + "export MODEL_NAME=\"qwen2.5:1.5b\"\n", + "export MLFLOW_TRACKING_URI=\"{os.getenv('MLFLOW_TRACKING_URI', 'http://ai-starter-kit-mlflow:5000')}\"\n", + "\"\"\"\n", + "\n", + "!echo '{env_vars}' > /tmp/env_vars.sh\n", + "!bash -c 'source /tmp/env_vars.sh && nohup python /tmp/ollama_wrapper.py > /tmp/wrapper.log 2>&1 &'\n", + "\n", + "print(\"Starting API wrapper...\")\n", + "for i in range(30):\n", + " time.sleep(1)\n", + " try:\n", + " r = requests.get(\"http://localhost:8000/v1/healthz\", timeout=1)\n", + " if r.status_code == 200:\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: http://localhost:8000/v1\")\n", + " print(f\"Health: http://localhost:8000/v1/healthz\")\n", + " print(f\"Chat: http://localhost:8000/v1/chat/completions\")\n", + " break\n", + " except:\n", + " if i % 5 == 0:\n", + " print(f\" Waiting for API to start... ({i}s)\")\n", + " continue\n", + "else:\n", + " print(\"\\nAPI wrapper failed to start. Checking logs:\")\n", + " !tail -20 /tmp/wrapper.log\n", + " print(\"\\nYou can still use direct Ollama API in the next cells.\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/api/tags\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", len(r.json().get('models', [])))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " if USE_WRAPPER:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ]\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. 
Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " endpoint = f\"{BASE_URL}/api/chat\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " if USE_WRAPPER:\n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " else:\n", + " response = r.json()\n", + " content = response.get(\"message\", {}).get(\"content\", \"\")\n", + " usage = {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))}\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "OLLAMA_DIRECT = os.getenv(\"OLLAMA_HOST\", \"http://ai-starter-kit-ollama:11434\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " if use_wrapper:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + " else:\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"stream\": False,\n", + " \"options\": {\n", + " \"temperature\": temperature,\n", + " \"num_predict\": max_tokens\n", + " }\n", + " }\n", + " try:\n", + " r = requests.post(f\"{OLLAMA_DIRECT}/api/chat\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json().get(\"message\", {}).get(\"content\", \"\")\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with Ollama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct Ollama API\\n\")\n", + "\n", + "print(\"1. RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. 
Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n2. WRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\n3. CRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ollama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb new file mode 100644 index 000000000..07aff13cc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent-ramalama.ipynb @@ -0,0 +1,466 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Install RamaLama and verify environment\n", + "\n", + "Installs RamaLama for local model serving, sets up environment variables, and verifies the installation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests --disable-pip-version-check\n", + "\n", + "import os, time, json, requests\n", + "from pathlib import Path\n", + "\n", + "os.environ['RAMALAMA_HOST'] = 'http://ai-starter-kit-ramalama:8080'\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "MLFLOW_URI = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "\n", + "RAMALAMA_HOST = os.environ['RAMALAMA_HOST']\n", + "\n", + "print(\"Environment Configuration:\")\n", + "print(\"RamaLama Host:\", RAMALAMA_HOST)\n", + "print(\"Model: \", MODEL_NAME)\n", + "print(\"MLflow: \", MLFLOW_URI)\n", + "print(\"-\" * 60)\n", + "\n", + "try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=5)\n", + " print(\"RamaLama models:\", r.json())\n", + "except Exception as e:\n", + " print(f\"Error connecting to RamaLama: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Start RamaLama service and pull model\n", + "\n", + "Starts the RamaLama service if not running, pulls the Qwen 2.5 1.5B model, and verifies it's ready." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import requests, os, json\n", + "\n", + "RAMALAMA_HOST = os.environ.get('RAMALAMA_HOST', 'http://ai-starter-kit-ramalama:8080')\n", + "MODEL_NAME = \"qwen2.5:1.5b\"\n", + "\n", + "def check_ramalama():\n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\", timeout=2)\n", + " return r.status_code == 200\n", + " except:\n", + " return False\n", + "\n", + "if check_ramalama():\n", + " print(\"RamaLama service is running\")\n", + " \n", + " try:\n", + " r = requests.get(f\"{RAMALAMA_HOST}/v1/models\")\n", + " models = r.json().get('data', [])\n", + " model_exists = any(m.get('id') == MODEL_NAME for m in models) \n", + " if model_exists:\n", + " print(f\"Model {MODEL_NAME} already available\")\n", + " else:\n", + " print(f\"Model {MODEL_NAME} not found; ensure it's pulled in the deployment\")\n", + " except Exception as e:\n", + " print(f\"Error checking model: {e}\")\n", + "else:\n", + " print(\"Warning: RamaLama service is not running\")\n", + " print(\"Please ensure the deployment is healthy\")" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Create OpenAI-compatible API wrapper\n", + "\n", + "Sets up a simple FastAPI server that wraps RamaLama with an OpenAI-compatible API, including MLflow tracking. Since RamaLama already provides OpenAI compatibility, this acts as a proxy with logging." 
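+    ,
+    "\n",
+    "\n",
+    "Because the RamaLama service itself already exposes the OpenAI-style endpoints, you can also query it directly and skip the wrapper (you just lose the MLflow logging). A sketch of a direct request, using the in-cluster service name and port this chart uses:\n",
+    "\n",
+    "```bash\n",
+    "curl -s http://ai-starter-kit-ramalama:8080/v1/chat/completions \\\n",
+    "  -H 'Content-Type: application/json' \\\n",
+    "  -d '{\"model\": \"qwen2.5:1.5b\", \"messages\": [{\"role\": \"user\", \"content\": \"Say hello\"}]}'\n",
+    "```"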
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install fastapi uvicorn mlflow --disable-pip-version-check\n", + "\n", + "import os, threading, time, json\n", + "from pathlib import Path\n", + "\n", + "api_wrapper_code = '''\n", + "import os, time, uuid, requests, json\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "import uvicorn\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " mlflow_uri = os.getenv(\"MLFLOW_TRACKING_URI\")\n", + " if mlflow_uri:\n", + " mlflow.set_tracking_uri(mlflow_uri)\n", + " mlflow.set_experiment(\"ramalama-llm\")\n", + " USE_MLFLOW = True\n", + "except:\n", + " pass\n", + "\n", + "app = FastAPI()\n", + "RAMALAMA_HOST = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "MODEL_NAME = os.getenv(\"MODEL_NAME\", \"qwen2.5:1.5b\")\n", + "\n", + "@app.get(\"/v1/healthz\")\n", + "async def health():\n", + " return {\"status\": \"ok\", \"model\": MODEL_NAME}\n", + "\n", + "@app.post(\"/v1/chat/completions\")\n", + "async def chat_completions(request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + " \n", + " messages = body.get(\"messages\", [])\n", + " temperature = body.get(\"temperature\", 0.7)\n", + " max_tokens = body.get(\"max_tokens\", 256)\n", + " \n", + " payload = {\n", + " \"model\": MODEL_NAME,\n", + " \"messages\": messages,\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " \n", + " try:\n", + " r = requests.post(f\"{RAMALAMA_HOST}/v1/chat/completions\", json=payload, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " \n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {})\n", + " prompt_tokens = usage.get(\"prompt_tokens\", len(\" \".join(m.get(\"content\", \"\") for m in messages).split()))\n", + " completion_tokens = usage.get(\"completion_tokens\", len(content.split()))\n", + " total_tokens = prompt_tokens + completion_tokens\n", + " \n", + " if USE_MLFLOW:\n", + " try:\n", + " with mlflow.start_run():\n", + " mlflow.log_params({\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"model\": MODEL_NAME\n", + " })\n", + " mlflow.log_metrics({\n", + " \"duration_ms\": int((time.time() - t0) * 1000),\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " })\n", + " except:\n", + " pass\n", + " \n", + " return {\n", + " \"id\": \"chatcmpl-\" + uuid.uuid4().hex[:8],\n", + " \"object\": \"chat.completion\",\n", + " \"created\": int(time.time()),\n", + " \"model\": MODEL_NAME,\n", + " \"choices\": [{\n", + " \"index\": 0,\n", + " \"message\": {\"role\": \"assistant\", \"content\": content},\n", + " \"finish_reason\": \"stop\"\n", + " }],\n", + " \"usage\": {\n", + " \"prompt_tokens\": prompt_tokens,\n", + " \"completion_tokens\": completion_tokens,\n", + " \"total_tokens\": total_tokens\n", + " }\n", + " }\n", + " except Exception as e:\n", + " return JSONResponse(status_code=500, content={\"error\": str(e)})\n", + "\n", + "if __name__ == \"__main__\":\n", + " uvicorn.run(app, host=\"0.0.0.0\", port=8000)\n", + "'''\n", + "\n", + "with open('/tmp/ramalama_wrapper.py', 'w') as f:\n", + " f.write(api_wrapper_code)\n", + "\n", + "def run_api():\n", + " 
subprocess.run([\"python\", \"/tmp/ramalama_wrapper.py\"], capture_output=True)\n", + "\n", + "import subprocess\n", + "api_process = subprocess.Popen(\n", + " [\"python\", \"/tmp/ramalama_wrapper.py\"],\n", + " env={**os.environ, \n", + " \"RAMALAMA_HOST\": os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\"),\n", + " \"MODEL_NAME\": MODEL_NAME,\n", + " \"MLFLOW_TRACKING_URI\": MLFLOW_URI},\n", + " stdout=subprocess.DEVNULL,\n", + " stderr=subprocess.DEVNULL\n", + ")\n", + "\n", + "time.sleep(3)\n", + "\n", + "API_URL = \"http://localhost:8000\"\n", + "try:\n", + " r = requests.get(f\"{API_URL}/v1/healthz\", timeout=5)\n", + " print(\"API Status:\", r.json())\n", + " print(f\"\\nOpenAI-compatible API running at: {API_URL}/v1\")\n", + " print(f\"Health: {API_URL}/v1/healthz\")\n", + " print(f\"Chat: {API_URL}/v1/chat/completions\")\n", + "except Exception as e:\n", + " print(f\"Warning: API wrapper not responding: {e}\")\n", + " print(\"You may need to run the wrapper manually\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Tests the OpenAI-compatible API with a simple chat request and measures latency." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "USE_WRAPPER = True\n", + "BASE_URL = \"http://localhost:8000/v1\" if USE_WRAPPER else os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def health():\n", + " if USE_WRAPPER:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + " else:\n", + " r = requests.get(f\"{BASE_URL}/v1/models\", timeout=10)\n", + " print(\"Health:\", r.status_code, \"Models available:\", r.json().get('data', []))\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant. Be concise.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " \"stream\": False\n", + " }\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " \n", + " t0 = time.time()\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " \n", + " response = r.json()\n", + " content = response[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = response.get(\"usage\", {\"total_tokens\": \"estimated: \" + str(len(content.split()) + len(prompt.split()))})\n", + " \n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", content)\n", + " return content\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\")" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent pipeline\n", + "\n", + "Implements a simple three-agent workflow (Researcher -> Writer -> Critic) using the local LLM." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "BASE_URL = \"http://localhost:8000/v1\" \n", + "RAMALAMA_DIRECT = os.getenv(\"RAMALAMA_HOST\", \"http://127.0.0.1:8080\")\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150, use_wrapper=True):\n", + " body = {\n", + " \"model\": \"qwen2.5:1.5b\",\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ],\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"stream\": False\n", + " }\n", + " if use_wrapper:\n", + " endpoint = f\"{BASE_URL}/chat/completions\"\n", + " else:\n", + " endpoint = f\"{RAMALAMA_DIRECT}/v1/chat/completions\"\n", + " try:\n", + " r = requests.post(endpoint, json=body, timeout=120)\n", + " r.raise_for_status()\n", + " response = r.json()\n", + " return response[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "print(\"=\" * 60)\n", + "print(\"Running Multi-Agent Workflow with RamaLama\")\n", + "print(\"=\" * 60)\n", + "\n", + "task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + "\n", + "try:\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=2)\n", + " use_wrapper = r.status_code == 200\n", + " print(\"Using: OpenAI-compatible wrapper\\n\")\n", + "except:\n", + " use_wrapper = False\n", + " print(\"Using: Direct RamaLama API\\n\")\n", + "\n", + "print(\"RESEARCHER:\")\n", + "print(\"-\" * 40)\n", + "research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + "research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140, use_wrapper=use_wrapper)\n", + "print(research_notes)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nWRITER:\")\n", + "print(\"-\" * 40)\n", + "writer_prompt = \"You are a technical writer. Based on the following notes, write a brief report.\"\n", + "writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + "report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220, use_wrapper=use_wrapper)\n", + "print(report)\n", + "time.sleep(1)\n", + "\n", + "print(\"\\nCRITIC/EDITOR:\")\n", + "print(\"-\" * 40)\n", + "critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + "critic_task = f\"Review and improve this report:\\n{report}\"\n", + "final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160, use_wrapper=use_wrapper)\n", + "print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent runs\n", + "\n", + "Connects to MLflow tracking server and displays recent model inference runs with metrics." 
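As an alternative to the per-run printout in Cell 6, `mlflow.search_runs` returns a pandas DataFrame, which is often more convenient inside Jupyter. A short sketch, assuming the same tracking URI and experiment name the notebook uses; column names follow MLflow's `metrics.*` / `params.*` convention.

```python
import os
import mlflow

mlflow.set_tracking_uri(os.getenv("MLFLOW_TRACKING_URI", "http://ai-starter-kit-mlflow:5000"))
exp = mlflow.set_experiment("ramalama-llm")  # created on first use if missing

df = mlflow.search_runs(
    experiment_ids=[exp.experiment_id],
    order_by=["attributes.start_time DESC"],
    max_results=10,
)

# Keep only the columns the notebook prints per run.
cols = [c for c in ["run_id", "status", "metrics.duration_ms",
                    "params.temperature", "params.max_tokens"] if c in df.columns]
print(df[cols] if not df.empty else "No runs logged yet.")
```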
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = \"ramalama-llm\"\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id,\n", + " order_by=[\"attributes.start_time DESC\"],\n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb new file mode 100644 index 000000000..23189a639 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/multi-agent.ipynb @@ -0,0 +1,687 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "079fadd2-200e-4d37-8ae2-be2792e3a24e", + "metadata": {}, + "source": [ + "### Cell 1 - Initialize Ray endpoints and verify dashboard\n", + "\n", + "Installs requests, derives the Ray head host from RAY_ADDRESS, builds Dashboard/Serve/MLflow URLs, reads an Hugging Face token, and prints the endpoints plus the Jobs API version for a quick health check." 
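The address parsing that Cell 1 performs can be read more easily as a standalone function: strip an optional `ray://` scheme and any port, keeping just the hostname. `parse_head_host` is a hypothetical name used only for illustration; the fallback service name matches the chart's KubeRay head service.

```python
def parse_head_host(raw_addr: str, default: str = "ai-starter-kit-kuberay-head-svc") -> str:
    """Return the bare hostname from a RAY_ADDRESS-style value."""
    if raw_addr.startswith("ray://"):
        return raw_addr.split("://", 1)[1].split(":", 1)[0]
    return raw_addr.split(":", 1)[0] or default

# Examples of what the rule yields:
assert parse_head_host("ray://ai-starter-kit-kuberay-head-svc:10001") == "ai-starter-kit-kuberay-head-svc"
assert parse_head_host("my-head:6379") == "my-head"
assert parse_head_host("") == "ai-starter-kit-kuberay-head-svc"
```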
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79db57cd-fb72-4b10-b0fb-5e9cd5c007b6", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install requests==2.* --disable-pip-version-check\n", + "\n", + "import os, textwrap, base64, time, json, requests\n", + "from string import Template\n", + "\n", + "raw_addr = os.getenv(\"RAY_ADDRESS\", \"ray://ai-starter-kit-kuberay-head-svc:10001\")\n", + "if raw_addr.startswith(\"ray://\"):\n", + " HEAD_HOST = raw_addr.split(\"://\", 1)[1].split(\":\", 1)[0]\n", + "else:\n", + " HEAD_HOST = raw_addr.split(\":\", 1)[0] or \"ai-starter-kit-kuberay-head-svc\"\n", + "\n", + "DASH_URL = f\"http://{HEAD_HOST}:8265\"\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"8000\"))\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "HF_TOKEN_PATH = \"/etc/secrets/huggingface/token\"\n", + "HF_TOKEN = \"\"\n", + "if os.path.exists(HF_TOKEN_PATH):\n", + " try:\n", + " HF_TOKEN = open(HF_TOKEN_PATH).read().strip()\n", + " except Exception:\n", + " HF_TOKEN = \"\"\n", + "\n", + "print(\"Head host:\", HEAD_HOST)\n", + "print(\"Jobs API :\", f\"{DASH_URL}/api/jobs/\")\n", + "print(\"Serve URL:\", f\"http://{HEAD_HOST}:{SERVE_PORT}{SERVE_ROUTE}\")\n", + "print(\"MLflow :\", os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\"))\n", + "\n", + "print(\"Jobs API version:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n" + ] + }, + { + "cell_type": "markdown", + "id": "fe862173-fd9a-41ae-a27b-63875f788024", + "metadata": {}, + "source": [ + "### Cell 2 - Deploy a minimal Ray Serve smoke test and verify readiness\n", + "\n", + "Submits a tiny FastAPI app to Ray Serve (one /healthz endpoint under /smoke) as a Ray Job, installing FastAPI on the fly. It polls the Jobs API for status and hits :8000/smoke/healthz up to 60 seconds, printing when the service responds 200 (i.e., smoke test passes)." 
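Cell 2 drives the Ray Jobs REST API directly (POST `/api/jobs/` to submit, GET `/api/jobs/<id>` to poll), so the submit-and-wait pattern is worth seeing in isolation. A minimal sketch follows; the dashboard URL matches the chart's head service, and both helper names are illustrative.

```python
import time
from typing import Optional
import requests

DASH_URL = "http://ai-starter-kit-kuberay-head-svc:8265"

def submit_job(entrypoint: str, runtime_env: Optional[dict] = None) -> str:
    """Submit an entrypoint to the Ray dashboard and return the job_id."""
    payload = {"entrypoint": entrypoint, "runtime_env": runtime_env or {}}
    r = requests.post(f"{DASH_URL}/api/jobs/", json=payload, timeout=60)
    r.raise_for_status()
    return r.json()["job_id"]

def wait_for_job(job_id: str, timeout_s: int = 300) -> str:
    """Poll the job status until it reaches a terminal state or times out."""
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        status = requests.get(f"{DASH_URL}/api/jobs/{job_id}", timeout=10).json()["status"]
        if status in ("SUCCEEDED", "FAILED", "STOPPED"):
            return status
        time.sleep(2)
    return "TIMEOUT"

if __name__ == "__main__":
    jid = submit_job("python -c \"print('hello from ray job')\"")
    print(jid, wait_for_job(jid))
```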
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34da3e26-6276-48b7-b3ac-c90359df6547", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, time, requests\n", + "\n", + "DASH_URL = \"http://ai-starter-kit-kuberay-head-svc:8265\"\n", + "\n", + "print(\"Jobs API:\", requests.get(f\"{DASH_URL}/api/version\", timeout=10).json())\n", + "\n", + "serve_py = textwrap.dedent(\"\"\"\n", + " from fastapi import FastAPI\n", + " from ray import serve\n", + " serve.start(detached=True, http_options={\"host\":\"0.0.0.0\",\"port\":8000})\n", + " app = FastAPI()\n", + "\n", + " @serve.deployment(name=\"smoke\", num_replicas=1)\n", + " @serve.ingress(app)\n", + " class Smoke:\n", + " @app.get(\"/healthz\")\n", + " async def health(self): return {\"ok\": True}\n", + "\n", + " serve.run(Smoke.bind(), route_prefix=\"/smoke\")\n", + " print(\"READY: smoke\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "b64 = base64.b64encode(serve_py.encode()).decode()\n", + "entry = f'python -c \"import base64; exec(base64.b64decode(\\'{b64}\\'))\"'\n", + "submit = requests.post(f\"{DASH_URL}/api/jobs/\", json={\"entrypoint\": entry, \"runtime_env\": {\"pip\": [\"fastapi>=0.110\"]}}, timeout=60).json()\n", + "job_id = submit[\"job_id\"]\n", + "print(\"Job:\", job_id)\n", + "\n", + "svc = \"http://ai-starter-kit-kuberay-head-svc:8000/smoke/healthz\"\n", + "for i in range(60):\n", + " s = requests.get(f\"{DASH_URL}/api/jobs/{job_id}\", timeout=10).json()[\"status\"]\n", + " try:\n", + " r = requests.get(svc, timeout=2)\n", + " print(f\"tick {i:02d}: job={s}, health={r.status_code}\")\n", + " if r.status_code == 200:\n", + " print(\"Smoke OK\")\n", + " break\n", + " except Exception as e:\n", + " print(f\"tick {i:02d}: job={s}, health=ERR {e}\")\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "id": "8111d705-595e-4e65-8479-bdc76191fa31", + "metadata": {}, + "source": [ + "### Cell 3 - Deploy model on Ray Serve with llama-cpp\n", + "\n", + "Packages and submits a Ray Job that spins up a Ray Serve app exposing /v1/healthz and /v1/chat/completions. It downloads the preferred GGUF from Hugging Face, initializes llama-cpp-python, logs to MLflow, and prints the deployed health/chat URLs." 
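The job script in Cell 3 chooses which GGUF file to download by walking a quantization preference list over the repo's file listing. Here is that selection logic condensed into one function; the repo id and preference order mirror the notebook's defaults, and `pick_gguf` is an illustrative name.

```python
from huggingface_hub import HfApi

def pick_gguf(repo_id: str = "Qwen/Qwen2.5-1.5B-Instruct-GGUF",
              prefs: tuple = ("q4_k_m", "q4_0", "q3_k_m", "q2_k")) -> str:
    """Return the first .gguf file matching the preferred quantization order."""
    files = [f for f in HfApi().list_repo_files(repo_id) if f.lower().endswith(".gguf")]
    if not files:
        raise RuntimeError(f"No .gguf files found in {repo_id}")
    for pref in prefs:
        for f in files:
            if pref in f.lower():
                return f
    return files[0]

if __name__ == "__main__":
    target = pick_gguf()
    # The job script then fetches this file with huggingface_hub.hf_hub_download.
    print("Would download:", target)
```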
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbea1539-e9ab-460a-9cfc-20a42807f616", + "metadata": {}, + "outputs": [], + "source": [ + "import os, base64, textwrap, requests\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "DASH_URL = f\"http://{HEAD}:8265\"\n", + "SERVE_PORT = 8000\n", + "SERVE_ROUTE = \"/v1\"\n", + "\n", + "runtime_env = {\n", + " \"pip\": [\n", + " \"fastapi==0.110.0\",\n", + " \"uvicorn==0.23.2\",\n", + " \"huggingface_hub==0.25.2\",\n", + " \"llama-cpp-python==0.3.16\", \n", + " \"hf_transfer==0.1.6\",\n", + " \"mlflow==2.14.3\", \n", + " ],\n", + " \"env_vars\": {\n", + " \"HF_HUB_ENABLE_HF_TRANSFER\": \"1\",\n", + " \"HUGGINGFACE_HUB_TOKEN\": os.environ.get(\"HUGGINGFACE_HUB_TOKEN\", \"\"),\n", + " \"SERVE_PORT\": str(SERVE_PORT),\n", + "\n", + " \"MODEL_REPO\": \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\",\n", + " \"GGUF_PREF_ORDER\": \"q4_k_m,q4_0,q3_k_m,q2_k\",\n", + "\n", + " \"LLM_CONTEXT\": os.environ.get(\"LLM_CONTEXT\", \"1024\"),\n", + " \"LLM_MAX_TOKENS\": os.environ.get(\"LLM_MAX_TOKENS\", \"256\"),\n", + " \"SERVER_MAX_NEW_TOKENS\": os.environ.get(\"SERVER_MAX_NEW_TOKENS\", \"512\"),\n", + "\n", + " \"LLM_THREADS\": os.environ.get(\"LLM_THREADS\", \"6\"),\n", + " \"OMP_NUM_THREADS\": os.environ.get(\"OMP_NUM_THREADS\", \"6\"),\n", + " \"GPU_LAYERS\": \"0\", \n", + " \n", + " \"PIP_PREFER_BINARY\": \"1\",\n", + " \"CMAKE_ARGS\": \"-DGGML_OPENMP=OFF -DLLAMA_NATIVE=OFF\",\n", + "\n", + " \"HF_HOME\": \"/tmp/hf-cache\",\n", + " \"TRANSFORMERS_CACHE\": \"/tmp/hf-cache\",\n", + "\n", + " \"MLFLOW_TRACKING_URI\": os.environ.get(\"MLFLOW_TRACKING_URI\", \"\"),\n", + " \"MLFLOW_EXPERIMENT_NAME\": os.environ.get(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\"),\n", + " },\n", + "}\n", + "\n", + "serve_py = textwrap.dedent(f\"\"\"\n", + "import os, time, multiprocessing, uuid\n", + "from typing import List, Dict, Any\n", + "from fastapi import FastAPI, Request\n", + "from fastapi.responses import JSONResponse\n", + "from huggingface_hub import HfApi, hf_hub_download\n", + "from ray import serve\n", + "from llama_cpp import Llama\n", + "\n", + "USE_MLFLOW = False\n", + "try:\n", + " import mlflow\n", + " if os.getenv(\"MLFLOW_TRACKING_URI\"):\n", + " mlflow.set_tracking_uri(os.getenv(\"MLFLOW_TRACKING_URI\"))\n", + " mlflow.set_experiment(os.getenv(\"MLFLOW_EXPERIMENT_NAME\",\"ray-llama-cpp\"))\n", + " USE_MLFLOW = True\n", + "except Exception as _e:\n", + " USE_MLFLOW = False\n", + "\n", + "SERVE_PORT = int(os.getenv(\"SERVE_PORT\", \"{SERVE_PORT}\"))\n", + "SERVE_ROUTE = \"{SERVE_ROUTE}\"\n", + "MODEL_REPO = os.getenv(\"MODEL_REPO\", \"Qwen/Qwen2.5-1.5B-Instruct-GGUF\")\n", + "GGUF_PREFS = [s.strip() for s in os.getenv(\"GGUF_PREF_ORDER\",\"q4_k_m,q4_0,q3_k_m,q2_k\").split(\",\") if s.strip()]\n", + "CTX_LEN = int(os.getenv(\"LLM_CONTEXT\", \"2048\"))\n", + "MAX_TOKENS = int(os.getenv(\"LLM_MAX_TOKENS\", \"256\"))\n", + "HF_TOKEN = os.getenv(\"HUGGINGFACE_HUB_TOKEN\") or None\n", + "\n", + "serve.start(detached=True, http_options={{\"host\":\"0.0.0.0\", \"port\":SERVE_PORT}})\n", + "app = FastAPI()\n", + "\n", + "def pick_one_file(repo_id: str, prefs):\n", + " api = HfApi()\n", + " files = api.list_repo_files(repo_id=repo_id, repo_type=\"model\", token=HF_TOKEN)\n", + " ggufs = [f for f in files if f.lower().endswith(\".gguf\")]\n", + " if not ggufs:\n", + " raise RuntimeError(f\"No .gguf files visible in {{repo_id}}\")\n", + " for pref in prefs:\n", + " for f in ggufs:\n", + " if pref.lower() in 
f.lower():\n", + " return f\n", + " return ggufs[0]\n", + "\n", + "def pick_chat_format(repo: str, fname: str) -> str:\n", + " return \"qwen\"\n", + "\n", + "@serve.deployment(name=\"qwen\", num_replicas=1, ray_actor_options={{\"num_cpus\": 6}})\n", + "@serve.ingress(app)\n", + "class OpenAICompatLlama:\n", + " def __init__(self, repo_id: str = MODEL_REPO):\n", + " target = pick_one_file(repo_id, GGUF_PREFS)\n", + " print(f\"[env] model repo: {{repo_id}} file: {{target}}\", flush=True)\n", + " local_dir = \"/tmp/hf-gguf\"; os.makedirs(local_dir, exist_ok=True)\n", + "\n", + " gguf_path = hf_hub_download(\n", + " repo_id=repo_id, filename=target, token=HF_TOKEN,\n", + " local_dir=local_dir, local_dir_use_symlinks=False,\n", + " force_download=False, resume_download=True\n", + " )\n", + " print(f\"[download] done: {{gguf_path}}\", flush=True)\n", + "\n", + " n_threads = int(os.getenv(\"LLM_THREADS\", max(2, (multiprocessing.cpu_count() or 4)//2)))\n", + " print(f\"[load] llama-cpp-python | ctx={{CTX_LEN}} threads={{n_threads}} gpu_layers={{int(os.getenv('GPU_LAYERS','0'))}}\", flush=True)\n", + "\n", + " self.model_file = os.path.basename(gguf_path)\n", + " self.model_repo = repo_id\n", + " chat_format = pick_chat_format(self.model_repo, self.model_file)\n", + " print(f\"[load] chat_format={{chat_format}}\", flush=True)\n", + "\n", + " self.llm = Llama(\n", + " model_path=gguf_path,\n", + " n_ctx=CTX_LEN,\n", + " n_threads=n_threads,\n", + " n_batch=256, \n", + " n_gpu_layers=int(os.getenv(\"GPU_LAYERS\",\"0\")),\n", + " chat_format=chat_format,\n", + " verbose=False\n", + " )\n", + " print(\"[ready] model loaded\", flush=True)\n", + "\n", + " @app.get(\"/healthz\")\n", + " async def health(self):\n", + " return {{\"status\":\"ok\"}}\n", + "\n", + " @app.post(\"/chat/completions\")\n", + " async def chat_completions(self, request: Request):\n", + " t0 = time.time()\n", + " body = await request.json()\n", + "\n", + " messages = body.get(\"messages\", [])\n", + " temperature = float(body.get(\"temperature\", 0.2))\n", + " req_max = body.get(\"max_tokens\", None)\n", + " stop_words = (body.get(\"stop\", []) or []) + [\"<|im_end|>\", \"\"]\n", + "\n", + " SERVER_MAX = int(os.getenv(\"SERVER_MAX_NEW_TOKENS\", \"512\"))\n", + " max_tokens = int(req_max if isinstance(req_max, int) else MAX_TOKENS)\n", + " max_tokens = max(32, min(max_tokens, CTX_LEN - 128, SERVER_MAX))\n", + "\n", + " rid = \"chatcmpl-\" + uuid.uuid4().hex[:24]\n", + " created = int(time.time())\n", + " model_name = f\"{{self.model_repo}}/{{self.model_file}}\"\n", + "\n", + " try:\n", + " result = self.llm.create_chat_completion(\n", + " messages=messages,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_k=50,\n", + " top_p=0.9,\n", + " repeat_penalty=1.1,\n", + " stop=stop_words,\n", + " )\n", + " out_text = (result[\"choices\"][0][\"message\"][\"content\"] or \"\").strip()\n", + " usage_raw = result.get(\"usage\") or {{}}\n", + " p_tokens = int(usage_raw.get(\"prompt_tokens\") or 0)\n", + " c_tokens = int(usage_raw.get(\"completion_tokens\") or 0)\n", + " err = None\n", + " except Exception as e:\n", + " out_text = \"\"\n", + " p_tokens = c_tokens = 0\n", + " err = str(e)\n", + "\n", + " if USE_MLFLOW:\n", + " try:\n", + " dur_ms = int((time.time()-t0) * 1000)\n", + " with mlflow.start_run(run_name=\"chat\"):\n", + " mlflow.set_tags({{\n", + " \"model_repo\": self.model_repo,\n", + " \"model_file\": self.model_file,\n", + " \"framework\": \"llama-cpp-python\",\n", + " }})\n", + " 
mlflow.log_params({{\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"ctx\": CTX_LEN,\n", + " }})\n", + " if not (p_tokens and c_tokens):\n", + " p_tokens = p_tokens or max(1, len(\" \".join(m.get(\"content\",\"\") for m in messages).split()))\n", + " c_tokens = c_tokens or max(0, len(out_text.split()))\n", + " mlflow.log_metrics({{\n", + " \"duration_ms\": dur_ms,\n", + " \"prompt_tokens_approx\": p_tokens,\n", + " \"completion_tokens_approx\": c_tokens,\n", + " \"total_tokens_approx\": p_tokens + c_tokens,\n", + " }})\n", + " except Exception:\n", + " pass\n", + "\n", + " if err:\n", + " return JSONResponse(status_code=500, content={{\"error\": err, \"type\":\"generation_error\"}})\n", + "\n", + " usage = {{\n", + " \"prompt_tokens\": p_tokens,\n", + " \"completion_tokens\": c_tokens,\n", + " \"total_tokens\": p_tokens + c_tokens,\n", + " }}\n", + " return {{\n", + " \"id\": rid,\n", + " \"object\": \"chat.completion\",\n", + " \"created\": created,\n", + " \"model\": model_name,\n", + " \"choices\": [\n", + " {{\n", + " \"index\": 0,\n", + " \"message\": {{\"role\":\"assistant\",\"content\": out_text}},\n", + " \"finish_reason\": \"stop\"\n", + " }}\n", + " ],\n", + " \"usage\": usage\n", + " }}\n", + "\n", + "serve.run(OpenAICompatLlama.bind(), route_prefix=SERVE_ROUTE)\n", + "print(\"READY\", flush=True)\n", + "\"\"\").strip()\n", + "\n", + "payload = base64.b64encode(serve_py.encode()).decode()\n", + "entrypoint = 'python -c \"import base64,sys;exec(base64.b64decode(\\'{}\\').decode())\"'.format(payload)\n", + "\n", + "job = requests.post(\n", + " f\"{DASH_URL}/api/jobs/\",\n", + " json={\n", + " \"entrypoint\": entrypoint,\n", + " \"runtime_env\": runtime_env,\n", + " \"metadata\": {\"job_name\": \"serve-qwen2_5-llama_cpp-openai\"},\n", + " },\n", + " timeout=45\n", + ").json()\n", + "\n", + "print(\"Job:\", job.get(\"job_id\"))\n", + "print(\"Health:\", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/healthz\")\n", + "print(\"Chat: \", f\"http://{HEAD}:{SERVE_PORT}{SERVE_ROUTE}/chat/completions\")" + ] + }, + { + "cell_type": "markdown", + "id": "a411c015-c802-4ca1-81bb-3f4790d9626a", + "metadata": {}, + "source": [ + "### Cell 4 - Basic client + latency test\n", + "\n", + "Calls /v1/healthz and then sends an OpenAI-style chat request to /v1/chat/completions with a short prompt. Prints latency and token usage, returning the assistant text." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3be634e2-a82f-42c9-8e31-57e6868a86ee", + "metadata": {}, + "outputs": [], + "source": [ + "import os, time, requests, json\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def health():\n", + " r = requests.get(f\"{BASE_URL}/healthz\", timeout=10)\n", + " print(\"Health:\", r.status_code, r.json())\n", + "\n", + "def chat(prompt, temperature=0.4, max_tokens=220, stop=None):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": float(temperature),\n", + " \"max_tokens\": int(max_tokens),\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": \"You are Qwen2.5 Instruct running on a tiny CPU host. 
Be concise, complete sentences.\"},\n", + " {\"role\": \"user\", \"content\": prompt},\n", + " ],\n", + " }\n", + " if stop:\n", + " body[\"stop\"] = stop\n", + "\n", + " t0 = time.time()\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=300)\n", + " dt = time.time() - t0\n", + " r.raise_for_status()\n", + " out = r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " usage = r.json().get(\"usage\", {})\n", + " print(f\"\\nLatency: {dt:.2f}s | usage: {usage}\")\n", + " print(\"\\n---\\n\", out)\n", + " return out\n", + "\n", + "health()\n", + "_ = chat(\"Say 'test ok' then give me one short fun fact about llamas.\", stop=[\"<|im_end|>\"])" + ] + }, + { + "cell_type": "markdown", + "id": "553d2756-8949-43e3-8342-71387688e0fa", + "metadata": {}, + "source": [ + "### Cell 5 - Multi-agent (Autogen) pipeline\n", + "\n", + "Installs Autogen, configures OpenAIWrapper to hit Ray Serve /v1 endpoint, warms up the model, then runs a simple three-agent workflow (Researcher -> Writer -> Critic) to produce and refine a short report." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f6713f3-8b60-40b2-ad3c-ebf6db4f66e1", + "metadata": {}, + "outputs": [], + "source": [ + "import os, requests, json, time\n", + "\n", + "HEAD = os.environ.get(\"RAY_HEAD_SVC\", \"ai-starter-kit-kuberay-head-svc\")\n", + "SERVE_PORT = 8000\n", + "BASE_URL = f\"http://{HEAD}:{SERVE_PORT}/v1\"\n", + "\n", + "def call_llm(role_prompt, user_message, temperature=0.4, max_tokens=150):\n", + " body = {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"temperature\": temperature,\n", + " \"max_tokens\": max_tokens,\n", + " \"messages\": [\n", + " {\"role\": \"system\", \"content\": role_prompt},\n", + " {\"role\": \"user\", \"content\": user_message}\n", + " ]\n", + " }\n", + " try:\n", + " r = requests.post(f\"{BASE_URL}/chat/completions\", json=body, timeout=120)\n", + " r.raise_for_status()\n", + " return r.json()[\"choices\"][0][\"message\"][\"content\"]\n", + " except Exception as e:\n", + " return f\"Error: {e}\"\n", + "\n", + "# Try to use autogen if available, otherwise use direct implementation\n", + "USE_AUTOGEN = False\n", + "\n", + "try:\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Using autogen for multi-agent workflow\")\n", + "except ImportError:\n", + " try:\n", + " print(\"Installing autogen dependencies...\")\n", + " !pip install -q pyautogen~=0.2.35 python-dotenv tiktoken \"numpy<2,>=1.17.0\" --disable-pip-version-check 2>/dev/null\n", + " import autogen\n", + " from autogen import AssistantAgent, UserProxyAgent\n", + " USE_AUTOGEN = True\n", + " print(\"Autogen installed successfully\")\n", + " except:\n", + " print(\"Using direct implementation (autogen not available)\")\n", + " USE_AUTOGEN = False\n", + "\n", + "if USE_AUTOGEN:\n", + " config_list = [\n", + " {\n", + " \"model\": \"qwen2.5-1.5b-instruct-gguf\",\n", + " \"base_url\": BASE_URL,\n", + " \"api_key\": \"local\",\n", + " \"price\": [0.0, 0.0],\n", + " }\n", + " ]\n", + " \n", + " llm = autogen.OpenAIWrapper(config_list=config_list)\n", + " \n", + " try:\n", + " r = llm.create(messages=[{\"role\":\"user\",\"content\":\"Say 'test ok'.\"}], temperature=0.2, max_tokens=16)\n", + " print(\"Warmup:\", r.choices[0].message.content)\n", + " except Exception as e:\n", + " print(\"Warmup skipped:\", e)\n", + " \n", + " user_proxy = UserProxyAgent(\n", + " name=\"UserProxy\",\n", + " system_message=\"You are the human 
admin. Initiate the task.\",\n", + " code_execution_config=False,\n", + " human_input_mode=\"NEVER\",\n", + " )\n", + " \n", + " researcher = AssistantAgent(\n", + " name=\"Researcher\",\n", + " system_message=(\n", + " \"You are a researcher. Gather concise, verified facts on the topic. \"\n", + " \"Return 3-4 bullet points. Keep under 100 words total.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.35, \"max_tokens\": 140, \"timeout\": 120},\n", + " )\n", + " \n", + " writer = AssistantAgent(\n", + " name=\"Writer\",\n", + " system_message=(\n", + " \"You are a writer. Using the Researcher's notes, produce a clear report under 160 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.55, \"max_tokens\": 220, \"timeout\": 180},\n", + " )\n", + " \n", + " critic = AssistantAgent(\n", + " name=\"Critic\",\n", + " system_message=(\n", + " \"You are a critic. Review the Writer's report for accuracy and clarity. \"\n", + " \"Present the final polished text under 140 words.\"\n", + " ),\n", + " llm_config={\"config_list\": config_list, \"temperature\": 0.45, \"max_tokens\": 160, \"timeout\": 120},\n", + " )\n", + " \n", + " def run_sequential(task):\n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (with autogen)\")\n", + " print(\"=\" * 60)\n", + " \n", + " research_response = researcher.generate_reply(messages=[{\"content\": task, \"role\": \"user\"}])\n", + " research_notes = research_response if isinstance(research_response, str) else research_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " print(research_notes)\n", + " \n", + " writer_prompt = f\"Using these research notes, write the report:\\n{research_notes}\"\n", + " writer_response = writer.generate_reply(messages=[{\"content\": writer_prompt, \"role\": \"user\"}])\n", + " report = writer_response if isinstance(writer_response, str) else writer_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " print(report)\n", + " \n", + " critic_prompt = f\"Review this report:\\n{report}\"\n", + " critic_response = critic.generate_reply(messages=[{\"content\": critic_prompt, \"role\": \"user\"}])\n", + " final_text = critic_response if isinstance(critic_response, str) else critic_response.get(\"content\", \"[no output]\")\n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " print(final_text)\n", + " return final_text\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025. Gather key facts, then write a short report.\"\n", + " final_output = run_sequential(task)\n", + " \n", + "else:\n", + " print(\"=\" * 60)\n", + " print(\"Running Multi-Agent Workflow (direct implementation)\")\n", + " print(\"=\" * 60)\n", + " \n", + " task = \"Research the latest advancements in quantum computing as of 2025.\"\n", + " \n", + " print(\"\\n1. RESEARCHER:\")\n", + " print(\"-\" * 40)\n", + " research_prompt = \"You are a researcher. Provide 3-4 key facts about the topic. Be concise and factual.\"\n", + " research_notes = call_llm(research_prompt, task, temperature=0.35, max_tokens=140)\n", + " print(research_notes)\n", + " time.sleep(1) \n", + " \n", + " print(\"\\n2. WRITER:\")\n", + " print(\"-\" * 40)\n", + " writer_prompt = \"You are a technical writer. 
Based on the following notes, write a brief report.\"\n", + " writer_task = f\"Write a report based on these notes:\\n{research_notes}\"\n", + " report = call_llm(writer_prompt, writer_task, temperature=0.55, max_tokens=220)\n", + " print(report)\n", + " time.sleep(1)\n", + " \n", + " print(\"\\n3. CRITIC/EDITOR:\")\n", + " print(\"-\" * 40)\n", + " critic_prompt = \"You are an editor. Review the report and provide a final polished version.\"\n", + " critic_task = f\"Review and improve this report:\\n{report}\"\n", + " final_output = call_llm(critic_prompt, critic_task, temperature=0.45, max_tokens=160)\n", + " print(final_output)\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"Multi-agent workflow complete\")\n", + "print(\"=\" * 60)" + ] + }, + { + "cell_type": "markdown", + "id": "0af596cf-5ba6-42df-a030-61d7a20d6f7b", + "metadata": {}, + "source": [ + "### Cell 6 - MLFlow: connect to tracking server and list recent chat runs\n", + "\n", + "Installs MLflow, sets the tracking URI and experiment, then queries and prints the latest runs with key params/metrics (temperature, max_tokens, duration) to verify Serve logging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "03a1b042-04df-4cd0-9099-4cc763ecfe9d", + "metadata": {}, + "outputs": [], + "source": [ + "!pip -q install mlflow==2.14.3 --disable-pip-version-check\n", + "\n", + "import os, mlflow\n", + "from datetime import datetime\n", + "\n", + "tracking_uri = os.getenv(\"MLFLOW_TRACKING_URI\", \"http://ai-starter-kit-mlflow:5000\")\n", + "mlflow.set_tracking_uri(tracking_uri)\n", + "print(f\"MLflow Tracking URI: {tracking_uri}\")\n", + "\n", + "exp_name = os.getenv(\"MLFLOW_EXPERIMENT_NAME\", \"ray-llama-cpp\")\n", + "exp = mlflow.set_experiment(exp_name)\n", + "print(f\"Experiment: {exp.name} (ID: {exp.experiment_id})\")\n", + "print(\"-\" * 60)\n", + "\n", + "client = mlflow.tracking.MlflowClient()\n", + "runs = client.search_runs(\n", + " exp.experiment_id, \n", + " order_by=[\"attributes.start_time DESC\"], \n", + " max_results=10\n", + ")\n", + "\n", + "if not runs:\n", + " print(\"No runs found. 
Run cells 4 or 5 first to generate inference requests.\")\n", + "else:\n", + " print(f\"\\nFound {len(runs)} recent runs:\")\n", + " print(\"-\" * 60)\n", + " \n", + " for i, run in enumerate(runs, 1):\n", + " start_time = datetime.fromtimestamp(run.info.start_time/1000).strftime('%Y-%m-%d %H:%M:%S')\n", + " duration = run.data.metrics.get('duration_ms', 'N/A')\n", + " temp = run.data.params.get('temperature', 'N/A')\n", + " max_tokens = run.data.params.get('max_tokens', 'N/A')\n", + " total_tokens = run.data.metrics.get('total_tokens_approx', 'N/A')\n", + " \n", + " print(f\"\\nRun {i}:\")\n", + " print(f\" ID: {run.info.run_id[:12]}...\")\n", + " print(f\" Time: {start_time}\")\n", + " print(f\" Status: {run.info.status}\")\n", + " print(f\" Temperature: {temp}\")\n", + " print(f\" Max Tokens: {max_tokens}\")\n", + " print(f\" Duration: {duration} ms\")\n", + " print(f\" Total Tokens: {total_tokens}\")\n", + " \n", + " print(\"\\n\" + \"=\" * 60)\n", + " print(\"SUMMARY:\")\n", + " successful = sum(1 for r in runs if r.info.status == 'FINISHED')\n", + " durations = [r.data.metrics.get('duration_ms', 0) for r in runs if r.data.metrics.get('duration_ms')]\n", + " avg_duration = sum(durations) / len(durations) if durations else 0\n", + " \n", + " print(f\" Total Runs: {len(runs)}\")\n", + " print(f\" Successful: {successful}\")\n", + " print(f\" Failed: {len(runs) - successful}\")\n", + " print(f\" Avg Duration: {avg_duration:.1f} ms\" if avg_duration else \" Avg Duration: N/A\")\n", + "\n", + "print(\"\\n\" + \"=\" * 60)\n", + "print(\"MLflow verification complete\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb new file mode 100644 index 000000000..dae93a357 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/ray.ipynb @@ -0,0 +1,798 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e4a6ac7c-5c73-42a9-8b74-420788321543", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting ray==2.41.0\n", + " Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting click>=7.0 (from ray==2.41.0)\n", + " Downloading click-8.2.1-py3-none-any.whl.metadata (2.5 kB)\n", + "Collecting filelock (from ray==2.41.0)\n", + " Downloading filelock-3.19.1-py3-none-any.whl.metadata (2.1 kB)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (4.22.0)\n", + "Collecting msgpack<2.0.0,>=1.0.0 (from ray==2.41.0)\n", + " Downloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)\n", + "Requirement already satisfied: packaging in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (23.2)\n", + "Collecting protobuf!=3.19.5,>=3.15.3 (from ray==2.41.0)\n", + " Downloading 
protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)\n", + "Requirement already satisfied: pyyaml in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (6.0.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (1.4.1)\n", + "Requirement already satisfied: requests in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray==2.41.0) (2.31.0)\n", + "Requirement already satisfied: attrs>=22.2.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (23.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray==2.41.0) (0.18.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2.1.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests->ray==2.41.0) (2024.2.2)\n", + "Downloading ray-2.41.0-cp311-cp311-manylinux2014_x86_64.whl (67.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m25.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading click-8.2.1-py3-none-any.whl (102 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m102.2/102.2 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (429 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m430.0/430.0 kB\u001b[0m \u001b[31m41.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl (322 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m322.0/322.0 kB\u001b[0m \u001b[31m33.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading filelock-3.19.1-py3-none-any.whl (15 kB)\n", + "Installing collected packages: protobuf, msgpack, filelock, click, ray\n", + "\u001b[33m WARNING: The scripts ray, rllib, serve and tune are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0mSuccessfully installed click-8.2.1 filelock-3.19.1 msgpack-1.1.1 protobuf-6.32.0 ray-2.41.0\n" + ] + } + ], + "source": [ + "!pip install ray==2.41.0" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": 
"788f1517-251c-4171-af7d-f4c7a5073d71", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Collecting numpy\n", + " Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.1/62.1 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting mlflow\n", + " Downloading mlflow-3.3.1-py3-none-any.whl.metadata (30 kB)\n", + "Collecting tensorflow\n", + " Downloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)\n", + "Requirement already satisfied: ray[client,default,serve] in ./models-cache/lib/python3.11/site-packages (2.41.0)\n", + "Collecting mlflow-skinny==3.3.1 (from mlflow)\n", + " Downloading mlflow_skinny-3.3.1-py3-none-any.whl.metadata (31 kB)\n", + "Collecting mlflow-tracing==3.3.1 (from mlflow)\n", + " Downloading mlflow_tracing-3.3.1-py3-none-any.whl.metadata (19 kB)\n", + "Collecting Flask<4 (from mlflow)\n", + " Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)\n", + "Requirement already satisfied: alembic!=1.10.0,<2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (1.13.1)\n", + "Collecting cryptography<46,>=43.0.0 (from mlflow)\n", + " Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl.metadata (5.7 kB)\n", + "Requirement already satisfied: docker<8,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (7.1.0)\n", + "Collecting graphene<4 (from mlflow)\n", + " Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)\n", + "Collecting gunicorn<24 (from mlflow)\n", + " Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting matplotlib<4 (from mlflow)\n", + " Downloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting pandas<3 (from mlflow)\n", + " Downloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m91.2/91.2 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pyarrow<22,>=4.0.0 (from mlflow)\n", + " Downloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.3 kB)\n", + "Collecting scikit-learn<2 (from mlflow)\n", + " Downloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)\n", + "Collecting scipy<2 (from mlflow)\n", + " Downloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: sqlalchemy<3,>=1.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow) (2.0.30)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-6.2.0-py3-none-any.whl.metadata (5.4 kB)\n", + "Requirement already satisfied: click<9,>=7.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (8.2.1)\n", + "Collecting 
cloudpickle<4 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)\n", + "Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading databricks_sdk-0.64.0-py3-none-any.whl.metadata (39 kB)\n", + "Collecting fastapi<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading fastapi-0.116.1-py3-none-any.whl.metadata (28 kB)\n", + "Collecting gitpython<4,>=3.1.9 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)\n", + "Collecting importlib_metadata!=4.7.0,<9,>=3.7.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading importlib_metadata-8.7.0-py3-none-any.whl.metadata (4.8 kB)\n", + "Collecting opentelemetry-api<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_api-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Collecting opentelemetry-sdk<3,>=1.9.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_sdk-1.36.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: packaging<26 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (23.2)\n", + "Requirement already satisfied: protobuf<7,>=3.12.0 in ./models-cache/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.32.0)\n", + "Requirement already satisfied: pydantic<3,>=1.10.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.7.3)\n", + "Requirement already satisfied: pyyaml<7,>=5.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (6.0.1)\n", + "Requirement already satisfied: requests<3,>=2.17.3 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (2.31.0)\n", + "Collecting sqlparse<1,>=0.4.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading sqlparse-0.5.3-py3-none-any.whl.metadata (3.9 kB)\n", + "Requirement already satisfied: typing-extensions<5,>=4.0.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from mlflow-skinny==3.3.1->mlflow) (4.12.2)\n", + "Collecting uvicorn<1 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading uvicorn-0.35.0-py3-none-any.whl.metadata (6.5 kB)\n", + "Collecting absl-py>=1.0.0 (from tensorflow)\n", + " Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting astunparse>=1.6.0 (from tensorflow)\n", + " Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)\n", + "Collecting flatbuffers>=24.3.25 (from tensorflow)\n", + " Downloading flatbuffers-25.2.10-py2.py3-none-any.whl.metadata (875 bytes)\n", + "Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)\n", + " Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting google_pasta>=0.1.1 (from tensorflow)\n", + " Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)\n", + "Collecting libclang>=13.0.0 (from tensorflow)\n", + " Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)\n", + "Collecting opt_einsum>=2.3.2 (from tensorflow)\n", + " Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)\n", + "Requirement already satisfied: setuptools in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (69.5.1)\n", + "Requirement already satisfied: six>=1.12.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from tensorflow) (1.16.0)\n", + "Collecting termcolor>=1.1.0 (from tensorflow)\n", + " Downloading 
termcolor-3.1.0-py3-none-any.whl.metadata (6.4 kB)\n", + "Collecting wrapt>=1.11.0 (from tensorflow)\n", + " Downloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl.metadata (6.4 kB)\n", + "Collecting grpcio<2.0,>=1.24.3 (from tensorflow)\n", + " Downloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)\n", + "Collecting tensorboard~=2.20.0 (from tensorflow)\n", + " Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)\n", + "Collecting keras>=3.10.0 (from tensorflow)\n", + " Downloading keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)\n", + "Collecting h5py>=3.11.0 (from tensorflow)\n", + " Downloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)\n", + "Collecting ml_dtypes<1.0.0,>=0.5.1 (from tensorflow)\n", + " Downloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (8.9 kB)\n", + "Requirement already satisfied: filelock in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (3.19.1)\n", + "Requirement already satisfied: jsonschema in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (4.22.0)\n", + "Requirement already satisfied: msgpack<2.0.0,>=1.0.0 in ./models-cache/lib/python3.11/site-packages (from ray[client,default,serve]) (1.1.1)\n", + "Requirement already satisfied: aiosignal in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: frozenlist in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (1.4.1)\n", + "Requirement already satisfied: aiohttp>=3.7 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (3.9.5)\n", + "Collecting aiohttp-cors (from ray[client,default,serve])\n", + " Downloading aiohttp_cors-0.8.1-py3-none-any.whl.metadata (20 kB)\n", + "Collecting colorful (from ray[client,default,serve])\n", + " Downloading colorful-0.5.7-py2.py3-none-any.whl.metadata (16 kB)\n", + "Collecting opencensus (from ray[client,default,serve])\n", + " Downloading opencensus-0.11.4-py2.py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: prometheus-client>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from ray[client,default,serve]) (0.20.0)\n", + "Collecting smart-open (from ray[client,default,serve])\n", + " Downloading smart_open-7.3.0.post1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting virtualenv!=20.21.1,>=20.0.24 (from ray[client,default,serve])\n", + " Downloading virtualenv-20.34.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting py-spy>=0.2.0 (from ray[client,default,serve])\n", + " Downloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (510 bytes)\n", + "Collecting starlette (from ray[client,default,serve])\n", + " Downloading starlette-0.47.3-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting watchfiles (from ray[client,default,serve])\n", + " Downloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Requirement already satisfied: attrs>=17.3.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (23.2.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (6.0.5)\n", + "Requirement already 
satisfied: yarl<2.0,>=1.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from aiohttp>=3.7->ray[client,default,serve]) (1.9.4)\n", + "Requirement already satisfied: Mako in /opt/bitnami/miniconda/lib/python3.11/site-packages (from alembic!=1.10.0,<2->mlflow) (1.3.5)\n", + "Requirement already satisfied: wheel<1.0,>=0.23.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from astunparse>=1.6.0->tensorflow) (0.43.0)\n", + "Requirement already satisfied: cffi>=1.14 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cryptography<46,>=43.0.0->mlflow) (1.16.0)\n", + "Requirement already satisfied: urllib3>=1.26.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from docker<8,>=4.0.0->mlflow) (2.1.0)\n", + "Collecting blinker>=1.9.0 (from Flask<4->mlflow)\n", + " Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting itsdangerous>=2.2.0 (from Flask<4->mlflow)\n", + " Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)\n", + "Requirement already satisfied: jinja2>=3.1.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (3.1.4)\n", + "Requirement already satisfied: markupsafe>=2.1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from Flask<4->mlflow) (2.1.5)\n", + "Collecting werkzeug>=3.1.0 (from Flask<4->mlflow)\n", + " Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)\n", + "Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_core-3.2.6-py3-none-any.whl.metadata (11 kB)\n", + "Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)\n", + " Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: python-dateutil<3,>=2.7.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from graphene<4->mlflow) (2.9.0.post0)\n", + "Collecting rich (from keras>=3.10.0->tensorflow)\n", + " Downloading rich-14.1.0-py3-none-any.whl.metadata (18 kB)\n", + "Collecting namex (from keras>=3.10.0->tensorflow)\n", + " Downloading namex-0.1.0-py3-none-any.whl.metadata (322 bytes)\n", + "Collecting optree (from keras>=3.10.0->tensorflow)\n", + " Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (33 kB)\n", + "Collecting contourpy>=1.0.1 (from matplotlib<4->mlflow)\n", + " Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.5 kB)\n", + "Collecting cycler>=0.10 (from matplotlib<4->mlflow)\n", + " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", + "Collecting fonttools>=4.22.0 (from matplotlib<4->mlflow)\n", + " Downloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (108 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m108.9/108.9 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib<4->mlflow)\n", + " Downloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.3 kB)\n", + "Collecting pillow>=8 (from matplotlib<4->mlflow)\n", + " Downloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (9.0 kB)\n", + "Collecting pyparsing>=2.3.1 (from matplotlib<4->mlflow)\n", + " Downloading pyparsing-3.2.3-py3-none-any.whl.metadata (5.0 kB)\n", + "Collecting pytz>=2020.1 (from pandas<3->mlflow)\n", + " Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)\n", + "Collecting 
tzdata>=2022.7 (from pandas<3->mlflow)\n", + " Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from pydantic<3,>=1.10.8->mlflow-skinny==3.3.1->mlflow) (2.18.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (3.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from requests<3,>=2.17.3->mlflow-skinny==3.3.1->mlflow) (2024.2.2)\n", + "Collecting joblib>=1.2.0 (from scikit-learn<2->mlflow)\n", + " Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting threadpoolctl>=3.1.0 (from scikit-learn<2->mlflow)\n", + " Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from sqlalchemy<3,>=1.4.0->mlflow) (3.0.3)\n", + "Requirement already satisfied: anyio<5,>=3.6.2 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from starlette->ray[client,default,serve]) (4.4.0)\n", + "Collecting markdown>=2.6.8 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading markdown-3.8.2-py3-none-any.whl.metadata (5.1 kB)\n", + "Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard~=2.20.0->tensorflow)\n", + " Downloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl.metadata (1.1 kB)\n", + "Collecting distlib<1,>=0.3.7 (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve])\n", + " Downloading distlib-0.4.0-py2.py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: platformdirs<5,>=3.9.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from virtualenv!=20.21.1,>=20.0.24->ray[client,default,serve]) (3.10.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from jsonschema->ray[client,default,serve]) (0.18.1)\n", + "Collecting opencensus-context>=0.1.3 (from opencensus->ray[client,default,serve])\n", + " Downloading opencensus_context-0.1.3-py2.py3-none-any.whl.metadata (3.3 kB)\n", + "Collecting google-api-core<3.0.0,>=1.0.0 (from opencensus->ray[client,default,serve])\n", + " Downloading google_api_core-2.25.1-py3-none-any.whl.metadata (3.0 kB)\n", + "Requirement already satisfied: h11>=0.8 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve]) (0.14.0)\n", + "Collecting httptools>=0.6.3 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading 
httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)\n", + "Collecting python-dotenv>=0.13 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)\n", + "Collecting uvloop>=0.15.1 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)\n", + "Collecting websockets>=10.4 (from uvicorn[standard]; extra == \"serve\"->ray[client,default,serve])\n", + " Downloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: sniffio>=1.1 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from anyio<5,>=3.6.2->starlette->ray[client,default,serve]) (1.3.1)\n", + "Requirement already satisfied: pycparser in /opt/bitnami/miniconda/lib/python3.11/site-packages (from cffi>=1.14->cryptography<46,>=43.0.0->mlflow) (2.21)\n", + "Collecting google-auth~=2.0 (from databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading google_auth-2.40.3-py2.py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting gitdb<5,>=4.0.1 (from gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading gitdb-4.0.12-py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting googleapis-common-protos<2.0.0,>=1.56.2 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading googleapis_common_protos-1.70.0-py3-none-any.whl.metadata (9.3 kB)\n", + "Collecting proto-plus<2.0.0,>=1.22.3 (from google-api-core<3.0.0,>=1.0.0->opencensus->ray[client,default,serve])\n", + " Downloading proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)\n", + "Collecting zipp>=3.20 (from importlib_metadata!=4.7.0,<9,>=3.7.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading zipp-3.23.0-py3-none-any.whl.metadata (3.6 kB)\n", + "Collecting opentelemetry-semantic-conventions==0.57b0 (from opentelemetry-sdk<3,>=1.9.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl.metadata (2.4 kB)\n", + "Collecting markdown-it-py>=2.2.0 (from rich->keras>=3.10.0->tensorflow)\n", + " Downloading markdown_it_py-4.0.0-py3-none-any.whl.metadata (7.3 kB)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/bitnami/miniconda/lib/python3.11/site-packages (from rich->keras>=3.10.0->tensorflow) (2.18.0)\n", + "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython<4,>=3.1.9->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading smmap-5.0.2-py3-none-any.whl.metadata (4.3 kB)\n", + "Collecting cachetools<7,>=5.0.0 (from mlflow-skinny==3.3.1->mlflow)\n", + " Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)\n", + "Collecting pyasn1-modules>=0.2.1 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1_modules-0.4.2-py3-none-any.whl.metadata (3.5 kB)\n", + "Collecting rsa<5,>=3.1.4 (from google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading rsa-4.9.1-py3-none-any.whl.metadata (5.6 kB)\n", + "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich->keras>=3.10.0->tensorflow)\n", + " Downloading mdurl-0.1.2-py3-none-any.whl.metadata (1.6 kB)\n", + "Collecting pyasn1<0.7.0,>=0.6.1 (from 
pyasn1-modules>=0.2.1->google-auth~=2.0->databricks-sdk<1,>=0.20.0->mlflow-skinny==3.3.1->mlflow)\n", + " Downloading pyasn1-0.6.1-py3-none-any.whl.metadata (8.4 kB)\n", + "Downloading numpy-2.3.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.9/16.9 MB\u001b[0m \u001b[31m119.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow-3.3.1-py3-none-any.whl (26.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m26.4/26.4 MB\u001b[0m \u001b[31m87.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading mlflow_skinny-3.3.1-py3-none-any.whl (2.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m104.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading mlflow_tracing-3.3.1-py3-none-any.whl (1.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.1/1.1 MB\u001b[0m \u001b[31m67.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (620.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m620.6/620.6 MB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading absl_py-2.3.1-py3-none-any.whl (135 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m135.8/135.8 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)\n", + "Downloading cryptography-45.0.6-cp311-abi3-manylinux_2_34_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m121.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading fastapi-0.116.1-py3-none-any.whl (95 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m95.6/95.6 kB\u001b[0m \u001b[31m10.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flask-3.1.2-py3-none-any.whl (103 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.3/103.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading flatbuffers-25.2.10-py2.py3-none-any.whl (30 kB)\n", + "Downloading gast-0.6.0-py3-none-any.whl (21 kB)\n", + "Downloading google_pasta-0.2.0-py3-none-any.whl (57 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.9/114.9 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading grpcio-1.74.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.2/6.2 MB\u001b[0m \u001b[31m114.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading 
gunicorn-23.0.0-py3-none-any.whl (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.0/85.0 kB\u001b[0m \u001b[31m11.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading h5py-3.14.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m128.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading keras-3.11.3-py3-none-any.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m81.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl (24.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.5/24.5 MB\u001b[0m \u001b[31m102.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading matplotlib-3.10.5-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (8.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.7/8.7 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading ml_dtypes-0.5.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (4.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m108.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading opt_einsum-3.4.0-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.9/71.9 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pandas-2.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m138.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25hDownloading py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m116.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyarrow-21.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (42.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m42.8/42.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m9.7/9.7 MB\u001b[0m \u001b[31m144.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hDownloading scipy-1.16.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (35.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m35.4/35.4 MB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading starlette-0.47.3-py3-none-any.whl (72 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m73.0/73.0 
kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard-2.20.0-py3-none-any.whl (5.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.5/5.5 MB\u001b[0m \u001b[31m121.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading termcolor-3.1.0-py3-none-any.whl (7.7 kB)\n", + "Downloading virtualenv-20.34.0-py3-none-any.whl (6.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.0/6.0 MB\u001b[0m \u001b[31m125.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (82 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.4/82.4 kB\u001b[0m \u001b[31m11.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading aiohttp_cors-0.8.1-py3-none-any.whl (25 kB)\n", + "Downloading colorful-0.5.7-py2.py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.5/201.5 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus-0.11.4-py2.py3-none-any.whl (128 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m128.2/128.2 kB\u001b[0m \u001b[31m18.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading smart_open-7.3.0.post1-py3-none-any.whl (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.9/61.9 kB\u001b[0m \u001b[31m9.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading watchfiles-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (453 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m453.1/453.1 kB\u001b[0m \u001b[31m49.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading blinker-1.9.0-py3-none-any.whl (8.5 kB)\n", + "Downloading cloudpickle-3.1.1-py3-none-any.whl (20 kB)\n", + "Downloading contourpy-1.3.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (355 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m355.2/355.2 kB\u001b[0m \u001b[31m40.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", + "Downloading databricks_sdk-0.64.0-py3-none-any.whl (703 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m703.4/703.4 kB\u001b[0m \u001b[31m52.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading distlib-0.4.0-py2.py3-none-any.whl (469 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m469.0/469.0 kB\u001b[0m \u001b[31m44.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fonttools-4.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (5.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.0/5.0 MB\u001b[0m \u001b[31m118.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading gitpython-3.1.45-py3-none-any.whl (208 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m208.2/208.2 kB\u001b[0m \u001b[31m25.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading google_api_core-2.25.1-py3-none-any.whl (160 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.8/160.8 kB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_core-3.2.6-py3-none-any.whl (203 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m203.4/203.4 kB\u001b[0m \u001b[31m28.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading graphql_relay-3.2.0-py3-none-any.whl (16 kB)\n", + "Downloading httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (459 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m459.8/459.8 kB\u001b[0m \u001b[31m44.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading importlib_metadata-8.7.0-py3-none-any.whl (27 kB)\n", + "Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)\n", + "Downloading joblib-1.5.1-py3-none-any.whl (307 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m33.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading kiwisolver-1.4.9-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (1.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m80.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown-3.8.2-py3-none-any.whl (106 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m106.8/106.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opencensus_context-0.1.3-py2.py3-none-any.whl (5.1 kB)\n", + "Downloading opentelemetry_api-1.36.0-py3-none-any.whl (65 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.6/65.6 kB\u001b[0m \u001b[31m8.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_sdk-1.36.0-py3-none-any.whl (119 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m120.0/120.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading opentelemetry_semantic_conventions-0.57b0-py3-none-any.whl (201 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m201.6/201.6 kB\u001b[0m \u001b[31m23.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m136.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading pyparsing-3.2.3-py3-none-any.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m14.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)\n", + "Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m509.2/509.2 kB\u001b[0m \u001b[31m52.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading sqlparse-0.5.3-py3-none-any.whl (44 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.4/44.4 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl (6.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.6/6.6 MB\u001b[0m \u001b[31m133.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hDownloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)\n", + "Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m347.8/347.8 kB\u001b[0m \u001b[31m40.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvicorn-0.35.0-py3-none-any.whl (66 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.0/4.0 MB\u001b[0m \u001b[31m135.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (182 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.3/182.3 kB\u001b[0m \u001b[31m24.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading werkzeug-3.1.3-py3-none-any.whl (224 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m224.5/224.5 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading namex-0.1.0-py3-none-any.whl (5.9 kB)\n", + "Downloading optree-0.17.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (402 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m402.0/402.0 kB\u001b[0m \u001b[31m43.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rich-14.1.0-py3-none-any.whl (243 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.4/243.4 kB\u001b[0m \u001b[31m31.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading gitdb-4.0.12-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading google_auth-2.40.3-py2.py3-none-any.whl (216 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m216.1/216.1 kB\u001b[0m \u001b[31m25.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading cachetools-5.5.2-py3-none-any.whl (10 kB)\n", + "Downloading googleapis_common_protos-1.70.0-py3-none-any.whl (294 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.5/294.5 kB\u001b[0m \u001b[31m39.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading markdown_it_py-4.0.0-py3-none-any.whl (87 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m87.3/87.3 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading proto_plus-1.26.1-py3-none-any.whl (50 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.2/50.2 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading zipp-3.23.0-py3-none-any.whl (10 kB)\n", + "Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)\n", + "Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m181.3/181.3 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading rsa-4.9.1-py3-none-any.whl (34 kB)\n", + "Downloading smmap-5.0.2-py3-none-any.whl (24 kB)\n", + "Downloading pyasn1-0.6.1-py3-none-any.whl (83 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.1/83.1 kB\u001b[0m \u001b[31m10.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: pytz, py-spy, opencensus-context, namex, libclang, flatbuffers, distlib, colorful, zipp, wrapt, werkzeug, websockets, virtualenv, uvloop, uvicorn, tzdata, threadpoolctl, termcolor, tensorboard-data-server, sqlparse, smmap, python-dotenv, pyparsing, pyasn1, pyarrow, proto-plus, pillow, optree, opt_einsum, numpy, mdurl, markdown, kiwisolver, joblib, itsdangerous, httptools, gunicorn, grpcio, graphql-core, googleapis-common-protos, google_pasta, gast, fonttools, cycler, cloudpickle, cachetools, blinker, astunparse, absl-py, watchfiles, tensorboard, starlette, smart-open, scipy, rsa, pyasn1-modules, pandas, ml_dtypes, markdown-it-py, importlib_metadata, h5py, graphql-relay, gitdb, Flask, cryptography, contourpy, scikit-learn, rich, opentelemetry-api, matplotlib, graphene, google-auth, gitpython, fastapi, aiohttp-cors, opentelemetry-semantic-conventions, keras, google-api-core, databricks-sdk, tensorflow, opentelemetry-sdk, opencensus, mlflow-tracing, mlflow-skinny, mlflow\n", + "\u001b[33m WARNING: The script websockets is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script virtualenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script uvicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script sqlformat is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script dotenv is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts f2py and numpy-config are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown_py is installed in '/tmp/models-cache/bin' 
which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script gunicorn is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts fonttools, pyftmerge, pyftsubset and ttx are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script watchfiles is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script tensorboard is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts pyrsa-decrypt, pyrsa-encrypt, pyrsa-keygen, pyrsa-priv2pub, pyrsa-sign and pyrsa-verify are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script markdown-it is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script flask is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script fastapi is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The scripts import_pb_to_tensorboard, saved_model_cli, tensorboard, tf_upgrade_v2, tflite_convert and toco are installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m WARNING: The script mlflow is installed in '/tmp/models-cache/bin' which is not on PATH.\n", + " Consider adding this directory to PATH or, if you prefer to suppress this warning, use --no-warn-script-location.\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "pyopenssl 24.1.0 requires cryptography<43,>=41.0.5, but you have cryptography 45.0.6 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed Flask-3.1.2 absl-py-2.3.1 aiohttp-cors-0.8.1 astunparse-1.6.3 blinker-1.9.0 cachetools-5.5.2 cloudpickle-3.1.1 colorful-0.5.7 contourpy-1.3.3 cryptography-45.0.6 cycler-0.12.1 databricks-sdk-0.64.0 distlib-0.4.0 fastapi-0.116.1 flatbuffers-25.2.10 fonttools-4.59.1 gast-0.6.0 gitdb-4.0.12 gitpython-3.1.45 google-api-core-2.25.1 google-auth-2.40.3 google_pasta-0.2.0 googleapis-common-protos-1.70.0 graphene-3.4.3 graphql-core-3.2.6 graphql-relay-3.2.0 grpcio-1.74.0 gunicorn-23.0.0 h5py-3.14.0 httptools-0.6.4 importlib_metadata-8.7.0 itsdangerous-2.2.0 joblib-1.5.1 keras-3.11.3 kiwisolver-1.4.9 libclang-18.1.1 markdown-3.8.2 markdown-it-py-4.0.0 matplotlib-3.10.5 mdurl-0.1.2 ml_dtypes-0.5.3 mlflow-3.3.1 mlflow-skinny-3.3.1 mlflow-tracing-3.3.1 namex-0.1.0 numpy-2.3.2 opencensus-0.11.4 opencensus-context-0.1.3 opentelemetry-api-1.36.0 opentelemetry-sdk-1.36.0 opentelemetry-semantic-conventions-0.57b0 opt_einsum-3.4.0 optree-0.17.0 pandas-2.3.2 pillow-11.3.0 proto-plus-1.26.1 py-spy-0.4.1 pyarrow-21.0.0 pyasn1-0.6.1 pyasn1-modules-0.4.2 pyparsing-3.2.3 python-dotenv-1.1.1 pytz-2025.2 rich-14.1.0 rsa-4.9.1 scikit-learn-1.7.1 scipy-1.16.1 smart-open-7.3.0.post1 smmap-5.0.2 sqlparse-0.5.3 starlette-0.47.3 tensorboard-2.20.0 tensorboard-data-server-0.7.2 tensorflow-2.20.0 termcolor-3.1.0 threadpoolctl-3.6.0 tzdata-2025.2 uvicorn-0.35.0 uvloop-0.21.0 virtualenv-20.34.0 watchfiles-1.1.0 websockets-15.0.1 werkzeug-3.1.3 wrapt-1.17.3 zipp-3.23.0\n" + ] + } + ], + "source": [ + "!pip install numpy mlflow tensorflow \"ray[serve,default,client]\"" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e8deec5c-6239-4087-8a4d-27c091e9fc3c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2025-08-27 12:00:23.577265: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025-08-27 12:00:23.626853: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2025-08-27 12:00:25.157402: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", + "2025/08/27 12:00:27 INFO mlflow.tracking.fluent: Experiment with name 'Diabetes_Prediction_TensorFlow' does not exist. Creating a new experiment.\n", + "2025/08/27 12:00:27 WARNING mlflow.tracking.context.registry: Encountered unexpected error during resolving tags: 'getpwuid(): uid not found: 1001'\n", + "2025/08/27 12:00:27 WARNING mlflow.utils.git_utils: Failed to import Git (the Git executable is probably not on your PATH), so Git SHA is not available. 
Error: Failed to initialize: Bad git executable.\n", + "The git executable must be specified in one of the following ways:\n", + " - be included in your $PATH\n", + " - be set via $GIT_PYTHON_GIT_EXECUTABLE\n", + " - explicitly set via git.refresh()\n", + "\n", + "All git commands will error until this is rectified.\n", + "\n", + "This initial message can be silenced or aggravated in the future by setting the\n", + "$GIT_PYTHON_REFRESH environment variable. Use one of the following values:\n", + " - quiet|q|silence|s|silent|none|n|0: for no message or exception\n", + " - warn|w|warning|log|l|1: for a warning message (logging level CRITICAL, displayed by default)\n", + " - error|e|exception|raise|r|2: for a raised exception\n", + "\n", + "Example:\n", + " export GIT_PYTHON_REFRESH=quiet\n", + "\n", + "/tmp/models-cache/lib/python3.11/site-packages/keras/src/layers/core/dense.py:92: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.\n", + " super().__init__(activity_regularizer=activity_regularizer, **kwargs)\n", + "2025-08-27 12:00:29.352582: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "🏃 View run classy-wren-479 at: http://ai-starter-kit-mlflow:5000/#/experiments/1/runs/7ca28b8521a049dc8a014d4235909db3\n", + "🧪 View experiment at: http://ai-starter-kit-mlflow:5000/#/experiments/1\n" + ] + } + ], + "source": [ + "import mlflow\n", + "import mlflow.tensorflow\n", + "import numpy as np\n", + "\n", + "from sklearn.datasets import load_diabetes\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "from tensorflow.keras import layers\n", + "\n", + "# -------------------\n", + "# Prepare Data\n", + "# -------------------\n", + "data = load_diabetes()\n", + "X = data.data\n", + "y = data.target.reshape(-1, 1)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + ")\n", + "\n", + "# -------------------\n", + "# Define Model\n", + "# -------------------\n", + "def create_model(input_dim):\n", + " model = keras.Sequential([\n", + " layers.Dense(64, activation=\"relu\", input_shape=(input_dim,)),\n", + " layers.Dense(32, activation=\"relu\"),\n", + " layers.Dense(1) # regression output\n", + " ])\n", + " model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[\"mse\"])\n", + " return model\n", + "\n", + "input_dim = X_train.shape[1]\n", + "epochs = 50\n", + "batch_size = 32\n", + "\n", + "mlflow.set_experiment(\"Diabetes_Prediction_TensorFlow\")\n", + "\n", + "with mlflow.start_run():\n", + " mlflow.log_param(\"epochs\", epochs)\n", + " mlflow.log_param(\"batch_size\", batch_size)\n", + " mlflow.log_param(\"optimizer\", \"adam\")\n", + " mlflow.log_param(\"loss_fn\", \"mse\")\n", + " mlflow.log_param(\"input_features\", input_dim)\n", + "\n", + " model = create_model(input_dim)\n", + "\n", + " # Train\n", + " history = model.fit(\n", + " X_train, y_train,\n", + " validation_data=(X_test, y_test),\n", + " epochs=epochs,\n", + " batch_size=batch_size,\n", + " verbose=0\n", + " )\n", + "\n", + " # Evaluation\n", + " loss, mse = model.evaluate(X_test, y_test, verbose=0)\n", + " rmse = 
np.sqrt(mse)\n", + "\n", + " mlflow.log_metric(\"mse\", mse)\n", + " mlflow.log_metric(\"rmse\", rmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "556ae0b2-6fa6-4271-9e7d-553cd7056aab", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/bitnami/miniconda/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n", + "2025-08-27 12:00:35,162\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n", + "2025-08-27 12:00:35,747\tINFO worker.py:1514 -- Using address ray://ai-starter-kit-kuberay-head-svc:10001 set in the environment variable RAY_ADDRESS\n", + "2025-08-27 12:00:35,748\tINFO client_builder.py:244 -- Passing the following kwargs to ray.init() on the server: log_to_driver\n", + "SIGTERM handler is not set because current thread is not the main thread.\n", + "2025-08-27 12:00:40,043\tWARNING utils.py:1591 -- Python patch version mismatch: The cluster was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.11\n", + "This process on Ray Client was started with:\n", + " Ray: 2.41.0\n", + " Python: 3.11.9\n", + "\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:48,855 proxy 10.23.2.212 -- Proxy starting on node 08fb6ed7e6841998dfb9cc9f99c999cd4317663f09d65db617dbd3dc (HTTP port: 8000).\n", + "INFO 2025-08-27 12:00:49,116 serve 124 -- Started Serve in namespace \"serve\".\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,087 proxy 10.23.2.212 -- Got updated endpoints: {}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,258 controller 2739 -- Deploying new version of Deployment(name='TensorFlowMLflowDeployment', app='default') (initial target replicas: 1).\n", + "\u001b[36m(ProxyActor pid=2818, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=2818)\u001b[0m INFO 2025-08-27 05:00:49,262 proxy 10.23.2.212 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m INFO 2025-08-27 05:00:49,363 controller 2739 -- Adding 1 replica to Deployment(name='TensorFlowMLflowDeployment', app='default').\n", + "\u001b[36m(ServeController pid=2739, 
ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:19,429 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739, ip=10.23.2.212)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. Use `ray status` for more details.\n", + "\u001b[36m(ServeController pid=2739)\u001b[0m WARNING 2025-08-27 05:01:49,531 controller 2739 -- Deployment 'TensorFlowMLflowDeployment' in application 'default' has 1 replicas that have taken more than 30s to be scheduled. This may be due to waiting for the cluster to auto-scale or for a runtime environment to be installed. Resources required for each replica: {\"CPU\": 1}, total resources available: {\"CPU\": 7.0}. 
Use `ray status` for more details.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:02.743930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m 2025-08-27 05:02:07.748054: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m Model loaded successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Loading model...\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m Model loaded successfully.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO 2025-08-27 12:02:08,716 serve 124 -- Application 'default' is ready at http://127.0.0.1:8000/predict.\n", + "INFO 2025-08-27 12:02:08,718 serve 124 -- Deployed app 'default' successfully.\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n", + "\u001b[36m(ServeReplica:default:TensorFlowMLflowDeployment pid=2265, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:08,967 default_TensorFlowMLflowDeployment 0zpbmyix 63d6b1b4-67f7-4a5c-ad92-6d7989e3cf5a -- CALL __call__ OK 150.7ms\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 85ms/step3.2.212)\u001b[0m \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 -- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,833 proxy 10.23.2.213 
-- Proxy starting on node 45179940bb5a43115519b525607191a8f1a059b70c5c61c14cee8a0f (HTTP port: 8000).\n", + "\u001b[36m(ProxyActor pid=1775)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n", + "\u001b[36m(ProxyActor pid=1775, ip=10.23.2.212)\u001b[0m INFO 2025-08-27 05:02:09,992 proxy 10.23.2.213 -- Got updated endpoints: {Deployment(name='TensorFlowMLflowDeployment', app='default'): EndpointInfo(route='/predict', app_is_cross_language=False)}.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import mlflow.tensorflow\n", + "import tensorflow as tf\n", + "from starlette.requests import Request\n", + "from typing import Dict\n", + "\n", + "from ray import serve\n", + "import ray\n", + "\n", + "\n", + "@serve.deployment(\n", + " ray_actor_options={\n", + " \"runtime_env\": {\n", + " \"pip\": [\"tensorflow\"]\n", + " },\n", + " }\n", + ")\n", + "class TensorFlowMLflowDeployment:\n", + " def __init__(self):\n", + " print(\"Loading model...\")\n", + " self.model = model\n", + " print(\"Model loaded successfully.\")\n", + "\n", + " async def __call__(self, input_data) -> Dict:\n", + " try:\n", + " if isinstance(input_data, Request):\n", + " data = await input_data.json()\n", + " else:\n", + " data = input_data\n", + " features = data.get(\"features\", None)\n", + " if features is None:\n", + " return {\"error\": \"Missing 'features' in request\"}\n", + " X = np.array(features).reshape(1, -1)\n", + "\n", + " # Make prediction with TensorFlow model\n", + " prediction = self.model.predict(X).flatten().tolist()\n", + "\n", + " return {\"prediction\": prediction}\n", + " except Exception as e:\n", + " return {\"error\": str(e)}\n", + "\n", + "\n", + "# Bind and deploy\n", + "app = TensorFlowMLflowDeployment.bind()\n", + "handle = serve.run(app, route_prefix=\"/predict\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e264af73-6634-412b-9cbc-86b79c18e775", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'prediction': [179.46218872070312]}" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json_data = {\"features\": [0.0380759, 0.0506801, 0.0616962, 0.0218724, -0.0442235, -0.0348208, -0.0434008, -0.00259226, 0.0199084, -0.0176461]}\n", + "response = handle.remote(json_data)\n", + "await response" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt new file mode 100644 index 000000000..ec119b4e1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/requirements.txt @@ -0,0 +1,10 @@ +transformers +torch +tensorflow +huggingface_hub +numpy +ipywidgets +mlflow==2.19.0 +ollama +panel +ray==2.41.0 diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb new file mode 100644 index 
000000000..19bc4bdb6 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/files/welcome.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "8048aa56-4549-4afa-b8b0-d111cc7020c3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[0.7645573019981384, 0.14142529666423798], [0.13549786806106567, 0.5999557375907898]]\n" + ] + } + ], + "source": [ + "# Requires transformers>=4.51.0\n", + "\n", + "import torch\n", + "import torch.nn.functional as F\n", + "\n", + "from torch import Tensor\n", + "from transformers import AutoTokenizer, AutoModel\n", + "\n", + "\n", + "def last_token_pool(last_hidden_states: Tensor,\n", + " attention_mask: Tensor) -> Tensor:\n", + " left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])\n", + " if left_padding:\n", + " return last_hidden_states[:, -1]\n", + " else:\n", + " sequence_lengths = attention_mask.sum(dim=1) - 1\n", + " batch_size = last_hidden_states.shape[0]\n", + " return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]\n", + "\n", + "\n", + "def get_detailed_instruct(task_description: str, query: str) -> str:\n", + " return f'Instruct: {task_description}\\nQuery:{query}'\n", + "\n", + "# Each query must come with a one-sentence instruction that describes the task\n", + "task = 'Given a web search query, retrieve relevant passages that answer the query'\n", + "\n", + "queries = [\n", + " get_detailed_instruct(task, 'What is the capital of China?'),\n", + " get_detailed_instruct(task, 'Explain gravity')\n", + "]\n", + "# No need to add instruction for retrieval documents\n", + "documents = [\n", + " \"The capital of China is Beijing.\",\n", + " \"Gravity is a force that attracts two bodies towards each other. 
It gives weight to physical objects and is responsible for the movement of planets around the sun.\"\n", + "]\n", + "input_texts = queries + documents\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen3-Embedding-0.6B', padding_side='left')\n", + "model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B')\n", + "\n", + "# We recommend enabling flash_attention_2 for better acceleration and memory saving.\n", + "# model = AutoModel.from_pretrained('Qwen/Qwen3-Embedding-0.6B', attn_implementation=\"flash_attention_2\", torch_dtype=torch.float16).cuda()\n", + "\n", + "max_length = 8192\n", + "\n", + "# Tokenize the input texts\n", + "batch_dict = tokenizer(\n", + " input_texts,\n", + " padding=True,\n", + " truncation=True,\n", + " max_length=max_length,\n", + " return_tensors=\"pt\",\n", + ")\n", + "batch_dict.to(model.device)\n", + "outputs = model(**batch_dict)\n", + "embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])\n", + "\n", + "# normalize embeddings\n", + "embeddings = F.normalize(embeddings, p=2, dim=1)\n", + "scores = (embeddings[:2] @ embeddings[2:].T)\n", + "print(scores.tolist())\n", + "# [[0.7645568251609802, 0.14142508804798126], [0.13549736142158508, 0.5999549627304077]]\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml new file mode 100644 index 000000000..d985d3ba8 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/networkpolicy.yaml @@ -0,0 +1,46 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-ray + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + ray.io/node-type: head + ports: + - protocol: TCP + port: 8265 + - protocol: TCP + port: 8000 + - protocol: TCP + port: 10001 +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: allow-singleuser-egress-to-mlflow + namespace: default +spec: + podSelector: + matchLabels: + app: jupyterhub + component: singleuser-server + release: ai-starter-kit + policyTypes: ["Egress"] + egress: + - to: + - podSelector: + matchLabels: + app.kubernetes.io/name: mlflow + app.kubernetes.io/instance: ai-starter-kit + ports: + - protocol: TCP + port: 5000 \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt new file mode 100644 index 000000000..4e33a20ed --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/NOTES.txt @@ -0,0 +1 @@ +AI Starter Kit installed. 
Enjoy \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl new file mode 100644 index 000000000..cf0c5e081 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "ai-starter-kit.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "ai-starter-kit.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "ai-starter-kit.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "ai-starter-kit.labels" -}} +helm.sh/chart: {{ include "ai-starter-kit.chart" . }} +{{ include "ai-starter-kit.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "ai-starter-kit.selectorLabels" -}} +app.kubernetes.io/name: {{ include "ai-starter-kit.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "ai-starter-kit.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "ai-starter-kit.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml new file mode 100644 index 000000000..e03429ee9 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/configmaps.yaml @@ -0,0 +1,18 @@ +--- +{{- /* +Create a single ConfigMap with all initialization files for the jupyterhub singleuser pod. +This ConfigMap is mounted as a volume. 
+*/ -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: ai-starter-kit-init-files + labels: + app.kubernetes.io/managed-by: {{ $.Release.Service | quote }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + helm.sh/chart: "{{ $.Chart.Name }}-{{ $.Chart.Version }}" +data: +{{- range $path, $bytes := .Files.Glob "files/*" }} + {{ base $path | quote }}: |- +{{ $bytes | toString | nindent 4 }} +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml new file mode 100644 index 000000000..3e387f5ce --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/generic-device-plugin.yaml @@ -0,0 +1,65 @@ +{{- if .Values.genericDevicePlugin.enabled }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-generic-device-plugin + namespace: kube-system + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: generic-device-plugin +spec: + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: generic-device-plugin + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: generic-device-plugin + spec: + priorityClassName: system-node-critical + tolerations: + - operator: "Exists" + effect: "NoExecute" + - operator: "Exists" + effect: "NoSchedule" + containers: + - image: {{ .Values.genericDevicePlugin.image.repository | default "squat/generic-device-plugin" }}:{{ .Values.genericDevicePlugin.image.tag | default "latest" }} + imagePullPolicy: {{ .Values.genericDevicePlugin.image.pullPolicy | default "IfNotPresent" }} + name: generic-device-plugin + args: + - --device + - | + name: dri + groups: + - count: {{ .Values.genericDevicePlugin.device.count | default 4 }} + paths: + - path: /dev/dri + resources: + requests: + cpu: {{ .Values.genericDevicePlugin.resources.requests.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.requests.memory | default "10Mi" }} + limits: + cpu: {{ .Values.genericDevicePlugin.resources.limits.cpu | default "50m" }} + memory: {{ .Values.genericDevicePlugin.resources.limits.memory | default "20Mi" }} + ports: + - containerPort: 8080 + name: http + securityContext: + privileged: true + volumeMounts: + - name: device-plugin + mountPath: /var/lib/kubelet/device-plugins + - name: dev + mountPath: /dev + volumes: + - name: device-plugin + hostPath: + path: /var/lib/kubelet/device-plugins + - name: dev + hostPath: + path: /dev + updateStrategy: + type: RollingUpdate +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml new file mode 100644 index 000000000..308b0a94a --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/hf-secret.yaml @@ -0,0 +1,13 @@ +{{- if .Values.huggingface.token }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-hf-token-secret + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +type: Opaque +stringData: + token: {{ .Values.huggingface.token }} +{{- end }} \ No newline at end of file diff --git 
a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml new file mode 100644 index 000000000..0797b93e3 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/local-pv.yaml @@ -0,0 +1,16 @@ +{{- if .Values.localPersistence.enabled }} +apiVersion: v1 +kind: PersistentVolume +metadata: + name: {{ .Release.Name }}-models-cache-pv + labels: + type: local +spec: + storageClassName: manual + capacity: + storage: {{ .Values.modelsCachePvc.size }} + accessModes: + - ReadWriteOnce + hostPath: + path: "{{ .Values.localPersistence.hostPath }}" +{{- end }} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml new file mode 100644 index 000000000..99179f9f1 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-mc-only.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCacheOnlyPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-only-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCacheOnlyPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCacheOnlyPvc.storageClassName (eq .Values.modelsCacheOnlyPvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCacheOnlyPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCacheOnlyPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml new file mode 100644 index 000000000..cb4ae5b1d --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc-ray.yaml @@ -0,0 +1,28 @@ +{{- if .Values.rayPvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-ray-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.rayPvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. 
+ */}} + {{- if or .Values.rayPvc.storageClassName (eq .Values.rayPvc.storageClassName "") }} + storageClassName: {{ .Values.rayPvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.rayPvc.size }} +{{- end -}} diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml new file mode 100644 index 000000000..36ba98fdc --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/pvc.yaml @@ -0,0 +1,28 @@ +{{- if .Values.modelsCachePvc.enabled -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ .Release.Name }}-models-cache-pvc + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" +spec: + accessModes: + {{- toYaml .Values.modelsCachePvc.accessModes | nindent 4 }} + {{- if .Values.localPersistence.enabled }} + storageClassName: manual + {{- else }} + {{- /* + If storageClassName is set to a specific class, it will be used. + If storageClassName is set to an empty string (""), no storage class will be used for provisioning. + If storageClassName is null or omitted, the default storage class will be used. + */}} + {{- if or .Values.modelsCachePvc.storageClassName (eq .Values.modelsCachePvc.storageClassName "") }} + storageClassName: {{ .Values.modelsCachePvc.storageClassName | quote }} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ .Values.modelsCachePvc.size }} +{{- end -}} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml new file mode 100644 index 000000000..5650c35c4 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/templates/ramalama-deployment.yaml @@ -0,0 +1,58 @@ +{{- if .Values.ramalama.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + replicas: 1 + selector: + matchLabels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: ramalama + template: + metadata: + labels: + {{- include "ai-starter-kit.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: ramalama + spec: + {{- if .Values.ramalama.nodeSelector }} + nodeSelector: + {{- toYaml .Values.ramalama.nodeSelector | nindent 8 }} + {{- end }} + containers: + - name: ramalama + image: "{{ .Values.ramalama.image.repository }}:{{ .Values.ramalama.image.tag }}" + imagePullPolicy: {{ .Values.ramalama.image.pullPolicy }} + {{- if .Values.ramalama.command }} + command: + {{- toYaml .Values.ramalama.command | nindent 10 }} + {{- end }} + ports: + - containerPort: 8080 + protocol: TCP + {{- if .Values.ramalama.resources }} + resources: + {{- toYaml .Values.ramalama.resources | nindent 10 }} + {{- end }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "ai-starter-kit.fullname" . }}-ramalama + labels: + {{- include "ai-starter-kit.labels" . | nindent 4 }} + app.kubernetes.io/component: ramalama +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + name: http + selector: + {{- include "ai-starter-kit.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: ramalama +{{- end }} \ No newline at end of file diff --git a/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml new file mode 100644 index 000000000..3155ea642 --- /dev/null +++ b/ai/ai-starter-kit/helm-chart/ai-starter-kit/values.yaml @@ -0,0 +1,199 @@ +jupyterhub: + nameOverride: "jupyterhub" + # This value has to be null so that the release name is applied to this chart's resources. + # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/b4b51301ac886511c643cc5d428b15ff38006bee/jupyterhub/values.yaml#L1 + fullnameOverride: + + singleuser: + networkPolicy: + enabled: false + defaultUrl: "/lab/tree/welcome.ipynb" + image: + name: jupyterhub/k8s-singleuser-sample + tag: "4.2.0" + initContainers: + # This init container makes sure that the home folder we mount has the correct owner + - name: chown-home-mount-dir + image: jupyterhub/k8s-singleuser-sample:4.2.0 + securityContext: + runAsUser: 0 + command: ["chown", "jovyan", "/home/jovyan"] + volumeMounts: + - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + + - name: model-initializer + image: jupyterhub/k8s-singleuser-sample:4.2.0 + env: + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + command: + - /bin/sh + - -c + - | + set -e + pip install -r /tmp/requirements.txt + + python /tmp/download_models.py + + # populate workspace with initial notebook files + for f in /tmp/*.ipynb; do + if [ -f "$f" ]; then + # Use cp -n to not overwrite existing files. + cp -n "$f" /home/jovyan/ + fi + done + volumeMounts: + # This 'home' volume is created by the helm chart's 'homeMountPath' option. + # We mount it in the init containers too, so all downloads and installations are persisted in this mounted home folder.
+ - name: home + mountPath: /home/jovyan + subPath: jupyterhub_workspace + - name: init-files + mountPath: /tmp + readOnly: true + + storage: + type: static + static: + pvcName: "ai-starter-kit-models-cache-pvc" + subPath: "jupyterhub_workspace" + capacity: 20Gi + homeMountPath: /home/jovyan + extraVolumes: + - name: init-files + configMap: + name: "ai-starter-kit-init-files" + # This environment variable list has its own format: https://z2jh.jupyter.org/en/latest/resources/reference.html#singleuser-extraenv + extraEnv: + HF_TOKEN: + name: HF_TOKEN + valueFrom: + secretKeyRef: + name: ai-starter-kit-hf-token-secret + key: token + RAY_ADDRESS: "ray://ai-starter-kit-kuberay-head-svc:10001" + MLFLOW_TRACKING_URI: "http://ai-starter-kit-mlflow:5000" + hub: + networkPolicy: + enabled: false + db: + type: sqlite-pvc + pvc: + annotations: + # Without this, Helm will not keep the PVC after uninstallation + # https://github.com/jupyterhub/zero-to-jupyterhub-k8s/issues/3718 + helm.sh/resource-policy: keep + extraConfig: + 00-dummy-authenticator: | + c.DummyAuthenticator.password = "sneakypass" + 01-spawner-timeouts: | + c.KubeSpawner.start_timeout = 1800 + proxy: + chp: + networkPolicy: + enabled: false + traefik: + networkPolicy: + enabled: false + +ray-cluster: + enabled: false + image: + tag: "2.41.0-py312-cpu-aarch64" + head: + serviceType: ClusterIP + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + worker: + resources: + requests: + cpu: "1" + memory: "2G" + ephemeral-storage: 10Gi + limits: + cpu: "4" + memory: "8G" + ephemeral-storage: 10Gi + +mlflow: + resources: + requests: + cpu: 100m + memory: 512Mi + limits: + cpu: 1000m + memory: 1Gi + +huggingface: + # Provide your Hugging Face token here to download gated or private models. + # It is recommended to set this via --set or a separate values file, e.g., + # --set huggingface.token=hf_... + token: "" + +rayPvc: + enabled: false + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 100Gi + +modelsCachePvc: + enabled: true + # To use the default StorageClass, set storageClassName to null or omit it. + # To use a specific StorageClass (e.g. "standard-rwo" on GKE), provide its name. + # To create a PVC that doesn't request any StorageClass, set it to an empty string (""). + storageClassName: "standard-rwo" + accessModes: + - ReadWriteOnce + size: 10Gi + +modelsCacheOnlyPvc: + enabled: false + +localPersistence: + # For local development with minikube, this allows persisting the models-cache + # on the host machine, surviving `minikube stop/start`. + # 1. Create a directory on your host: `mkdir -p /tmp/models-cache` + # 2. Start minikube with the mount: `minikube start --mount --mount-string="/tmp/models-cache:/tmp/models-cache"` + # 3. Set enabled to true below, or via `--set localPersistence.enabled=true` + enabled: true + # This path must match the destination path inside the minikube node and the hostPath mount created above. + hostPath: "/tmp/models-cache" + +ollama: + enabled: true + ollama: + models: + pull: + - gemma3 + persistentVolume: + enabled: true + existingClaim: "ai-starter-kit-models-cache-pvc" + subPath: "ollama" + + +ramalama: + enabled: true + command: + - /bin/sh + - -c + - ramalama pull qwen2.5:1.5b && ramalama serve qwen2.5:1.5b --port 8080 + image: + repository: "quay.io/ramalama/ramalama" + tag: "latest" + pullPolicy: IfNotPresent + +genericDevicePlugin: + enabled: false \ No newline at end of file
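Usage sketch: the extraEnv block in values.yaml injects RAY_ADDRESS and MLFLOW_TRACKING_URI into every singleuser notebook pod. The snippet below is a minimal, hedged example (not part of the chart) of how a notebook could consume those values; it assumes ray-cluster.enabled has been switched on, that the ray and mlflow Python packages are available in the singleuser image, and the run name "smoke-test" is purely illustrative.

# Minimal sketch, assuming ray-cluster.enabled=true and ray/mlflow installed in the singleuser image.
import os
import ray
import mlflow

# RAY_ADDRESS points at the KubeRay head service (Ray Client, port 10001), as set in extraEnv above.
ray.init(address=os.environ["RAY_ADDRESS"])
print(ray.cluster_resources())

# MLFLOW_TRACKING_URI points at the bundled MLflow tracking server (port 5000).
mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])
with mlflow.start_run(run_name="smoke-test"):
    mlflow.log_metric("connected", 1.0)

Both libraries also read these environment variables automatically; the explicit calls are only there to make the wiring visible.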