diff --git a/demo-notebooks/guided-demos/ipywidgets.ipynb b/demo-notebooks/guided-demos/ipywidgets.ipynb new file mode 100644 index 00000000..cd0fa1e5 --- /dev/null +++ b/demo-notebooks/guided-demos/ipywidgets.ipynb @@ -0,0 +1,392 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "8d4a42f6", + "metadata": {}, + "source": [ + "In this notebook, we will go through the basics of using the SDK to:\n", + " - Spin up a Ray cluster with our desired resources\n", + " - View the status and specs of our Ray cluster\n", + " - Take down the Ray cluster when finished" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "301094f1", + "metadata": {}, + "outputs": [], + "source": [ + "%pip uninstall codeflare-sdk -y\n", + "%pip install ../../dist/codeflare_sdk-0.0.0.dev0-py3-none-any.whl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b55bc3ea-4ce3-49bf-bb1f-e209de8ca47a", + "metadata": {}, + "outputs": [], + "source": [ + "# Import pieces from codeflare-sdk\n", + "from codeflare_sdk import Cluster, ClusterConfiguration, TokenAuthentication, list_cluster_details" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "614daa0c", + "metadata": {}, + "outputs": [], + "source": [ + "# Create authentication object for user permissions\n", + "# IF unused, SDK will automatically check for default kubeconfig, then in-cluster config\n", + "# KubeConfigFileAuthentication can also be used to specify kubeconfig path manually\n", + "auth = TokenAuthentication(\n", + " token = \"XXXXX\",\n", + " server = \"XXXXX\",\n", + " skip_tls=False\n", + ")\n", + "auth.login()" + ] + }, + { + "cell_type": "markdown", + "id": "bc27f84c", + "metadata": {}, + "source": [ + "Here, we want to define our cluster by specifying the resources we require for our batch workload. 
Below, we define our cluster object (which generates a corresponding RayCluster).\n", + "\n", + "NOTE: We must specify the `image` which will be used in our RayCluster. We recommend you bring your own image which suits your purposes. \n", + "The example here is a community image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f4bc870-091f-4e11-9642-cba145710159", + "metadata": {}, + "outputs": [], + "source": [ + "# Create and configure our cluster object\n", + "# The SDK will try to find the name of your default local queue based on the annotation \"kueue.x-k8s.io/default-queue\": \"true\" unless you specify the local queue manually below\n", + "cluster = Cluster(ClusterConfiguration(\n", + " name='raytest1', \n", + " namespace='default', # Update to your namespace\n", + " head_gpus=0, # For GPU enabled workloads set the head_gpus and num_gpus\n", + " num_gpus=0,\n", + " num_workers=1,\n", + " min_cpus=1,\n", + " max_cpus=1,\n", + " min_memory=2,\n", + " max_memory=2,\n", + " image=\"quay.io/rhoai/ray:2.23.0-py39-cu121\",\n", + " write_to_file=False, # When enabled Ray Cluster yaml files are written to /HOME/.codeflare/resources \n", + " # local_queue=\"local-queue-name\" # Specify the local queue manually\n", + "))" + ] + }, + { + "cell_type": "markdown", + "id": "12eef53c", + "metadata": {}, + "source": [ + "Next, we want to bring our cluster up, so we call the `up()` function below to submit our Ray Cluster onto the queue, and begin the process of obtaining our resource cluster." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0884bbc-c224-4ca0-98a0-02dfa09c2200", + "metadata": {}, + "outputs": [], + "source": [ + "# Bring up the cluster\n", + "cluster.up()" + ] + }, + { + "cell_type": "markdown", + "id": "657ebdfb", + "metadata": {}, + "source": [ + "Now, we want to check on the status of our resource cluster, and wait until it is finally ready for use." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df71c1ed", + "metadata": {}, + "outputs": [], + "source": [ + "def format_status(status):\n", + " if status == \"Ready\":\n", + " return 'Ready ✓'\n", + " elif status == \"Suspended\":\n", + " return 'Suspended ~'\n", + " elif status == \"Starting\":\n", + " return 'Starting ⌛'\n", + " elif status == \"Failed\":\n", + " return 'Failed ✗'\n", + " else:\n", + " return status\n", + "\n", + "import ipywidgets as widgets\n", + "import pandas as pd\n", + "from IPython.display import display, HTML\n", + "data = {\n", + " \"name\": [\"RayTest1\", \"RayTest2\", \"RayTest3\", \"RayTest4\"],\n", + " \"namespace\": [\"default\", \"usernamespace\", \"usernamespace\", \"usernamespace\"],\n", + " \"head_gpu\": [0, 1, 2, 0],\n", + " \"worker_gpu\": [2, 0, 1, 0],\n", + " \"min_memory\": [2, 4, 4, 2],\n", + " \"max_memory\": [2, 4, 8, 4],\n", + " \"min_cpu\": [1, 2, 4, 2],\n", + " \"max_cpu\": [1, 4, 8, 2],\n", + " \"status\": [\"Ready\", \"Starting\", \"Suspended\", \"Failed\"]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "# format to add icons\n", + "df['status'] = df['status'].apply(format_status)\n", + "\n", + "my_output = widgets.Output()\n", + "my_output\n", + "classification_widget = widgets.ToggleButtons(\n", + " options=['RayTest1', \"RayTest2\", \"RayTest3\", \"RayTest4\"],\n", + " description='Select an existing cluster:',\n", + ")\n", + "\n", + "def on_click(change):\n", + " new_value = change[\"new\"]\n", + " my_output.clear_output()\n", + " with my_output:\n", + " display(HTML(df[df[\"name\"]==new_value][[\"name\", \"namespace\", \"head_gpu\", \"worker_gpu\", \"min_memory\", \"max_memory\", \"min_cpu\", \"max_cpu\", \"status\"]].to_html(escape=False, index=False, border=2)))\n", + "\n", + "classification_widget.observe(on_click, names=\"value\")\n", + "display(widgets.VBox([classification_widget, my_output]))\n", + "\n", + "\n", + "list_jobs_button = widgets.Button(\n", + " 
description='View Jobs',\n", + " icon='suitcase'\n", + " )\n", + "delete_button = widgets.Button(\n", + " description='Delete Cluster',\n", + " icon='trash'\n", + " )\n", + "ray_dashboard_button = widgets.Button(\n", + " description='Open Ray Dashboard',\n", + " icon='dashboard',\n", + " layout=widgets.Layout(width='auto'),\n", + " )\n", + "view_yaml_button = widgets.Button(\n", + " description='View YAML',\n", + " icon='file'\n", + " )\n", + "display(widgets.HBox([delete_button, list_jobs_button, view_yaml_button, ray_dashboard_button]))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24e612ff", + "metadata": {}, + "outputs": [], + "source": [ + "list_cluster_details()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dda874b", + "metadata": {}, + "outputs": [], + "source": [ + "def format_status(status):\n", + " if status == \"Ready\":\n", + " return 'Ready ✓'\n", + " elif status == \"Suspended\":\n", + " return 'Suspended ~'\n", + " elif status == \"Starting\":\n", + " return 'Starting ⌛'\n", + " elif status == \"Failed\":\n", + " return 'Failed ✗'\n", + " else:\n", + " return status\n", + "\n", + "import ipywidgets as widgets\n", + "import pandas as pd\n", + "from IPython.display import display, HTML\n", + "data = {\n", + " \"name\": [\"RayTest1\", \"RayTest2\", \"RayTest3\", \"RayTest4\"],\n", + " \"namespace\": [\"default\", \"usernamespace\", \"usernamespace\", \"usernamespace\"],\n", + " \"head_gpu\": [0, 1, 2, 0],\n", + " \"worker_gpu\": [2, 0, 1, 0],\n", + " \"min_memory\": [2, 4, 4, 2],\n", + " \"max_memory\": [2, 4, 8, 4],\n", + " \"min_cpu\": [1, 2, 4, 2],\n", + " \"max_cpu\": [1, 4, 8, 2],\n", + " \"status\": [\"Ready\", \"Starting\", \"Suspended\", \"Failed\"],\n", + " \"pods\": [\n", + " [{\"pod\": \"head\", \"name\": \"head-raytest1\", \"status\": \"Ready\"}, {\"pod\": \"worker\", \"name\": \"worker-raytest1-a\", \"status\": \"Ready\"}, {\"pod\": \"worker\", \"name\": \"worker-raytest1-b\", 
\"status\": \"Ready\"}],\n", + " [{\"pod\": \"head\", \"name\": \"head-raytest2\", \"status\": \"Ready\"}, {\"pod\": \"worker\", \"name\": \"worker-raytest2a\", \"status\": \"Starting\"}],\n", + " [{\"pod\": \"head\", \"name\": \"head-raytest3\", \"status\": \"Suspended\"}, {\"pod\": \"worker\", \"name\": \"worker-raytest3a\", \"status\": \"Suspended\"}],\n", + " [{\"pod\": \"head\", \"name\": \"head-raytest4\", \"status\": \"Failed\"}, {\"pod\": \"worker\", \"name\": \"worker-raytest4a\", \"status\": \"Failed\"}]\n", + " ]\n", + "}\n", + "df = pd.DataFrame(data)\n", + "\n", + "# format to add icons\n", + "df['status'] = df['status'].apply(format_status)\n", + "\n", + "my_output = widgets.Output()\n", + "my_output\n", + "classification_widget = widgets.ToggleButtons(\n", + " options=['RayTest1', \"RayTest2\", \"RayTest3\", \"RayTest4\"],\n", + " description='Select an existing cluster:',\n", + ")\n", + "\n", + "def on_click(change):\n", + " new_value = change[\"new\"]\n", + " my_output.clear_output()\n", + " with my_output:\n", + " selected_data = df[df[\"name\"] == new_value]\n", + " main_table = selected_data[[\"name\", \"namespace\", \"head_gpu\", \"worker_gpu\", \"min_memory\", \"max_memory\", \"min_cpu\", \"max_cpu\", \"status\"]].to_html(escape=False, index=False)\n", + " pod_rows = \"\"\n", + " for pod in selected_data[\"pods\"].values[0]:\n", + " pod_rows += f'
| Pod | Name | Status |
|---|