From 7848aefd0c0af3be131e320cd4211139fdbfb05f Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 4 Dec 2025 14:20:10 -0800 Subject: [PATCH 1/6] Add README for shared_pathways_service --- .../shared_pathways_service/README.md | 51 +++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 pathwaysutils/experimental/shared_pathways_service/README.md diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md new file mode 100644 index 0000000..3ff6478 --- /dev/null +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -0,0 +1,51 @@ +# Shared Pathways Service + +Shared pathways service is a multi-tenant Pathways cluster with dedicated TPU +resources. This eliminates the need for complex cloud setup, allowing you to +get started from a familiar local environment (like a laptop or cloud VM) with +minimal overhead: Just wrap your Python entrypoint in a +`with isc_pathways.connect():` block!. + +## Requirements + +Make sure that your cluster is running the Resource Manager and Worker pods. +If not, you can use [pw-service-example.yaml](yamls/pw-service-example.yaml). +Make sure to modify the following values to deploy these pods: + +- A unique Jobset name for the cluster's Pathways pods +- GCS bucket path +- TPU type and topology +- Number of slices + +These fields are highlighted in the YAML file with trailing comments for easier +understanding. + +## Instructions + +1. Clone `pathwaysutils`. + +`git clone https://github.com/AI-Hypercomputer/pathways-utils.git` + +2. Import `isc_pathways.py` and move your workload under +`with isc_pathways.connect()` statement. Refer to +[run_connect_example.py](run_connect_example.py) for reference. 
Example code: + +``` + from pathwaysutils.experimental.shared_pathways_service import isc_pathways + + with isc_pathways.connect( + "my-cluster", + "my-project", + "region", + "gs://user-bucket", + "pathways-cluster-pathways-head-0-0.pathways-cluster:29001", + {"tpuv6e:2x2": 2}, + ) as tm: + import jax.numpy as jnp + import pathwaysutils + import pprint + + pathwaysutils.initialize() + orig_matrix = jnp.zeros(5) + ... +``` From 5cf1186b863830ea79a470a5f5d9032c4df9d333 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 4 Dec 2025 14:39:52 -0800 Subject: [PATCH 2/6] Add that connect block deploys a proxy pod --- pathwaysutils/experimental/shared_pathways_service/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md index 3ff6478..51dcaf6 100644 --- a/pathwaysutils/experimental/shared_pathways_service/README.md +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -49,3 +49,6 @@ understanding. orig_matrix = jnp.zeros(5) ... ``` + +The connect block will deploy a proxy pod to your GKE cluster and connect your local runtime environment to the proxy +pod via port-forwarding. From b669b65532d7c73546325c52eccb0de668501a54 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 4 Dec 2025 18:00:07 -0800 Subject: [PATCH 3/6] fix comment --- pathwaysutils/experimental/shared_pathways_service/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md index 51dcaf6..0a767f0 100644 --- a/pathwaysutils/experimental/shared_pathways_service/README.md +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -26,7 +26,7 @@ understanding. `git clone https://github.com/AI-Hypercomputer/pathways-utils.git` -2. Import `isc_pathways.py` and move your workload under +2. 
Import `isc_pathways` and move your workload under `with isc_pathways.connect()` statement. Refer to [run_connect_example.py](run_connect_example.py) for reference. Example code: From c3da4af13f2870fd128deb57da770ad4ac4d3609 Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 4 Dec 2025 20:09:35 -0800 Subject: [PATCH 4/6] Resolving comments in shared_pathways_service/README.md --- .../shared_pathways_service/README.md | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md index 0a767f0..87d0484 100644 --- a/pathwaysutils/experimental/shared_pathways_service/README.md +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -1,15 +1,18 @@ # Shared Pathways Service -Shared pathways service is a multi-tenant Pathways cluster with dedicated TPU -resources. This eliminates the need for complex cloud setup, allowing you to -get started from a familiar local environment (like a laptop or cloud VM) with -minimal overhead: Just wrap your Python entrypoint in a -`with isc_pathways.connect():` block!. +The Shared Pathways Service accelerates developer iteration by providing a +persistent, multi-tenant TPU environment. This decouples service creation from +the development loop, allowing JAX clients to connect on-demand from a familiar +local environment (like a laptop or cloud VM) to a long-running Pathways +service that manages scheduling and error handling. ## Requirements -Make sure that your cluster is running the Resource Manager and Worker pods. -If not, you can use [pw-service-example.yaml](yamls/pw-service-example.yaml). +Make sure that your GKE cluster is running the Resource Manager and Worker pods. +You can follow the steps +[here](https://docs.cloud.google.com/ai-hypercomputer/docs/workloads/pathways-on-cloud/troubleshooting-pathways#health_monitoring) +to confirm the status of these pods. 
If you haven't started the Pathways pods +yet, you can use [pw-service-example.yaml](yamls/pw-service-example.yaml). Make sure to modify the following values to deploy these pods: - A unique Jobset name for the cluster's Pathways pods @@ -50,5 +53,5 @@ understanding. ... ``` -The connect block will deploy a proxy pod to your GKE cluster and connect your local runtime environment to the proxy -pod via port-forwarding. +The connect block will deploy a proxy pod dedicated to your client and connect +your local runtime environment to the proxy pod via port-forwarding. From ac233ca152017c97206886519b9c4390d5448c5b Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Thu, 4 Dec 2025 20:48:12 -0800 Subject: [PATCH 5/6] Fix instructions in Readme --- .../shared_pathways_service/README.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md index 87d0484..a9f4f36 100644 --- a/pathwaysutils/experimental/shared_pathways_service/README.md +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -29,7 +29,11 @@ understanding. `git clone https://github.com/AI-Hypercomputer/pathways-utils.git` -2. Import `isc_pathways` and move your workload under +2. Install portpicker + +`pip install portpicker` + +3. Import `isc_pathways` and move your workload under `with isc_pathways.connect()` statement. Refer to [run_connect_example.py](run_connect_example.py) for reference. Example code: @@ -37,12 +41,12 @@ understanding. 
from pathwaysutils.experimental.shared_pathways_service import isc_pathways with isc_pathways.connect( - "my-cluster", - "my-project", - "region", - "gs://user-bucket", - "pathways-cluster-pathways-head-0-0.pathways-cluster:29001", - {"tpuv6e:2x2": 2}, + cluster="my-cluster", + project="my-project", + region="region", + gcs_bucket="gs://user-bucket", + pathways_service="pathways-cluster-pathways-head-0-0.pathways-cluster:29001", + expected_tpu_instances={"tpuv6e:2x2": 2}, ) as tm: import jax.numpy as jnp import pathwaysutils From bc0c8b5145cfbbff6a6dc7fbfbbffefed0f2c8ea Mon Sep 17 00:00:00 2001 From: Akanksha Gupta Date: Fri, 5 Dec 2025 09:32:00 -0800 Subject: [PATCH 6/6] Open hyperlink in a new tab --- pathwaysutils/experimental/shared_pathways_service/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pathwaysutils/experimental/shared_pathways_service/README.md b/pathwaysutils/experimental/shared_pathways_service/README.md index a9f4f36..a46dcd4 100644 --- a/pathwaysutils/experimental/shared_pathways_service/README.md +++ b/pathwaysutils/experimental/shared_pathways_service/README.md @@ -10,7 +10,7 @@ service that manages scheduling and error handling. Make sure that your GKE cluster is running the Resource Manager and Worker pods. You can follow the steps -[here](https://docs.cloud.google.com/ai-hypercomputer/docs/workloads/pathways-on-cloud/troubleshooting-pathways#health_monitoring) +<a href="https://docs.cloud.google.com/ai-hypercomputer/docs/workloads/pathways-on-cloud/troubleshooting-pathways#health_monitoring" target="_blank">here</a> to confirm the status of these pods. If you haven't started the Pathways pods yet, you can use [pw-service-example.yaml](yamls/pw-service-example.yaml). Make sure to modify the following values to deploy these pods: