Skip to content

Commit 6ab96dc

Browse files
feat: New Queued resources Samples: Create, Create Spot, Get, Delete operations + tests (#12716)
* Create all queued resources operations: Create, Create Spot, Get, Delete, Force Delete, Network, Startup Script, create_time_bound.
1 parent 8c85a22 commit 6ab96dc

12 files changed

+743
-2
lines changed

tpu/create_tpu_topology.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def create_cloud_tpu_with_topology(
4444
node = tpu_v2.Node()
4545
# Here we are creating a TPU v2-8 with 2x2 topology.
4646
node.accelerator_config = tpu_v2.AcceleratorConfig(
47-
type_=tpu_v2.AcceleratorConfig.Type.V3,
47+
type_=tpu_v2.AcceleratorConfig.Type.V2,
4848
topology="2x2",
4949
)
5050
node.runtime_version = runtime_version

tpu/delete_tpu.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ def delete_cloud_tpu(project_id: str, zone: str, tpu_name: str = "tpu-name") ->
4545
if __name__ == "__main__":
4646
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
4747
ZONE = "us-central1-b"
48-
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name12")
48+
delete_cloud_tpu(PROJECT_ID, ZONE, "tpu-name")

tpu/queued_resources_create.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource that requests a single TPU node.

    Args:
        project_id: Project in which the queued resource is created.
        zone: Zone used for both the queued resource and the TPU node.
        tpu_name: ID assigned to the requested TPU node.
        tpu_type: Accelerator type of the requested node.
        runtime_version: Runtime version set on the requested node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.
        NOTE(review): the annotation says ``Node``, but the code reads
        ``.state.state`` from the result, which looks like a queued
        resource rather than a node -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_create]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The node and the queued resource share the same parent location.
    location = f"projects/{project_id}/locations/{zone}"

    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    tpu_node = tpu_v2alpha1.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
    )

    spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec(
        parent=location,
        node_id=tpu_name,
        node=tpu_node,
    )

    queued_resource = tpu_v2alpha1.QueuedResource(
        tpu=tpu_v2alpha1.QueuedResource.Tpu(node_spec=[spec]),
    )

    request = CreateQueuedResourceRequest(
        parent=location,
        queued_resource_id=queued_resource_name,
        queued_resource=queued_resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create]
    return response
70+
71+
72+
if __name__ == "__main__":
    # The project comes from the environment; the zone and IDs are sample values.
    ZONE = "us-central1-b"
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    create_queued_resource(
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
        zone=ZONE,
        project_id=PROJECT_ID,
    )
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource_network(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
    network: str = "default",
) -> Node:
    """Create a queued resource whose TPU node has an explicit network config.

    Args:
        project_id: Project in which the queued resource is created.
        zone: Zone used for both the queued resource and the TPU node.
        tpu_name: ID assigned to the requested TPU node.
        tpu_type: Accelerator type of the requested node.
        runtime_version: Runtime version set on the requested node.
        queued_resource_name: ID of the queued resource to create.
        network: Network set in the node's network configuration. The
            subnetwork is fixed to "default" in this sample.

    Returns:
        The result of the create operation.
        NOTE(review): the annotation says ``Node``, but the code reads
        ``.tpu.node_spec`` from the result, the same shape as the queued
        resource built here -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_network]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"
    # network = "default"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    node.runtime_version = runtime_version
    # Setting network configuration
    node.network_config = tpu_v2alpha1.NetworkConfig(
        network=network,  # Update if you want to use a specific network
        subnetwork="default",  # Update if you want to use a specific subnetwork
        enable_external_ips=True,
        can_ip_forward=True,
    )

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = f"projects/{project_id}/locations/{zone}"
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    request = CreateQueuedResourceRequest(
        parent=f"projects/{project_id}/locations/{zone}",
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    # Only values from the operation result are printed. The previous extra
    # print compared the locally built request against the literal "default",
    # so it said nothing about the created resource and ignored `network`.
    print(response.name)
    print(response.tpu.node_spec[0].node.network_config)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # network: "default"
    # subnetwork: "default"
    # enable_external_ips: true
    # can_ip_forward: true

    # [END tpu_queued_resources_network]
    return response
80+
81+
82+
if __name__ == "__main__":
    # The project comes from the environment; the zone and IDs are sample values.
    ZONE = "us-central1-b"
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    create_queued_resource_network(
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
        zone=ZONE,
        project_id=PROJECT_ID,
    )
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource_spot(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource for a single TPU node with the spot option set.

    Args:
        project_id: Project in which the queued resource is created.
        zone: Zone used for both the queued resource and the TPU node.
        tpu_name: ID assigned to the requested TPU node.
        tpu_type: Accelerator type of the requested node.
        runtime_version: Runtime version set on the requested node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.
        NOTE(review): the annotation says ``Node``, but the code reads
        ``.state.state`` from the result, which looks like a queued
        resource rather than a node -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_create_spot]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The node and the queued resource share the same parent location.
    location = f"projects/{project_id}/locations/{zone}"

    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    tpu_node = tpu_v2alpha1.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
    )

    spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec(
        parent=location,
        node_id=tpu_name,
        node=tpu_node,
    )

    queued_resource = tpu_v2alpha1.QueuedResource(
        tpu=tpu_v2alpha1.QueuedResource.Tpu(node_spec=[spec]),
    )
    # Create a spot resource
    queued_resource.spot = tpu_v2alpha1.QueuedResource.Spot()

    request = CreateQueuedResourceRequest(
        parent=location,
        queued_resource_id=queued_resource_name,
        queued_resource=queued_resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)
    response = operation.result()

    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create_spot]
    return response
72+
73+
74+
if __name__ == "__main__":
    # The project comes from the environment; the zone and IDs are sample values.
    ZONE = "us-central1-b"
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    create_queued_resource_spot(
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
        zone=ZONE,
        project_id=PROJECT_ID,
    )
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resource_startup_script(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource whose TPU node carries a startup script.

    The script is passed through the node metadata under the
    "startup-script" key, and external IPs are enabled on the node.

    Args:
        project_id: Project in which the queued resource is created.
        zone: Zone used for both the queued resource and the TPU node.
        tpu_name: ID assigned to the requested TPU node.
        tpu_type: Accelerator type of the requested node.
        runtime_version: Runtime version set on the requested node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The result of the create operation.
        NOTE(review): the annotation says ``Node``, but the code reads
        ``.tpu.node_spec`` from the result, the same shape as the queued
        resource built here -- confirm and adjust the annotation.
    """
    # [START tpu_queued_resources_startup_script]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The node and the queued resource share the same parent location.
    location = f"projects/{project_id}/locations/{zone}"

    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    tpu_node = tpu_v2alpha1.Node(
        accelerator_type=tpu_type,
        runtime_version=runtime_version,
    )
    # This startup script updates numpy to the latest version and logs the output to a file.
    startup_metadata = {
        "startup-script": """#!/bin/bash
echo "Hello World" > /var/log/hello.log
sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1
"""
    }
    tpu_node.metadata = startup_metadata
    # Enabling external IPs for internet access from the TPU node for updating numpy
    tpu_node.network_config = tpu_v2alpha1.NetworkConfig(
        enable_external_ips=True,
    )

    spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec(
        parent=location,
        node_id=tpu_name,
        node=tpu_node,
    )

    queued_resource = tpu_v2alpha1.QueuedResource(
        tpu=tpu_v2alpha1.QueuedResource.Tpu(node_spec=[spec]),
    )

    request = CreateQueuedResourceRequest(
        parent=location,
        queued_resource_id=queued_resource_name,
        queued_resource=queued_resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.tpu.node_spec[0].node.metadata)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # {'startup-script': '#!/bin/bash\n echo "Hello World" > /var/log/hello.log\n
    # sudo pip3 install --upgrade numpy >> /var/log/hello.log 2>&1\n '}

    # [END tpu_queued_resources_startup_script]
    return response
83+
84+
85+
if __name__ == "__main__":
    # The project comes from the environment; the zone and IDs are sample values.
    ZONE = "us-central1-b"
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    create_queued_resource_startup_script(
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
        zone=ZONE,
        project_id=PROJECT_ID,
    )

0 commit comments

Comments
 (0)