Skip to content

Commit 3f1f9a1

Browse files
committed
Create queued resources operations: Create, Create Spot, Get, Delete
1 parent 418b0b4 commit 3f1f9a1

File tree

5 files changed

+328
-0
lines changed

5 files changed

+328
-0
lines changed

tpu/queued_resources_create.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resources(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource that requests a single TPU node.

    Builds a node spec from the given accelerator type and runtime version,
    submits a create-queued-resource request, waits on ``operation.result()``
    and prints the name and state of the result.

    Args:
        project_id: Google Cloud project ID used in the parent path.
        zone: Location used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node inside the queued resource.
        tpu_type: Accelerator type assigned to the node.
        runtime_version: TPU runtime version assigned to the node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The value produced by ``operation.result()``.
        NOTE(review): the signature is annotated as ``Node``, but the printed
        fields (a ``queuedResources/...`` name and ``state.state``) suggest the
        result is a queued resource - confirm and correct the annotation
        together with the module-level imports.
    """
    # [START tpu_queued_resources_create]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The node spec and the request share the same parent location.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    node.runtime_version = runtime_version

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])

    # Referenced through tpu_v2alpha1 so that the snippet between the region
    # tags only depends on the import made inside the region.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)

    response = operation.result()
    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create]
    return response
70+
71+
72+
if __name__ == "__main__":
    # Index os.environ so a missing GOOGLE_CLOUD_PROJECT fails immediately
    # with KeyError instead of producing a "projects/None/..." resource path.
    PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
    ZONE = "us-central1-b"
    create_queued_resources(
        project_id=PROJECT_ID,
        zone=ZONE,
        tpu_name="tpu-name",
        queued_resource_name="resource-name1",
    )
tpu/queued_resources_create_spot.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import CreateQueuedResourceRequest, Node
17+
18+
19+
def create_queued_resources_spot(
    project_id: str,
    zone: str,
    tpu_name: str,
    tpu_type: str = "v2-8",
    runtime_version: str = "tpu-vm-tf-2.17.0-pjrt",
    queued_resource_name: str = "resource-name",
) -> Node:
    """Create a queued resource for a single TPU node with the spot option set.

    Builds a node spec from the given accelerator type and runtime version,
    sets the resource's ``spot`` field, submits a create-queued-resource
    request, waits on ``operation.result()`` and prints the name and state of
    the result.

    Args:
        project_id: Google Cloud project ID used in the parent path.
        zone: Location used in the parent path, e.g. "us-central1-b".
        tpu_name: ID given to the TPU node inside the queued resource.
        tpu_type: Accelerator type assigned to the node.
        runtime_version: TPU runtime version assigned to the node.
        queued_resource_name: ID of the queued resource to create.

    Returns:
        The value produced by ``operation.result()``.
        NOTE(review): the signature is annotated as ``Node``, but the printed
        fields (a ``queuedResources/...`` name and ``state.state``) suggest the
        result is a queued resource - confirm and correct the annotation
        together with the module-level imports.
    """
    # [START tpu_queued_resources_create_spot]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # tpu_name = "tpu-name"
    # tpu_type = "v2-8"
    # runtime_version = "tpu-vm-tf-2.17.0-pjrt"
    # queued_resource_name = "resource-name"

    # The node spec and the request share the same parent location.
    parent = f"projects/{project_id}/locations/{zone}"

    node = tpu_v2alpha1.Node()
    node.accelerator_type = tpu_type
    # To see available runtime version use command:
    # gcloud compute tpus versions list --zone={ZONE}
    node.runtime_version = runtime_version

    node_spec = tpu_v2alpha1.QueuedResource.Tpu.NodeSpec()
    node_spec.parent = parent
    node_spec.node_id = tpu_name
    node_spec.node = node

    resource = tpu_v2alpha1.QueuedResource()
    resource.tpu = tpu_v2alpha1.QueuedResource.Tpu(node_spec=[node_spec])
    # Create a spot resource
    resource.spot = tpu_v2alpha1.QueuedResource.Spot()

    # Referenced through tpu_v2alpha1 so that the snippet between the region
    # tags only depends on the import made inside the region.
    request = tpu_v2alpha1.CreateQueuedResourceRequest(
        parent=parent,
        queued_resource_id=queued_resource_name,
        queued_resource=resource,
    )

    client = tpu_v2alpha1.TpuClient()
    operation = client.create_queued_resource(request=request)
    response = operation.result()

    print(response.name)
    print(response.state.state)
    # Example response:
    # projects/[project_id]/locations/[zone]/queuedResources/resource-name
    # State.WAITING_FOR_RESOURCES

    # [END tpu_queued_resources_create_spot]
    return response
72+
73+
74+
if __name__ == "__main__":
    # Index os.environ so a missing GOOGLE_CLOUD_PROJECT fails immediately
    # with KeyError instead of producing a "projects/None/..." resource path.
    PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
    ZONE = "us-central1-b"
    create_queued_resources_spot(
        project_id=PROJECT_ID,
        zone=ZONE,
        tpu_name="tpu-name",
        queued_resource_name="resource-name",
    )

tpu/queued_resources_delete.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
17+
def delete_queued_resources(
    project_id: str, zone: str, queued_resource_name: str
) -> None:
    """Request deletion of a queued resource and print the outcome.

    No exception derived from ``Exception`` propagates to the caller: every
    failure is printed instead.

    Args:
        project_id: Google Cloud project ID used in the resource path.
        zone: Location used in the resource path, e.g. "us-central1-b".
        queued_resource_name: ID of the queued resource to delete.
    """
    # [START tpu_queued_resource_delete]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # queued_resource_name = "resource-name"

    tpu_client = tpu_v2alpha1.TpuClient()
    resource_path = (
        f"projects/{project_id}/locations/{zone}/queuedResources/{queued_resource_name}"
    )
    deleted_message = f"Queued resource '{queued_resource_name}' successfully deleted."
    try:
        delete_operation = tpu_client.delete_queued_resource(name=resource_path)
        delete_operation.result()
        print(deleted_message)
    except TypeError as type_error:
        # NOTE(review): a TypeError is reported as an error and then as a
        # successful deletion - presumably the deletion went through and only
        # handling of the operation result failed; confirm.
        print(f"Error deleting resource: {type_error}")
        print(deleted_message)
    except Exception as error:
        # Any other failure is only printed, never re-raised.
        print(f"Error deleting resource: {error}")

    # [END tpu_queued_resource_delete]
43+
44+
45+
if __name__ == "__main__":
    # Raises KeyError when GOOGLE_CLOUD_PROJECT is not set.
    PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
    ZONE = "us-central1-b"
    delete_queued_resources(
        project_id=PROJECT_ID,
        zone=ZONE,
        queued_resource_name="resource-name",
    )

tpu/queued_resources_get.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import os
15+
16+
from google.cloud.tpu_v2alpha1 import QueuedResource
17+
18+
19+
def get_queued_resources(
    project_id: str, zone: str, queued_resource_name: str
) -> QueuedResource:
    """Fetch a queued resource, print its state and return it.

    Args:
        project_id: Google Cloud project ID used in the resource path.
        zone: Location used in the resource path, e.g. "us-central1-b".
        queued_resource_name: ID of the queued resource to look up.

    Returns:
        The queued resource returned by the TPU client.
    """
    # [START tpu_queued_resources_get]
    from google.cloud import tpu_v2alpha1

    # TODO(developer): Update and un-comment below lines
    # project_id = "your-project-id"
    # zone = "us-central1-b"
    # queued_resource_name = "resource-name"

    resource_path = (
        f"projects/{project_id}/locations/{zone}/queuedResources/{queued_resource_name}"
    )
    tpu_client = tpu_v2alpha1.TpuClient()
    queued_resource = tpu_client.get_queued_resource(name=resource_path)
    print(queued_resource.state.state)

    # [END tpu_queued_resources_get]
    return queued_resource
34+
35+
36+
if __name__ == "__main__":
    # Raises KeyError when GOOGLE_CLOUD_PROJECT is not set.
    PROJECT_ID = os.environ["GOOGLE_CLOUD_PROJECT"]
    ZONE = "us-central1-b"
    get_queued_resources(
        project_id=PROJECT_ID,
        zone=ZONE,
        queued_resource_name="resource-name",
    )

tpu/test_queued_resources.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import time
17+
import uuid
18+
19+
from google.cloud.tpu_v2 import Node
20+
from google.cloud.tpu_v2alpha1 import QueuedResourceState as States
21+
22+
import delete_tpu
23+
import get_tpu
24+
import queued_resources_create
25+
import queued_resources_delete
26+
import queued_resources_get
27+
28+
29+
# Names carry a random uuid suffix that is regenerated on every import.
TPU_NAME = f"test-tpu-{uuid.uuid4().hex[:10]}"
RESOURCE_NAME = f"test-resource-{uuid.uuid4().hex[:5]}"
# May be None when the environment variable is not set.
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
ZONE = "us-central1-b"
TPU_TYPE = "v2-8"
TPU_VERSION = "tpu-vm-tf-2.17.0-pjrt"


# Queued-resource states in which clean_resource() attempts a deletion.
STATUSES = [
    States.State.ACCEPTED,
    States.State.WAITING_FOR_RESOURCES,
    States.State.SUSPENDED,
    States.State.FAILED,
]
43+
44+
45+
def clean_resource() -> bool:
    """Delete the test queued resource, waiting until it can be deleted.

    Polls the queued resource named ``RESOURCE_NAME``. Once its state is one
    of ``STATUSES`` the resource is deleted and the function returns.
    Otherwise it waits 60 seconds, deletes the TPU node ``TPU_NAME`` if that
    node is in the READY state, and polls again.

    Returns:
        True once the delete call for the queued resource has returned
        without raising. The loop has no upper bound on its duration.

    Raises:
        Any exception raised while fetching the queued resource; that call is
        not guarded.
    """
    while True:
        resource = queued_resources_get.get_queued_resources(
            PROJECT_ID, ZONE, RESOURCE_NAME
        )
        if resource.state.state in STATUSES:
            try:
                print(f"Attempting to delete resource '{RESOURCE_NAME}'...")
                queued_resources_delete.delete_queued_resources(
                    PROJECT_ID, ZONE, RESOURCE_NAME
                )
            except Exception as exc:
                # Fall through to the wait below rather than retrying at once,
                # so a persistent failure does not turn into a busy loop.
                print(f"Deleting resource '{RESOURCE_NAME}' failed: {exc}. Retrying...")
            else:
                print("Resource and TPU successfully deleted. Exiting...")
                return True
        else:
            print("Resource is not in a deletable state. Waiting...")

        time.sleep(60)

        # Best effort: remove the TPU node once it is READY. Failures here
        # (for example the node lookup raising) are reported and polling
        # continues.
        try:
            node = get_tpu.get_cloud_tpu(PROJECT_ID, ZONE, TPU_NAME)
            if node:
                print("NODE:", node.name)
                if node.state == Node.State.READY:
                    print(f"Attempting to delete TPU '{TPU_NAME}'...")
                    delete_tpu.delete_cloud_tpu(PROJECT_ID, ZONE, TPU_NAME)
        except Exception as exc:
            print(f"TPU '{TPU_NAME}' was not cleaned up on this pass: {exc}")
70+
71+
72+
def test_create_resource() -> None:
    """Create a queued resource, check its name, then always clean it up."""
    try:
        created = queued_resources_create.create_queued_resources(
            project_id=PROJECT_ID,
            zone=ZONE,
            tpu_name=TPU_NAME,
            tpu_type=TPU_TYPE,
            runtime_version=TPU_VERSION,
            queued_resource_name=RESOURCE_NAME,
        )
        assert RESOURCE_NAME in created.name
    finally:
        # Cleanup runs whether or not creation and the assertion succeeded.
        cleaned_up = clean_resource()
        assert cleaned_up

0 commit comments

Comments
 (0)