Skip to content

Commit 002b4ae

Browse files
feat(tpu): add tpu queued resources network (#9605)
* Added tpu_queued_resources_network sample * Changed CODEOWNERS * Update CreateQueuedResourceWithNetwork.java --------- Co-authored-by: Eric Schmidt <[email protected]>
1 parent 19dcf09 commit 002b4ae

File tree

7 files changed

+539
-0
lines changed

7 files changed

+539
-0
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
/security-command-center @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/gcp-security-command-center
4545
/servicedirectory @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
4646
/webrisk @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
47+
/tpu @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/dee-infra
4748

4849
# DEE Platform Ops (DEEPO)
4950
/errorreporting @GoogleCloudPlatform/java-samples-reviewers @yoshi-approver @GoogleCloudPlatform/cloud-samples-reviewers

tpu/pom.xml

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright 2024 Google LLC
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
-->
14+
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
15+
xmlns="http://maven.apache.org/POM/4.0.0"
16+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
17+
<modelVersion>4.0.0</modelVersion>
18+
<groupId>com.example.tpu</groupId>
19+
<artifactId>gce-diregapic-samples</artifactId>
20+
<version>1.0-SNAPSHOT</version>
21+
22+
<!--
23+
The parent pom defines common style checks and testing strategies for our samples.
24+
Removing or replacing it should not affect the execution of the samples in any way.
25+
-->
26+
<parent>
27+
<artifactId>shared-configuration</artifactId>
28+
<groupId>com.google.cloud.samples</groupId>
29+
<version>1.2.0</version>
30+
</parent>
31+
32+
<properties>
33+
<maven.compiler.source>11</maven.compiler.source>
34+
<maven.compiler.target>11</maven.compiler.target>
35+
</properties>
36+
37+
<dependencies>
38+
<dependency>
39+
<groupId>com.google.cloud</groupId>
40+
<artifactId>google-cloud-tpu</artifactId>
41+
<version>2.52.0</version>
42+
</dependency>
43+
44+
<dependency>
45+
<groupId>com.google.api</groupId>
46+
<artifactId>gax</artifactId>
47+
</dependency>
48+
49+
<!-- Test dependencies -->
50+
<dependency>
51+
<artifactId>google-cloud-storage</artifactId>
52+
<groupId>com.google.cloud</groupId>
53+
<scope>test</scope>
54+
</dependency>
55+
56+
<dependency>
57+
<artifactId>truth</artifactId>
58+
<groupId>com.google.truth</groupId>
59+
<scope>test</scope>
60+
<version>1.4.0</version>
61+
</dependency>
62+
<dependency>
63+
<artifactId>junit</artifactId>
64+
<groupId>junit</groupId>
65+
<scope>test</scope>
66+
<version>4.13.2</version>
67+
</dependency>
68+
69+
<!--
70+
JUnit Jupiter dependencies to run BeforeEach and AfterEach methods
71+
(in tandem with mvn surefire) before every test.
72+
Without these, mvn surefire skips these methods and leads to concurrency
73+
issues.
74+
-->
75+
<dependency>
76+
<groupId>org.junit.jupiter</groupId>
77+
<artifactId>junit-jupiter-engine</artifactId>
78+
<version>5.10.2</version>
79+
<scope>test</scope>
80+
</dependency>
81+
<dependency>
82+
<groupId>org.mockito</groupId>
83+
<artifactId>mockito-core</artifactId>
84+
<version>5.13.0</version>
85+
<scope>test</scope>
86+
</dependency>
87+
</dependencies>
88+
89+
<dependencyManagement>
90+
<dependencies>
91+
<dependency>
92+
<artifactId>libraries-bom</artifactId>
93+
<groupId>com.google.cloud</groupId>
94+
<scope>import</scope>
95+
<type>pom</type>
96+
<version>26.40.0</version>
97+
</dependency>
98+
</dependencies>
99+
</dependencyManagement>
100+
101+
</project>
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package tpu;
18+
19+
//[START tpu_queued_resources_network]
20+
import com.google.api.gax.retrying.RetrySettings;
21+
import com.google.cloud.tpu.v2alpha1.CreateQueuedResourceRequest;
22+
import com.google.cloud.tpu.v2alpha1.NetworkConfig;
23+
import com.google.cloud.tpu.v2alpha1.Node;
24+
import com.google.cloud.tpu.v2alpha1.QueuedResource;
25+
import com.google.cloud.tpu.v2alpha1.TpuClient;
26+
import com.google.cloud.tpu.v2alpha1.TpuSettings;
27+
import java.io.IOException;
28+
import java.util.concurrent.ExecutionException;
29+
import org.threeten.bp.Duration;
30+
31+
public class CreateQueuedResourceWithNetwork {
32+
public static void main(String[] args)
33+
throws IOException, ExecutionException, InterruptedException {
34+
// TODO(developer): Replace these variables before running the sample.
35+
// Project ID or project number of the Google Cloud project you want to create a node.
36+
String projectId = "YOUR_PROJECT_ID";
37+
// The zone in which to create the TPU.
38+
// For more information about supported TPU types for specific zones,
39+
// see https://cloud.google.com/tpu/docs/regions-zones
40+
String zone = "europe-west4-a";
41+
// The name for your TPU.
42+
String nodeName = "YOUR_TPU_NAME";
43+
// The accelerator type that specifies the version and size of the Cloud TPU you want to create.
44+
// For more information about supported accelerator types for each TPU version,
45+
// see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
46+
String tpuType = "v2-8";
47+
// Software version that specifies the version of the TPU runtime to install.
48+
// For more information see https://cloud.google.com/tpu/docs/runtimes
49+
String tpuSoftwareVersion = "tpu-vm-tf-2.14.1";
50+
// The name for your Queued Resource.
51+
String queuedResourceId = "QUEUED_RESOURCE_ID";
52+
// The name of the network you want the node to connect to.
53+
// The network should be assigned to your project.
54+
String networkName = "YOUR_COMPUTE_TPU_NETWORK";
55+
56+
createQueuedResourceWithNetwork(projectId, zone, queuedResourceId, nodeName,
57+
tpuType, tpuSoftwareVersion, networkName);
58+
}
59+
60+
// Creates a Queued Resource with network configuration.
61+
public static QueuedResource createQueuedResourceWithNetwork(
62+
String projectId, String zone, String queuedResourceId, String nodeName,
63+
String tpuType, String tpuSoftwareVersion, String networkName)
64+
throws IOException, ExecutionException, InterruptedException {
65+
// With these settings the client library handles the Operation's polling mechanism
66+
// and prevent CancellationException error
67+
TpuSettings.Builder clientSettings =
68+
TpuSettings.newBuilder();
69+
clientSettings
70+
.createQueuedResourceSettings()
71+
.setRetrySettings(
72+
RetrySettings.newBuilder()
73+
.setInitialRetryDelay(Duration.ofMillis(5000L))
74+
.setRetryDelayMultiplier(2.0)
75+
.setInitialRpcTimeout(Duration.ZERO)
76+
.setRpcTimeoutMultiplier(1.0)
77+
.setMaxRetryDelay(Duration.ofMillis(45000L))
78+
.setTotalTimeout(Duration.ofHours(24L))
79+
.build());
80+
// Initialize client that will be used to send requests. This client only needs to be created
81+
// once, and can be reused for multiple requests.
82+
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
83+
String parent = String.format("projects/%s/locations/%s", projectId, zone);
84+
String region = zone.substring(0, zone.length() - 2);
85+
86+
// Specify the network and subnetwork that you want to connect your TPU to.
87+
NetworkConfig networkConfig =
88+
NetworkConfig.newBuilder()
89+
.setEnableExternalIps(true)
90+
.setNetwork(String.format("projects/%s/global/networks/%s", projectId, networkName))
91+
.setSubnetwork(
92+
String.format(
93+
"projects/%s/regions/%s/subnetworks/%s", projectId, region, networkName))
94+
.build();
95+
96+
// Create a node
97+
Node node =
98+
Node.newBuilder()
99+
.setName(nodeName)
100+
.setAcceleratorType(tpuType)
101+
.setRuntimeVersion(tpuSoftwareVersion)
102+
.setNetworkConfig(networkConfig)
103+
.setQueuedResource(
104+
String.format(
105+
"projects/%s/locations/%s/queuedResources/%s",
106+
projectId, zone, queuedResourceId))
107+
.build();
108+
109+
// Create queued resource
110+
QueuedResource queuedResource =
111+
QueuedResource.newBuilder()
112+
.setName(queuedResourceId)
113+
.setTpu(
114+
QueuedResource.Tpu.newBuilder()
115+
.addNodeSpec(
116+
QueuedResource.Tpu.NodeSpec.newBuilder()
117+
.setParent(parent)
118+
.setNode(node)
119+
.setNodeId(nodeName)
120+
.build())
121+
.build())
122+
.build();
123+
124+
CreateQueuedResourceRequest request =
125+
CreateQueuedResourceRequest.newBuilder()
126+
.setParent(parent)
127+
.setQueuedResource(queuedResource)
128+
.setQueuedResourceId(queuedResourceId)
129+
.build();
130+
131+
QueuedResource response = tpuClient.createQueuedResourceAsync(request).get();
132+
// You can wait until TPU Node is READY,
133+
// and check its status using getTpuVm() from "tpu_vm_get" sample.
134+
System.out.println("Queued Resource created: " + queuedResourceId);
135+
return response;
136+
}
137+
}
138+
}
139+
//[END tpu_queued_resources_network]
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package tpu;
18+
19+
//[START tpu_queued_resources_delete_force]
20+
21+
import com.google.api.gax.retrying.RetrySettings;
22+
import com.google.api.gax.rpc.UnknownException;
23+
import com.google.cloud.tpu.v2alpha1.DeleteQueuedResourceRequest;
24+
import com.google.cloud.tpu.v2alpha1.TpuClient;
25+
import com.google.cloud.tpu.v2alpha1.TpuSettings;
26+
import java.io.IOException;
27+
import java.util.concurrent.ExecutionException;
28+
import org.threeten.bp.Duration;
29+
30+
public class DeleteForceQueuedResource {
31+
public static void main(String[] args) {
32+
// TODO(developer): Replace these variables before running the sample.
33+
// Project ID or project number of the Google Cloud project.
34+
String projectId = "YOUR_PROJECT_ID";
35+
// The zone in which the TPU was created.
36+
String zone = "europe-west4-a";
37+
// The name for your Queued Resource.
38+
String queuedResourceId = "QUEUED_RESOURCE_ID";
39+
40+
deleteForceQueuedResource(projectId, zone, queuedResourceId);
41+
}
42+
43+
// Deletes a Queued Resource asynchronously with --force flag.
44+
public static void deleteForceQueuedResource(
45+
String projectId, String zone, String queuedResourceId) {
46+
String name = String.format("projects/%s/locations/%s/queuedResources/%s",
47+
projectId, zone, queuedResourceId);
48+
// With these settings the client library handles the Operation's polling mechanism
49+
// and prevent CancellationException error
50+
TpuSettings.Builder clientSettings =
51+
TpuSettings.newBuilder();
52+
clientSettings
53+
.deleteQueuedResourceSettings()
54+
.setRetrySettings(
55+
RetrySettings.newBuilder()
56+
.setInitialRetryDelay(Duration.ofMillis(5000L))
57+
.setRetryDelayMultiplier(2.0)
58+
.setInitialRpcTimeout(Duration.ZERO)
59+
.setRpcTimeoutMultiplier(1.0)
60+
.setMaxRetryDelay(Duration.ofMillis(45000L))
61+
.setTotalTimeout(Duration.ofHours(24L))
62+
.build());
63+
64+
// Initialize client that will be used to send requests. This client only needs to be created
65+
// once, and can be reused for multiple requests.
66+
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
67+
DeleteQueuedResourceRequest request =
68+
DeleteQueuedResourceRequest.newBuilder().setName(name).setForce(true).build();
69+
70+
tpuClient.deleteQueuedResourceAsync(request).get();
71+
72+
} catch (UnknownException | InterruptedException | ExecutionException | IOException e) {
73+
System.out.println(e.getMessage());
74+
}
75+
System.out.printf("Deleted Queued Resource: %s\n", name);
76+
}
77+
}
78+
//[END tpu_queued_resources_delete_force]
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package tpu;
18+
19+
//[START tpu_queued_resources_get]
20+
21+
import com.google.cloud.tpu.v2alpha1.GetQueuedResourceRequest;
22+
import com.google.cloud.tpu.v2alpha1.QueuedResource;
23+
import com.google.cloud.tpu.v2alpha1.TpuClient;
24+
import java.io.IOException;
25+
26+
public class GetQueuedResource {
27+
public static void main(String[] args) throws IOException {
28+
// TODO(developer): Replace these variables before running the sample.
29+
// Project ID or project number of the Google Cloud project.
30+
String projectId = "YOUR_PROJECT_ID";
31+
// The zone in which the TPU was created.
32+
String zone = "europe-west4-a";
33+
// The name for your Queued Resource.
34+
String queuedResourceId = "QUEUED_RESOURCE_ID";
35+
36+
getQueuedResource(projectId, zone, queuedResourceId);
37+
}
38+
39+
// Get a Queued Resource.
40+
public static QueuedResource getQueuedResource(
41+
String projectId, String zone, String queuedResourceId) throws IOException {
42+
String name = String.format("projects/%s/locations/%s/queuedResources/%s",
43+
projectId, zone, queuedResourceId);
44+
// Initialize client that will be used to send requests. This client only needs to be created
45+
// once, and can be reused for multiple requests.
46+
try (TpuClient tpuClient = TpuClient.create()) {
47+
GetQueuedResourceRequest request =
48+
GetQueuedResourceRequest.newBuilder().setName(name).build();
49+
50+
return tpuClient.getQueuedResource(request);
51+
}
52+
}
53+
}
54+
//[END tpu_queued_resources_get]

0 commit comments

Comments
 (0)