Skip to content

Commit 3035ff6

Browse files
Added tpu_queued_resources_network sample
1 parent f4a9938 commit 3035ff6

10 files changed

+998
-0
lines changed

tpu/pom.xml

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Copyright 2024 Google LLC
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
Unless required by applicable law or agreed to in writing, software
9+
distributed under the License is distributed on an "AS IS" BASIS,
10+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
See the License for the specific language governing permissions and
12+
limitations under the License.
13+
-->
14+
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
15+
xmlns="http://maven.apache.org/POM/4.0.0"
16+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
17+
<modelVersion>4.0.0</modelVersion>
18+
<groupId>com.example.tpu</groupId>
19+
<artifactId>gce-diregapic-samples</artifactId>
20+
<version>1.0-SNAPSHOT</version>
21+
22+
<!--
23+
The parent pom defines common style checks and testing strategies for our samples.
24+
Removing or replacing it should not affect the execution of the samples in any way.
25+
-->
26+
<parent>
27+
<artifactId>shared-configuration</artifactId>
28+
<groupId>com.google.cloud.samples</groupId>
29+
<version>1.2.0</version>
30+
</parent>
31+
32+
<properties>
33+
<maven.compiler.source>11</maven.compiler.source>
34+
<maven.compiler.target>11</maven.compiler.target>
35+
</properties>
36+
37+
<dependencies>
38+
<dependency>
39+
<groupId>com.google.cloud</groupId>
40+
<artifactId>google-cloud-tpu</artifactId>
41+
<version>2.52.0</version>
42+
</dependency>
43+
44+
<dependency>
45+
<groupId>com.google.api</groupId>
46+
<artifactId>gax</artifactId>
47+
</dependency>
48+
49+
<!-- Test dependencies -->
50+
<dependency>
51+
<artifactId>google-cloud-storage</artifactId>
52+
<groupId>com.google.cloud</groupId>
53+
<scope>test</scope>
54+
</dependency>
55+
56+
<dependency>
57+
<artifactId>truth</artifactId>
58+
<groupId>com.google.truth</groupId>
59+
<scope>test</scope>
60+
<version>1.4.0</version>
61+
</dependency>
62+
<dependency>
63+
<artifactId>junit</artifactId>
64+
<groupId>junit</groupId>
65+
<scope>test</scope>
66+
<version>4.13.2</version>
67+
</dependency>
68+
69+
<!--
70+
JUnit Jupiter dependencies to run BeforeEach and AfterEach methods
71+
(in tandem with mvn surefire) before every test.
72+
Without these, mvn surefire skips these methods and leads to concurrency
73+
issues.
74+
-->
75+
<dependency>
76+
<groupId>org.junit.jupiter</groupId>
77+
<artifactId>junit-jupiter-engine</artifactId>
78+
<version>5.10.2</version>
79+
<scope>test</scope>
80+
</dependency>
81+
<dependency>
82+
<groupId>org.mockito</groupId>
83+
<artifactId>mockito-core</artifactId>
84+
<version>5.13.0</version>
85+
<scope>test</scope>
86+
</dependency>
87+
</dependencies>
88+
89+
<dependencyManagement>
90+
<dependencies>
91+
<dependency>
92+
<artifactId>libraries-bom</artifactId>
93+
<groupId>com.google.cloud</groupId>
94+
<scope>import</scope>
95+
<type>pom</type>
96+
<version>26.40.0</version>
97+
</dependency>
98+
</dependencies>
99+
</dependencyManagement>
100+
101+
</project>
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
package tpu;
2+
3+
//[START tpu_queued_resources_network]
4+
5+
import com.google.api.gax.retrying.RetrySettings;
6+
import com.google.cloud.tpu.v2alpha1.CreateQueuedResourceRequest;
7+
import com.google.cloud.tpu.v2alpha1.NetworkConfig;
8+
import com.google.cloud.tpu.v2alpha1.Node;
9+
import com.google.cloud.tpu.v2alpha1.QueuedResource;
10+
import com.google.cloud.tpu.v2alpha1.TpuClient;
11+
import com.google.cloud.tpu.v2alpha1.TpuSettings;
12+
import java.io.IOException;
13+
import java.util.concurrent.ExecutionException;
14+
import org.threeten.bp.Duration;
15+
16+
public class CreateQueuedResourceWithNetwork {
17+
18+
private final TpuClient tpuClient;
19+
public CreateQueuedResourceWithNetwork(TpuClient tpuClient) {
20+
this.tpuClient = tpuClient;
21+
}
22+
public static void main(String[] args)
23+
throws IOException, ExecutionException, InterruptedException {
24+
// TODO(developer): Replace these variables before running the sample.
25+
// Project ID or project number of the Google Cloud project you want to create a node.
26+
String projectId = "tyaho-softserve-project";//"your-project";
27+
// The zone in which to create the TPU.
28+
// For more information about supported TPU types for specific zones,
29+
// see https://cloud.google.com/tpu/docs/regions-zones
30+
String zone = "europe-west4-a";
31+
// The name for your TPU.
32+
String nodeName = "your-node-id";
33+
// The accelerator type that specifies the version and size of the Cloud TPU you want to create.
34+
// For more information about supported accelerator types for each TPU version,
35+
// see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
36+
String tpuType = "v2-8";
37+
// Software version that specifies the version of the TPU runtime to install.
38+
// For more information see https://cloud.google.com/tpu/docs/runtimes
39+
String tpuSoftwareVersion = "tpu-vm-tf-2.14.1";
40+
// The name for your Queued Resource.
41+
String queuedResourceName = "your-queued-resource-id";
42+
TpuClient tpuClient = TpuClient.create();
43+
CreateQueuedResourceWithNetwork creator = new CreateQueuedResourceWithNetwork(tpuClient);
44+
45+
creator.createQueuedResourceWithNetwork(projectId, zone, queuedResourceName, nodeName, tpuType, tpuSoftwareVersion);
46+
47+
}
48+
49+
// Creates a Queued Resource with network configuration.
50+
public static void createQueuedResourceWithNetwork(
51+
String projectId,
52+
String zone,
53+
String queuedResourceName,
54+
String nodeName,
55+
String tpuType,
56+
String tpuSoftwareVersion)
57+
throws IOException, ExecutionException, InterruptedException {
58+
// With these settings the client library handles the Operation's polling mechanism
59+
// and prevent CancellationException error
60+
TpuSettings.Builder clientSettings =
61+
TpuSettings.newBuilder();
62+
clientSettings
63+
.createQueuedResourceSettings()
64+
.setRetrySettings(
65+
RetrySettings.newBuilder()
66+
.setInitialRetryDelay(Duration.ofMillis(5000L))
67+
.setRetryDelayMultiplier(2.0)
68+
.setInitialRpcTimeout(Duration.ZERO)
69+
.setRpcTimeoutMultiplier(1.0)
70+
.setMaxRetryDelay(Duration.ofMillis(45000L))
71+
.setTotalTimeout(Duration.ofHours(24L))
72+
.build());
73+
// Initialize client that will be used to send requests. This client only needs to be created
74+
// once, and can be reused for multiple requests.
75+
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
76+
String parent = String.format("projects/%s/locations/%s", projectId, zone);
77+
String region = zone.substring(0, zone.length() - 2);
78+
// The name of the network you want the node to connect to. The network should be assigned to your project.
79+
String networkName = "compute-tpu-network";
80+
// Specify the network and subnetwork that you want to connect your TPU to.
81+
82+
NetworkConfig networkConfig =
83+
NetworkConfig.newBuilder()
84+
.setEnableExternalIps(true)
85+
.setNetwork(String.format("projects/%s/global/networks/%s", projectId, networkName))
86+
.setSubnetwork(
87+
String.format(
88+
"projects/%s/regions/%s/subnetworks/%s", projectId, region, networkName))
89+
.build();
90+
91+
// Create a node
92+
Node node =
93+
Node.newBuilder()
94+
.setName(nodeName)
95+
.setAcceleratorType(tpuType)
96+
.setRuntimeVersion(tpuSoftwareVersion)
97+
.setNetworkConfig(networkConfig)
98+
.setQueuedResource(
99+
String.format(
100+
"projects/%s/locations/%s/queuedResources/%s",
101+
projectId, zone, queuedResourceName))
102+
.build();
103+
104+
// Create queued resource
105+
QueuedResource queuedResource =
106+
QueuedResource.newBuilder()
107+
.setName(queuedResourceName)
108+
.setTpu(
109+
QueuedResource.Tpu.newBuilder()
110+
.addNodeSpec(
111+
QueuedResource.Tpu.NodeSpec.newBuilder()
112+
.setParent(parent)
113+
.setNode(node)
114+
.setNodeId(nodeName)
115+
.build())
116+
.build())
117+
.build();
118+
119+
CreateQueuedResourceRequest request =
120+
CreateQueuedResourceRequest.newBuilder()
121+
.setParent(parent)
122+
.setQueuedResource(queuedResource)
123+
.setQueuedResourceId(queuedResourceName)
124+
.build();
125+
126+
QueuedResource response = tpuClient.createQueuedResourceAsync(request).get();
127+
// You can wait until TPU Node is READY,
128+
// and check its status using getTpuVm() from "tpu_vm_get" sample.
129+
System.out.printf(
130+
"Queued resource %s with specified network created.\n", queuedResourceName);
131+
System.out.println(response);
132+
}
133+
}
134+
}
135+
//[END tpu_queued_resources_network]
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
package tpu;
2+
3+
import com.google.api.gax.retrying.RetrySettings;
4+
import com.google.cloud.tpu.v2alpha1.CreateQueuedResourceRequest;
5+
import com.google.cloud.tpu.v2alpha1.Node;
6+
import com.google.cloud.tpu.v2alpha1.QueuedResource;
7+
import com.google.cloud.tpu.v2alpha1.TpuClient;
8+
import com.google.cloud.tpu.v2alpha1.TpuSettings;
9+
import org.threeten.bp.Duration;
10+
11+
import java.io.IOException;
12+
import java.util.HashMap;
13+
import java.util.Map;
14+
import java.util.concurrent.ExecutionException;
15+
16+
public class CreateQueuedResourceWithStartupScript {
17+
public static void main(String[] args)
18+
throws IOException, ExecutionException, InterruptedException {
19+
// TODO(developer): Replace these variables before running the sample.
20+
// Project ID or project number of the Google Cloud project you want to create a node.
21+
String projectId = "tyaho-softserve-project";//"your-project";
22+
// The zone in which to create the TPU.
23+
// For more information about supported TPU types for specific zones,
24+
// see https://cloud.google.com/tpu/docs/regions-zones
25+
String zone = "europe-west4-a";
26+
// The name for your TPU.
27+
String nodeName = "your-node-id";
28+
// The accelerator type that specifies the version and size of the Cloud TPU you want to create.
29+
// For more information about supported accelerator types for each TPU version,
30+
// see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
31+
String tpuType = "v2-8";
32+
// Software version that specifies the version of the TPU runtime to install.
33+
// For more information see https://cloud.google.com/tpu/docs/runtimes
34+
String tpuSoftwareVersion = "tpu-vm-tf-2.14.1";
35+
// The name for your Queued Resource.
36+
String queuedResourceId = "your-queued-resource-id";
37+
38+
createQueuedResource(
39+
projectId, zone, queuedResourceId, nodeName, tpuType, tpuSoftwareVersion);
40+
}
41+
42+
// Creates a Queued Resource
43+
public static void createQueuedResource(
44+
String projectId,
45+
String zone,
46+
String queuedResourceId,
47+
String nodeName,
48+
String tpuType,
49+
String tpuSoftwareVersion)
50+
throws IOException, ExecutionException, InterruptedException {
51+
// With these settings the client library handles the Operation's polling mechanism
52+
// and prevent CancellationException error
53+
TpuSettings.Builder clientSettings =
54+
TpuSettings.newBuilder();
55+
clientSettings
56+
.createQueuedResourceSettings()
57+
.setRetrySettings(
58+
RetrySettings.newBuilder()
59+
.setInitialRetryDelay(Duration.ofMillis(5000L))
60+
.setRetryDelayMultiplier(2.0)
61+
.setInitialRpcTimeout(Duration.ZERO)
62+
.setRpcTimeoutMultiplier(1.0)
63+
.setMaxRetryDelay(Duration.ofMillis(45000L))
64+
.setTotalTimeout(Duration.ofHours(24L))
65+
.build());
66+
// Initialize client that will be used to send requests. This client only needs to be created
67+
// once, and can be reused for multiple requests.
68+
try (TpuClient tpuClient = TpuClient.create(clientSettings.build())) {
69+
String parent = String.format("projects/%s/locations/%s", projectId, zone);
70+
String startupScript = "your-startup-script";
71+
72+
// Add startup script to metadata
73+
Map<String, String> metadata = new HashMap<>();
74+
metadata.put("startup-script", startupScript);
75+
76+
Node node =
77+
Node.newBuilder()
78+
.setName(nodeName)
79+
.setAcceleratorType(tpuType)
80+
.setRuntimeVersion(tpuSoftwareVersion)
81+
.setQueuedResource(
82+
String.format(
83+
"projects/%s/locations/%s/queuedResources/%s",
84+
projectId, zone, queuedResourceId))
85+
.putAllMetadata(metadata)
86+
.build();
87+
88+
QueuedResource queuedResource =
89+
QueuedResource.newBuilder()
90+
.setName(queuedResourceId)
91+
.setTpu(
92+
QueuedResource.Tpu.newBuilder()
93+
.addNodeSpec(
94+
QueuedResource.Tpu.NodeSpec.newBuilder()
95+
.setParent(parent)
96+
.setNode(node)
97+
.setNodeId(nodeName)
98+
.build())
99+
.build())
100+
.build();
101+
102+
CreateQueuedResourceRequest request =
103+
CreateQueuedResourceRequest.newBuilder()
104+
.setParent(parent)
105+
.setQueuedResourceId(queuedResourceId)
106+
.setQueuedResource(queuedResource)
107+
.build();
108+
109+
QueuedResource response = tpuClient.createQueuedResourceAsync(request).get();
110+
// You can wait until TPU Node is READY,
111+
// and check its status using getTpuVm() from "tpu_vm_get" sample.
112+
System.out.printf("Queued Resource created: %s\n", queuedResourceId);
113+
}
114+
}
115+
}

0 commit comments

Comments
 (0)