Skip to content

Commit 5f8e04a

Browse files
Joanna Gryczgryczj
authored and committed
feat: add tpu_vm_create_topology
1 parent 0e00257 commit 5f8e04a

File tree

8 files changed

+301
-0
lines changed

8 files changed

+301
-0
lines changed

.github/workflows/tpu.yaml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: tpu
16+
on:
17+
push:
18+
branches:
19+
- main
20+
paths:
21+
- 'tpu/**'
22+
- '.github/workflows/tpu.yaml'
23+
- '.github/workflows/test.yaml'
24+
pull_request:
25+
types:
26+
- opened
27+
- reopened
28+
- synchronize
29+
- labeled
30+
paths:
31+
- 'tpu/**'
32+
- '.github/workflows/tpu.yaml'
33+
- '.github/workflows/test.yaml'
34+
schedule:
35+
- cron: '0 0 * * 0'
36+
jobs:
37+
test:
38+
# Ref: https://github.com/google-github-actions/auth#usage
39+
permissions:
40+
contents: 'read'
41+
id-token: 'write'
42+
if: github.event.action != 'labeled' || github.event.label.name == 'actions:force-run'
43+
uses: ./.github/workflows/test.yaml
44+
with:
45+
name: 'tpu'
46+
path: 'tpu'
47+
flakybot:
48+
# Ref: https://github.com/google-github-actions/auth#usage
49+
permissions:
50+
contents: 'read'
51+
id-token: 'write'
52+
if: github.event_name == 'schedule' && always() # always() submits logs even if tests fail
53+
uses: ./.github/workflows/flakybot.yaml
54+
needs: [test]

.github/workflows/utils/workflows.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
"speech",
9191
"talent",
9292
"texttospeech",
93+
"tpu",
9394
"translate",
9495
"video-intelligence",
9596
"vision/productSearch",

CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ recaptcha_enterprise @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-
2424
recaptcha_enterprise/demosite @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/recaptcha-customer-obsession-reviewers @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2525
secret-manager @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers @GoogleCloudPlatform/cloud-secrets-team
2626
service-directory @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
27+
tpu @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2728
webrisk @GoogleCloudPlatform/dee-infra @GoogleCloudPlatform/nodejs-samples-reviewers @GoogleCloudPlatform/cloud-samples-reviewers
2829

2930
# SoDa teams

tpu/package.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"name": "nodejs-docs-samples-tpu",
3+
"license": "Apache-2.0",
4+
"author": "Google Inc.",
5+
"engines": {
6+
"node": ">=16.0.0"
7+
},
8+
"repository": "googleapis/nodejs-tpu",
9+
"private": true,
10+
"files": [
11+
"*.js"
12+
],
13+
"scripts": {
14+
"test": "c8 mocha -p -j 2 test --timeout 1200000"
15+
},
16+
"dependencies": {
17+
"@google-cloud/tpu": "^3.5.0"
18+
},
19+
"devDependencies": {
20+
"c8": "^10.0.0",
21+
"mocha": "^10.0.0"
22+
}
23+
}

tpu/test/.eslintrc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
---
2+
env:
3+
mocha: true

tpu/test/util.js

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
const {TpuClient} = require('@google-cloud/tpu').v2;
18+
19+
const tpuClient = new TpuClient();
20+
21+
/**
 * Lists the TPU nodes in a zone whose short name starts with `prefix` and
 * that were created more than three hours ago, so that leftovers from
 * earlier test runs can be cleaned up.
 *
 * @param {string} prefix - Node name prefix to match.
 * @param {string} zone - Zone (location) whose nodes are listed.
 * @returns {Promise<Array<{nodeName: string, timestamp: object}>>} The stale
 *   nodes: the short node name plus the `createTime` object of each node.
 */
async function getStaleNodes(prefix, zone) {
  const projectId = await tpuClient.getProjectId();
  const result = [];

  // Anything created before this instant is considered stale.
  const cutoff = new Date();
  cutoff.setHours(cutoff.getHours() - 3);

  const listNodesAsyncRequest = tpuClient.listNodesAsync({
    parent: `projects/${projectId}/locations/${zone}`,
  });

  for await (const tpuObject of listNodesAsyncRequest) {
    // The resource name is a path; its last segment is the short node name.
    const name = tpuObject.name.split('/').slice(-1)[0];

    // createTime is a timestamp split into whole seconds since the epoch and
    // a nanosecond remainder. Both parts are needed: using only `nanos`
    // yields a date in 1970 and makes every node look stale.
    // Number() normalises `seconds` in case it is not a plain number
    // (int64 values can be decoded as a string or a Long object).
    const {seconds, nanos} = tpuObject.createTime;
    const createdAt = new Date(
      Number(seconds ?? 0) * 1000 + Math.floor(Number(nanos ?? 0) / 1e6)
    );

    if (createdAt < cutoff && name.startsWith(prefix)) {
      result.push({
        nodeName: name,
        timestamp: tpuObject.createTime,
      });
    }
  }

  return result;
}
47+
48+
/**
 * Deletes a TPU node and waits until the delete operation has completed.
 *
 * @param {string} zone - Zone (location) that contains the node.
 * @param {string} nodeName - Short name of the node to delete.
 * @returns {Promise<void>}
 */
async function deleteNode(zone, nodeName) {
  const projectId = await tpuClient.getProjectId();
  const name = `projects/${projectId}/locations/${zone}/nodes/${nodeName}`;

  console.log('Deleting node: ', nodeName);

  // The call yields an operation handle; awaiting its promise blocks until
  // the deletion itself has finished.
  const [deleteOperation] = await tpuClient.deleteNode({name});
  await deleteOperation.promise();
}
62+
63+
module.exports = {
64+
getStaleNodes,
65+
deleteNode,
66+
};

tpu/test/vmTopology.test.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
const path = require('path');
20+
const assert = require('node:assert/strict');
21+
const {before, after, describe, it} = require('mocha');
22+
const cp = require('child_process');
23+
const {getStaleNodes, deleteNode} = require('./util');
24+
25+
/**
 * Runs a shell command synchronously and returns its output as a string.
 *
 * @param {string} cmd - Command line to execute.
 * @param {object} [options] - Extra options forwarded to `cp.execSync`
 *   (for example `cwd`). Previously these were silently dropped.
 * @returns {string} The command's output, decoded as UTF-8.
 */
const execSync = (cmd, options = {}) =>
  // `encoding` is applied last so the result is always a string, which the
  // tests rely on when calling `.includes()` on it.
  cp.execSync(cmd, {...options, encoding: 'utf-8'});
26+
// Parent directory of this test file's directory; passed as the working
// directory option when the sample script is executed.
const cwd = path.join(__dirname, '..');
27+
28+
// The suite callback is intentionally synchronous: hooks and tests are
// registered while it runs, and a promise returned from it is not awaited.
describe('Compute tpu with topology', () => {
  const nodePrefix = 'topology-node-name-2a2b3c';
  // Random suffix so that separate runs create distinctly named nodes.
  const nodeName = `${nodePrefix}${Math.floor(Math.random() * 1000 + 1)}`;
  const zone = 'europe-west4-a';
  const tpuSoftwareVersion = 'tpu-vm-tf-2.17.0-pod-pjrt';

  before(async () => {
    // Clean up stale nodes with this suite's prefix before creating a new one.
    const nodes = await getStaleNodes(nodePrefix, zone);
    await Promise.all(nodes.map(node => deleteNode(zone, node.nodeName)));
  });

  after(async () => {
    // Delete the node created by this run.
    await deleteNode(zone, nodeName);
  });

  it('should create a new tpu', () => {
    const response = execSync(
      `node ./vmCreateTopology.js ${nodeName} ${zone} ${tpuSoftwareVersion}`,
      {
        cwd,
      }
    );
    assert(response.includes(`TPU VM: ${nodeName} created.`));
  });
});

tpu/vmCreateTopology.js

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
/**
 * Creates a TPU VM with an explicit accelerator topology and logs the result.
 *
 * @param {string} nodeName - Name (and node ID) of the TPU to create.
 * @param {string} zone - Zone in which to create the TPU.
 * @param {string} tpuSoftwareVersion - TPU runtime version to install.
 */
async function main(nodeName, zone, tpuSoftwareVersion) {
  // [START tpu_vm_create_topology]
  // Import the TPU library
  const tpu = require('@google-cloud/tpu');
  const {TpuClient} = tpu.v2;
  const {Node, NetworkConfig, AcceleratorConfig} =
    tpu.protos.google.cloud.tpu.v2;

  // Instantiate a tpuClient
  const tpuClient = new TpuClient();

  /**
   * TODO(developer): Update/uncomment these variables before running the sample.
   */
  // Project ID or project number of the Google Cloud project you want to create a node.
  const projectId = await tpuClient.getProjectId();

  // The name of the network you want the TPU node to connect to. The network should be assigned to your project.
  const networkName = 'compute-tpu-network';

  // The region of the network, that you want the TPU node to connect to.
  const region = 'europe-west4';

  // The name for your TPU.
  // nodeName = 'node-name-1';

  // The zone in which to create the TPU.
  // For more information about supported TPU types for specific zones,
  // see https://cloud.google.com/tpu/docs/regions-zones
  // zone = 'europe-west4-a';

  // Software version that specifies the version of the TPU runtime to install. For more information,
  // see https://cloud.google.com/tpu/docs/runtimes
  // tpuSoftwareVersion = 'tpu-vm-tf-2.17.0-pod-pjrt';

  // The version of the Cloud TPU you want to create.
  // Available options: TYPE_UNSPECIFIED = 0, V2 = 2, V3 = 4, V4 = 7
  const tpuVersion = AcceleratorConfig.Type.V2;

  // The physical topology of your TPU slice.
  // For more information about topology for each TPU version,
  // see https://cloud.google.com/tpu/docs/system-architecture-tpu-vm#versions.
  const topology = '2x2';

  async function callCreateTpuVMTopology() {
    // Define the network the node attaches to
    const networkConfig = new NetworkConfig({
      enableExternalIps: true,
      network: `projects/${projectId}/global/networks/${networkName}`,
      subnetwork: `projects/${projectId}/regions/${region}/subnetworks/${networkName}`,
    });

    // Define the TPU version and slice topology
    const acceleratorConfig = new AcceleratorConfig({
      type: tpuVersion,
      topology,
    });

    // Describe the node to create
    const node = new Node({
      name: nodeName,
      zone,
      runtimeVersion: tpuSoftwareVersion,
      networkConfig,
      acceleratorConfig,
    });

    const [operation] = await tpuClient.createNode({
      parent: `projects/${projectId}/locations/${zone}`,
      node,
      nodeId: nodeName,
    });

    // Wait for the create operation to complete.
    const [response] = await operation.promise();

    console.log(JSON.stringify(response));
    console.log(`TPU VM: ${nodeName} created.`);
  }
  await callCreateTpuVMTopology();
  // [END tpu_vm_create_topology]
}
95+
96+
// Run the sample with the command-line arguments. On failure, print the
// error and set a non-zero exit code.
main(...process.argv.slice(2)).catch(error => {
  console.error(error);
  process.exitCode = 1;
});

0 commit comments

Comments
 (0)