Skip to content

Commit ed47978

Browse files
committed
add file to support multi node
1 parent 2666307 commit ed47978

File tree

3 files changed

+28
-97
lines changed

3 files changed

+28
-97
lines changed

python-tensorflow/Dockerfile.2.4-py36-cuda11.1

Lines changed: 0 additions & 97 deletions
This file was deleted.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
# Export TF_CONFIG for multi-node TensorFlow sessions.
#
# Runs setup_multinode.py and captures whatever it prints on stdout.
#   - Empty output: print an empty line and leave TF_CONFIG untouched.
#   - Otherwise:    print the captured configuration and export it as TF_CONFIG.
#
# NOTE(review): `export` only reaches the caller's environment when this file
# is sourced; if it is executed as a standalone script the variable is visible
# only to this process and its children -- confirm how it is invoked.
BAI_MULTINODE_CONFIG_TF=$(/opt/backend.ai/bin/python /opt/container/setup_multinode.py)
if [ -z "$BAI_MULTINODE_CONFIG_TF" ]; then
    echo ""
else
    # Quoted so the JSON is printed verbatim: an unquoted expansion is
    # word-split and its [...] could be interpreted as a glob pattern.
    echo "${BAI_MULTINODE_CONFIG_TF}"
    export TF_CONFIG="${BAI_MULTINODE_CONFIG_TF}"
fi
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
"""Print a TF_CONFIG-style JSON document built from cluster environment variables.

When run as a script, prints the JSON configuration on stdout if the
``BACKENDAI_CLUSTER_HOSTS`` variable is set, and an empty line otherwise.
"""
import json
import os

# Port appended to every host name in the worker list.
WORKER_PORT = 2220


def build_tf_config(environ=None, port=WORKER_PORT):
    """Build the cluster/task configuration from environment variables.

    Args:
        environ: Mapping to read variables from; defaults to ``os.environ``.
        port: Port appended to each host name in the worker list.

    Returns:
        A dict of the form ``{'cluster': {'worker': [...]}, 'task': {'type':
        'worker', 'index': ...}}``, or ``None`` when ``BACKENDAI_CLUSTER_HOSTS``
        is absent or contains no host names.

    Raises:
        KeyError: If the host list is present but ``BACKENDAI_CLUSTER_ROLE``
            or ``BACKENDAI_CLUSTER_IDX`` is missing.
        ValueError: If the role is ``main`` and ``BACKENDAI_CLUSTER_IDX`` is
            not an integer.
    """
    if environ is None:
        environ = os.environ

    # Guard on the variable that is actually read below.  The previous check
    # tested BACKENDAI_CLUSTER_HOST and then indexed BACKENDAI_CLUSTER_HOSTS,
    # which raised KeyError whenever only the former was set.
    if 'BACKENDAI_CLUSTER_HOSTS' not in environ:
        return None

    # Start multi-instance setup.  Empty entries (empty value, stray commas)
    # are skipped so they do not turn into a bogus ":<port>" worker address.
    hosts = (name.strip() for name in environ['BACKENDAI_CLUSTER_HOSTS'].split(","))
    workers = [name + ":" + str(port) for name in hosts if name]
    if not workers:
        return None

    if environ['BACKENDAI_CLUSTER_ROLE'] == 'main':
        # The main container is reported as a plain worker (it used to be
        # "chief"; per the original note, recent TF picks the first worker as
        # chief).  Its index is shifted down by one because indices start at 0.
        index = str(int(environ['BACKENDAI_CLUSTER_IDX']) - 1)
    else:
        # Other roles pass the index through unchanged.
        index = environ['BACKENDAI_CLUSTER_IDX']

    # Key order is kept stable so the serialized JSON matches earlier output.
    return {
        'cluster': {'worker': workers},
        'task': {'type': "worker", 'index': index},
    }


def main():
    """Print the configuration as JSON, or an empty line when not in a cluster."""
    config = build_tf_config()
    print(json.dumps(config) if config is not None else "")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)