Commit 2438fe7

set TF_CONFIG for 'chief' clusters; fix multi-gpu allocation

1 parent: 9593955

File tree: 2 files changed, 4 additions & 4 deletions

tensorflowonspark/TFSparkNode.py

Lines changed: 1 addition & 1 deletion
@@ -275,7 +275,7 @@ def _mapfn(iter):
       cluster_spec[njob] = hosts
 
     # update TF_CONFIG if cluster spec has a 'master' node (i.e. tf.estimator)
-    if 'master' in cluster_spec:
+    if 'master' in cluster_spec or 'chief' in cluster_spec:
       tf_config = json.dumps({
         'cluster': cluster_spec,
         'task': {'type': job_name, 'index': task_index},
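
For context, tf.estimator-style training reads the cluster layout from the TF_CONFIG environment variable, and newer Estimator setups name the coordinator job 'chief' rather than 'master', which is why the guard now checks for either job name before serializing TF_CONFIG. Below is a minimal illustrative sketch (not taken from this commit) of what the resulting TF_CONFIG could look like for a 'chief'-based cluster; the host:port values, role assignment, and task index are made up for illustration.

import json
import os

# Minimal sketch (illustrative only): building TF_CONFIG for a cluster whose
# coordinator job is named 'chief' rather than 'master'.
# All host:port values below are assumptions, not taken from the repo.
cluster_spec = {
  'chief': ['host0:2222'],
  'worker': ['host1:2222', 'host2:2222'],
  'ps': ['host3:2222']
}

job_name = 'chief'   # role of this particular executor (assumed)
task_index = 0       # index of this executor within that role (assumed)

# Same shape as the dict serialized in the diff above.
tf_config = json.dumps({
  'cluster': cluster_spec,
  'task': {'type': job_name, 'index': task_index}
})

# tf.estimator-style training picks up the cluster layout from this variable.
os.environ['TF_CONFIG'] = tf_config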

tensorflowonspark/gpu_info.py

Lines changed: 3 additions & 3 deletions
@@ -96,9 +96,9 @@ def parse_gpu(gpu_str):
         proposed_gpus = free_gpus[:num_gpu]
       else:
         # ordered by worker index
-        if worker_index + num_gpu > num_available:
-          worker_index = worker_index % num_available
-        proposed_gpus = free_gpus[worker_index:(worker_index + num_gpu)]
+        if worker_index * num_gpu + num_gpu > num_available:
+          worker_index = worker_index * num_gpu % num_available
+        proposed_gpus = free_gpus[worker_index * num_gpu:(worker_index * num_gpu + num_gpu)]
       logging.info("Proposed GPUs: {}".format(proposed_gpus))
 
   return ','.join(str(x) for x in proposed_gpus)
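
To see what the multi-GPU allocation fix changes: the old code started each worker's slice at worker_index, so any worker requesting num_gpu > 1 overlapped its neighbor's GPUs; the new code strides by num_gpu so each worker gets a disjoint block, wrapping the index when it would run past the free list. The sketch below isolates just that arithmetic; the propose_gpus helper and the 8-GPU / 4-worker numbers are made up for illustration, while free_gpus, num_gpu, num_available, and worker_index mirror the names in gpu_info.py.

# Illustrative sketch of the allocation arithmetic introduced in the diff above.
def propose_gpus(free_gpus, num_gpu, worker_index):
  num_available = len(free_gpus)
  # New behavior: stride by num_gpu so each worker gets a disjoint block.
  if worker_index * num_gpu + num_gpu > num_available:
    worker_index = worker_index * num_gpu % num_available
  return free_gpus[worker_index * num_gpu:(worker_index * num_gpu + num_gpu)]

free_gpus = [0, 1, 2, 3, 4, 5, 6, 7]   # 8 free GPUs on the host (assumed)
for i in range(4):                      # 4 workers, 2 GPUs each (assumed)
  print(i, propose_gpus(free_gpus, num_gpu=2, worker_index=i))
# worker 0 -> [0, 1], worker 1 -> [2, 3], worker 2 -> [4, 5], worker 3 -> [6, 7]
# The old slice free_gpus[worker_index:worker_index + num_gpu] would have given
# worker 1 -> [1, 2], overlapping worker 0's allocation.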
