Skip to content

Commit c63630a

Browse files
authored
Merge pull request #9 from takaomoriyama/main
Avoid selecting port number less than 20000
2 parents eb42a12 + 80adaa6 commit c63630a

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

ray_launch_cluster.sh

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ function getfreeport()
 {
 CHECK="do while"
 while [[ ! -z $CHECK ]]; do
-port=$(( ( RANDOM % 60000 ) + 1025 ))
-CHECK=$(sudo netstat -ap | grep $port)
+port=$(( ( RANDOM % 40000 ) + 20000 ))
+CHECK=$(netstat -a | grep $port)
 done
 echo $port
 }
@@ -79,7 +79,7 @@ if [ -z "$CUDA_VISIBLE_DEVICES" ]
 then
 num_gpu=0
 else
-num_gpu=$CUDA_VISIBLE_DEVICES
+num_gpu=`echo $CUDA_VISIBLE_DEVICES | tr , ' ' | wc -w`
 fi
 
 #Assumption only one head node and more than one
@@ -103,8 +103,7 @@ fi
 
 num_cpu_for_head=${associative[$head_node]}
 
-command_launch="blaunch -z ${hosts[0]} ray start --head --port $port --dashboard-port $dashboard_port --object-store-memory $object_store_mem"
-
+command_launch="blaunch -z ${hosts[0]} ray start --head --port $port --dashboard-port $dashboard_port --num-cpus $num_cpu_for_head --num-gpus $num_gpu --object-store-memory $object_store_mem"
 
 $command_launch &
 
@@ -131,8 +130,7 @@ do
 
 sleep 10
 num_cpu=${associative[$host]}
-#command_for_worker="blaunch -z $host ray start --address $head_node:$port --num-cpus $num_cpu --num-gpus $num_gpu --object-store-memory $object_store_mem"
-command_for_worker="blaunch -z $host ray start --address $head_node:$port --object-store-memory $object_store_mem"
+command_for_worker="blaunch -z $host ray start --address $head_node:$port --num-cpus $num_cpu --num-gpus $num_gpu --object-store-memory $object_store_mem"
 
 
 $command_for_worker &
@@ -157,4 +155,4 @@ else
 echo "Done"
 echo "Shutting down the Job"
 bkill $LSB_JOBID
-fi
+fi

0 commit comments

Comments
 (0)