File tree Expand file tree Collapse file tree 1 file changed +6
-8
lines changed
Expand file tree Collapse file tree 1 file changed +6
-8
lines changed Original file line number Diff line number Diff line change @@ -7,8 +7,8 @@ function getfreeport()
77{
88 CHECK=" do while"
99 while [[ ! -z $CHECK ]]; do
10- port=$(( ( RANDOM % 60000 ) + 1025 ))
11- CHECK=$( sudo netstat -ap | grep $port )
10+ port=$(( ( RANDOM % 40000 ) + 20000 ))
11+ CHECK=$( netstat -a | grep $port )
1212 done
1313 echo $port
1414}
@@ -79,7 +79,7 @@ if [ -z "$CUDA_VISIBLE_DEVICES" ]
7979then
8080 num_gpu=0
8181else
82- num_gpu=$CUDA_VISIBLE_DEVICES
82+ num_gpu=` echo $CUDA_VISIBLE_DEVICES | tr , ' ' | wc -w `
8383fi
8484
8585# Assumption only one head node and more than one
103103
104104num_cpu_for_head=${associative[$head_node]}
105105
106- command_launch=" blaunch -z ${hosts[0]} ray start --head --port $port --dashboard-port $dashboard_port --object-store-memory $object_store_mem "
107-
106+ command_launch=" blaunch -z ${hosts[0]} ray start --head --port $port --dashboard-port $dashboard_port --num-cpus $num_cpu_for_head --num-gpus $num_gpu --object-store-memory $object_store_mem "
108107
109108$command_launch &
110109
131130
132131 sleep 10
133132 num_cpu=${associative[$host]}
134- # command_for_worker="blaunch -z $host ray start --address $head_node:$port --num-cpus $num_cpu --num-gpus $num_gpu --object-store-memory $object_store_mem"
135- command_for_worker=" blaunch -z $host ray start --address $head_node:$port --object-store-memory $object_store_mem "
133+ command_for_worker=" blaunch -z $host ray start --address $head_node:$port --num-cpus $num_cpu --num-gpus $num_gpu --object-store-memory $object_store_mem "
136134
137135
138136 $command_for_worker &
157155 echo " Done"
158156 echo " Shutting down the Job"
159157 bkill $LSB_JOBID
160- fi
158+ fi
You can’t perform that action at this time.
0 commit comments