Skip to content

Commit af369a3

Browse files
authored
fix: Move ray worker port range start from 20001 to 53001 (#235)
Signed-off-by: Terry Kong <[email protected]>
1 parent 756152c commit af369a3

File tree

3 files changed

+55
-4
lines changed

3 files changed

+55
-4
lines changed

ray.sub

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,11 @@ NODE_MANAGER_PORT=${NODE_MANAGER_PORT:-8077}
26 26
DASHBOARD_AGENT_PORT=${DASHBOARD_AGENT_PORT:-52365}
27 27
DASHBOARD_AGENT_GRPC_PORT=${DASHBOARD_AGENT_GRPC_PORT:-52366}
28 28
METRICS_PORT=${METRICS_PORT:-9002}
29-
# NOTE: Ports start above 20000 since 10001-10257 frequently ran into conflicts
30-
MIN_WORKER_PORT=${MIN_WORKER_PORT:-20001}
31-
MAX_WORKER_PORT=${MAX_WORKER_PORT:-20257}
29+
# On our clusters, the largest port range on an idle worker appeared between 52367-64607
30+
# (not including the other ports set by this script). So this range is chosen to be
31+
# somewhere in the middle
32+
MIN_WORKER_PORT=${MIN_WORKER_PORT:-53001}
33+
MAX_WORKER_PORT=${MAX_WORKER_PORT:-53257}
32 34
########################################################
33 35

34 36
# Defaults to placing uv cache inside the SLURM_SUBMIT_DIR

tools/copyright.sh

100644 → 100755
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
19 19
# Move to the project root
20 20
cd $SCRIPT_DIR/..
21 21
find_files_with_missing_copyright() {
22-
find ./nemo_reinforcer/ ./docs/*.py ./examples/ ./tests/ -type f -name '*.py' | while read path; do
22+
find ./nemo_reinforcer/ ./docs/*.py ./examples/ ./tests/ ./tools/ -type f -name '*.py' | while read path; do
23 23
echo -en $path"\t"
24 24
head -2 $path | grep -iv 'coding=' | head -1
25 25
done \
Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import socket
16+
17+
18+
def is_free(port):
19+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
20+
return s.connect_ex(("localhost", port)) != 0
21+
22+
23+
# Print header
24+
print("Size\tRange")
25+
print("-" * 20)
26+
27+
start = None
28+
for port in range(1024, 65536):
29+
if is_free(port):
30+
if start is None:
31+
start = port
32+
else:
33+
if start is not None:
34+
if start == port - 1:
35+
size = 1
36+
print(f"{size:4d}\t{start}")
37+
else:
38+
size = port - start
39+
print(f"{size:4d}\t{start}-{port - 1}")
40+
start = None
41+
42+
# If it ends on a free range, print it
43+
if start is not None:
44+
if start == 65535:
45+
size = 1
46+
print(f"{size:4d}\t{start}")
47+
else:
48+
size = 65536 - start
49+
print(f"{size:4d}\t{start}-65535")

0 commit comments

Comments
 (0)