We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 33c0e0b commit 9d1006e (Copy full SHA for 9d1006e)
nemo_run/run/torchx_backend/components/torchrun.py
@@ -119,7 +119,7 @@ def torchrun(
119
master_addr = os.environ["MASTER_ADDR"]
120
master_port = os.environ["MASTER_PORT"]
121
rdzv_endpoint = torchx_dist._noquote(master_addr + ":" + master_port)
122
- random.seed(rdzv_id)
+ random.seed(rdzv_endpoint)
123
else:
124
rdzv_endpoint = torchx_dist._noquote(f"$${ExecutorMacros.HEAD_NODE_IP_VAR}:{rdzv_port}")
125
0 commit comments