Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/forge/controller/provisioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,18 @@ async def get_proc_mesh(
# We can't currently do this because HostMesh only supports single
# proc_mesh creation at the moment. This will be possible once
# we have "proper HostMesh support".
def bootstrap(gpu_ids: int):
def bootstrap(gpu_ids: list[str]):
# This works for single host, needed for vLLM currently.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(gpu_ids)
os.environ["MASTER_ADDR"] = socket.gethostname()
# Multiple actors trying to call _get_port doesn't work
# os.environ["MASTER_PORT"] = _get_port()
os.environ["MASTER_PORT"] = "12345"

# Setting the last digit to the first GPU id allows us to, e.g.,
# create multiple vLLM instances on the same local host.
os.environ["MASTER_PORT"] = f"1234{gpu_ids[0]}"
os.environ["HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT_SECS"] = "600"
os.environ["HYPERACTOR_CODE_MAX_FRAME_LENGTH"] = "1073741824"

Expand Down
Loading