Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/forge/controller/provisioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,18 @@ async def get_proc_mesh(
# We can't currently do this because HostMesh only supports single
# proc_mesh creation at the moment. This will be possible once
# we have "proper HostMesh support".
def bootstrap(gpu_ids: int):
def bootstrap(gpu_ids: list[int]):
# This works for single host, needed for vLLM currently.
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(gpu_ids)
os.environ["MASTER_ADDR"] = socket.gethostname()
# Multiple actors trying to call _get_port doesn't work
# os.environ["MASTER_PORT"] = _get_port()
os.environ["MASTER_PORT"] = "12345"

# Setting the last digit to the first GPU id allows us to, e.g.,
# create multiple vLLM instances on the same local host.
os.environ["MASTER_PORT"] = f"1234{gpu_ids[0]}"
os.environ["HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT_SECS"] = "600"
os.environ["HYPERACTOR_CODE_MAX_FRAME_LENGTH"] = "1073741824"

Expand Down
Loading