Skip to content

Commit cbaae89

Browse files
Merge branch 'pettingzoo_shared_vf' of https://github.com/MatthewCWeston/rllib_current_attention into pettingzoo_shared_vf
2 parents b936f52 + e7b02c4 commit cbaae89

File tree

240 files changed

+58967
-6076
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

240 files changed

+58967
-6076
lines changed

ci/build/build-ray-docker.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@ CPU_TMP="$(mktemp -d)"
1414

1515
cp -r .whl "${CPU_TMP}/.whl"
1616
cp docker/ray/Dockerfile "${CPU_TMP}/Dockerfile"
17-
cp python/requirements_compiled.txt "${CPU_TMP}/."
17+
cp python/"${CONSTRAINTS_FILE}" "${CPU_TMP}/requirements_compiled.txt"
1818

1919
# Build the image.
2020
cd "${CPU_TMP}"
2121
tar --mtime="UTC 2020-01-01" -c -f - . \
2222
| docker build --progress=plain \
2323
--build-arg FULL_BASE_IMAGE="$SOURCE_IMAGE" \
2424
--build-arg WHEEL_PATH=".whl/${WHEEL_NAME}" \
25-
--build-arg CONSTRAINTS_FILE="$CONSTRAINTS_FILE" \
2625
--label "io.ray.ray-version=$RAY_VERSION" \
2726
--label "io.ray.ray-commit=$RAY_COMMIT" \
2827
-t "$DEST_IMAGE" -f Dockerfile -

ci/lint/pydoclint-baseline.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,8 +192,6 @@ python/ray/_private/services.py
192192
DOC101: Function `start_raylet`: Docstring contains fewer arguments than in function signature.
193193
DOC107: Function `start_raylet`: The option `--arg-type-hints-in-signature` is `True` but not all args in the signature have type hints
194194
DOC103: Function `start_raylet`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [cluster_id: str, socket_to_use: Optional[int]].
195-
DOC101: Function `determine_plasma_store_config`: Docstring contains fewer arguments than in function signature.
196-
DOC103: Function `determine_plasma_store_config`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [temp_dir: str].
197195
DOC101: Function `start_monitor`: Docstring contains fewer arguments than in function signature.
198196
DOC103: Function `start_monitor`: Docstring arguments are different from function arguments. (Or could be other formatting issues: https://jsh9.github.io/pydoclint/violation_codes.html#notes-on-doc103 ). Arguments in the function signature but not in the docstring: [autoscaler_v2: bool, fate_share: Optional[bool]].
199197
DOC101: Function `start_ray_client_server`: Docstring contains fewer arguments than in function signature.

ci/raydepsets/cli.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,15 @@ def __init__(
9696
check: Optional[bool] = False,
9797
build_all_configs: Optional[bool] = False,
9898
):
99+
"""Initialize the dependency set manager.
100+
101+
Args:
102+
config_path: Path to the depsets config file.
103+
workspace_dir: Path to the workspace directory.
104+
uv_cache_dir: Directory to cache uv dependencies.
105+
check: Whether to check if lock files are up to date.
106+
build_all_configs: Whether to build all configs or just the specified one.
107+
"""
99108
self.workspace = Workspace(workspace_dir)
100109
self.config = self.workspace.load_configs(config_path)
101110
self.config_name = os.path.basename(config_path)
@@ -109,6 +118,7 @@ def __init__(
109118
self.copy_to_temp_dir()
110119

111120
def get_output_paths(self) -> List[Path]:
121+
"""Get all output paths for depset nodes in topological order."""
112122
output_paths = []
113123
for node in topological_sort(self.build_graph):
114124
if self.build_graph.nodes[node]["node_type"] == "depset":
@@ -126,6 +136,7 @@ def copy_to_temp_dir(self):
126136
)
127137

128138
def get_diffs(self) -> List[str]:
139+
"""Compare current lock files with previously saved copies and return unified diffs."""
129140
diffs = []
130141
for output_path in self.output_paths:
131142
new_lock_file_fp, old_lock_file_fp = self.get_source_and_dest(output_path)
@@ -142,6 +153,7 @@ def get_diffs(self) -> List[str]:
142153
return diffs
143154

144155
def diff_lock_files(self):
156+
"""Check if lock files are up to date and raise an error if not."""
145157
diffs = self.get_diffs()
146158
if len(diffs) > 0:
147159
raise RuntimeError(
@@ -151,9 +163,11 @@ def diff_lock_files(self):
151163
click.echo("Lock files are up to date.")
152164

153165
def get_source_and_dest(self, output_path: str) -> tuple[Path, Path]:
166+
"""Get the source workspace path and temporary destination path for a lock file."""
154167
return (self.get_path(output_path), (Path(self.temp_dir) / output_path))
155168

156169
def _build(self, build_all_configs: Optional[bool] = False):
170+
"""Build the dependency graph from config depsets."""
157171
for depset in self.config.depsets:
158172
if depset.operation == "compile":
159173
self.build_graph.add_node(
@@ -201,11 +215,13 @@ def _build(self, build_all_configs: Optional[bool] = False):
201215
self.subgraph_config_nodes()
202216

203217
def subgraph_dependency_nodes(self, depset_name: str):
218+
"""Reduce the build graph to only include the specified depset and its ancestors."""
204219
dependency_nodes = networkx_ancestors(self.build_graph, depset_name)
205220
nodes = dependency_nodes | {depset_name}
206221
self.build_graph = self.build_graph.subgraph(nodes).copy()
207222

208223
def subgraph_config_nodes(self):
224+
"""Reduce the build graph to nodes matching the current config and their ancestors."""
209225
# Get all nodes that have the target config name
210226
config_nodes = [
211227
node
@@ -224,6 +240,7 @@ def subgraph_config_nodes(self):
224240
self.build_graph = self.build_graph.subgraph(nodes).copy()
225241

226242
def execute(self, single_depset_name: Optional[str] = None):
243+
"""Execute all depsets in topological order, optionally limited to a single depset."""
227244
if single_depset_name:
228245
# check if the depset exists
229246
_get_depset(self.config.depsets, single_depset_name)
@@ -240,6 +257,7 @@ def execute(self, single_depset_name: Optional[str] = None):
240257
def exec_uv_cmd(
241258
self, cmd: str, args: List[str], stdin: Optional[bytes] = None
242259
) -> str:
260+
"""Execute a uv pip command with the given arguments."""
243261
cmd = [self._uv_binary, "pip", cmd, *args]
244262
click.echo(f"Executing command: {' '.join(cmd)}")
245263
status = subprocess.run(
@@ -252,6 +270,7 @@ def exec_uv_cmd(
252270
return status.stdout.decode("utf-8")
253271

254272
def execute_pre_hook(self, pre_hook: str):
273+
"""Execute a pre-hook shell command."""
255274
status = subprocess.run(
256275
shlex.split(pre_hook),
257276
cwd=self.workspace.dir,
@@ -265,6 +284,7 @@ def execute_pre_hook(self, pre_hook: str):
265284
click.echo(f"Executed pre_hook {pre_hook} successfully")
266285

267286
def execute_depset(self, depset: Depset):
287+
"""Execute a single depset based on its operation type (compile, subset, or expand)."""
268288
if depset.operation == "compile":
269289
self.compile(
270290
constraints=depset.constraints,
@@ -389,15 +409,18 @@ def expand(
389409
)
390410

391411
def read_lock_file(self, file_path: Path) -> List[str]:
412+
"""Read and return the contents of a lock file as a list of lines."""
392413
if not file_path.exists():
393414
raise RuntimeError(f"Lock file {file_path} does not exist")
394415
with open(file_path, "r") as f:
395416
return f.readlines()
396417

397418
def get_path(self, path: str) -> Path:
419+
"""Convert a relative path to an absolute path within the workspace."""
398420
return Path(self.workspace.dir) / path
399421

400422
def check_subset_exists(self, source_depset: Depset, requirements: List[str]):
423+
"""Verify that all requirements exist in the source depset."""
401424
for req in requirements:
402425
if req not in self.get_expanded_depset_requirements(source_depset.name, []):
403426
raise RuntimeError(
@@ -424,15 +447,18 @@ def get_expanded_depset_requirements(
424447
return list(set(requirements_list))
425448

426449
def cleanup(self):
450+
"""Remove the temporary directory used for lock file comparisons."""
427451
if self.temp_dir:
428452
shutil.rmtree(self.temp_dir)
429453

430454

431455
def _get_bytes(packages: List[str]) -> bytes:
456+
"""Convert a list of package names to newline-separated UTF-8 bytes."""
432457
return ("\n".join(packages) + "\n").encode("utf-8")
433458

434459

435460
def _get_depset(depsets: List[Depset], name: str) -> Depset:
461+
"""Find and return a depset by name from a list of depsets."""
436462
for depset in depsets:
437463
if depset.name == name:
438464
return depset
@@ -452,6 +478,7 @@ def _flatten_flags(flags: List[str]) -> List[str]:
452478

453479

454480
def _override_uv_flags(flags: List[str], args: List[str]) -> List[str]:
481+
"""Override existing uv flags in args with new values from flags."""
455482
flag_names = {f.split()[0] for f in flags if f.startswith("--")}
456483
new_args = []
457484
skip_next = False
@@ -468,6 +495,7 @@ def _override_uv_flags(flags: List[str], args: List[str]) -> List[str]:
468495

469496

470497
def _uv_binary():
498+
"""Get the path to the uv binary for the current platform."""
471499
r = runfiles.Create()
472500
system = platform.system()
473501
processor = platform.processor()

ci/raydepsets/configs/rayimg.depsets.yaml

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@ build_arg_sets:
88
py312:
99
PYTHON_VERSION: "3.12"
1010
PYTHON_SHORT: "312"
11+
py313:
12+
PYTHON_VERSION: "3.13"
13+
PYTHON_SHORT: "313"
1114

1215
depsets:
1316
- name: ray_img_depset_${PYTHON_SHORT}
1417
requirements:
15-
- release/ray_release/byod/ray_dev.in
18+
- python/deplocks/ray_img/ray_dev.in
1619
constraints:
17-
- /tmp/ray-deps/requirements_compiled.txt
20+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
1821
output: python/deplocks/ray_img/ray_img_py${PYTHON_SHORT}.lock
1922
operation: compile
2023
append_flags:
@@ -25,9 +28,10 @@ depsets:
2528
- py310
2629
- py311
2730
- py312
31+
- py313
2832
pre_hooks:
2933
- ci/raydepsets/pre_hooks/build-placeholder-wheel.sh
30-
- ci/raydepsets/pre_hooks/remove-compiled-headers.sh
34+
- ci/raydepsets/pre_hooks/remove-compiled-headers.sh ${PYTHON_VERSION}
3135

3236
- name: ray_base_extra_testdeps_${PYTHON_SHORT}
3337
operation: expand
@@ -36,7 +40,7 @@ depsets:
3640
- docker/base-deps/requirements.in
3741
- docker/base-extra/requirements.in
3842
constraints:
39-
- /tmp/ray-deps/requirements_compiled.txt
43+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
4044
depsets:
4145
- ray_img_depset_${PYTHON_SHORT}
4246
output: release/ray_release/byod/ray_base_extra_testdeps_py${PYTHON_VERSION}.lock
@@ -48,6 +52,7 @@ depsets:
4852
- py310
4953
- py311
5054
- py312
55+
- py313
5156

5257
- name: ray_base_extra_testdeps_cuda_${PYTHON_SHORT}
5358
operation: expand
@@ -56,7 +61,7 @@ depsets:
5661
- docker/base-deps/requirements.in
5762
- docker/base-extra/requirements.in
5863
constraints:
59-
- /tmp/ray-deps/requirements_compiled.txt
64+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
6065
depsets:
6166
- ray_img_depset_${PYTHON_SHORT}
6267
output: release/ray_release/byod/ray_base_extra_testdeps_cuda_py${PYTHON_VERSION}.lock
@@ -69,6 +74,7 @@ depsets:
6974
- py310
7075
- py311
7176
- py312
77+
- py313
7278

7379
- name: ray_base_extra_testdeps_gpu_${PYTHON_SHORT}
7480
operation: expand
@@ -77,7 +83,7 @@ depsets:
7783
- docker/base-deps/requirements.in
7884
- docker/base-extra/requirements.in
7985
constraints:
80-
- /tmp/ray-deps/requirements_compiled.txt
86+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
8187
depsets:
8288
- ray_img_depset_${PYTHON_SHORT}
8389
output: release/ray_release/byod/ray_base_extra_testdeps_gpu_py${PYTHON_VERSION}.lock
@@ -96,7 +102,7 @@ depsets:
96102
- docker/base-deps/requirements.in
97103
- docker/base-extra/requirements.in
98104
constraints:
99-
- /tmp/ray-deps/requirements_compiled.txt
105+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
100106
depsets:
101107
- ray_img_depset_${PYTHON_SHORT}
102108
output: release/ray_release/byod/ray_ml_base_extra_testdeps_py${PYTHON_VERSION}.lock
@@ -114,7 +120,7 @@ depsets:
114120
- docker/base-deps/requirements.in
115121
- docker/base-extra/requirements.in
116122
constraints:
117-
- /tmp/ray-deps/requirements_compiled.txt
123+
- /tmp/ray-deps/requirements_compiled_py${PYTHON_VERSION}.txt
118124
depsets:
119125
- ray_img_depset_${PYTHON_SHORT}
120126
output: release/ray_release/byod/ray_ml_base_extra_testdeps_cuda_py${PYTHON_VERSION}.lock
@@ -142,6 +148,7 @@ depsets:
142148
- py310
143149
- py311
144150
- py312
151+
- py313
145152
include_setuptools: true
146153

147154
# Second layer of the ray release images. Contains anyscale cli deps (and others)
@@ -161,3 +168,4 @@ depsets:
161168
- py310
162169
- py311
163170
- py312
171+
- py313

ci/raydepsets/pre_hooks/remove-compiled-headers.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,17 @@
22

33
set -euo pipefail
44

5+
PYTHON_VERSION=${1:-}
6+
7+
if [[ -z "${PYTHON_VERSION}" ]]; then
8+
FILENAME="requirements_compiled.txt"
9+
else
10+
FILENAME="requirements_compiled_py${PYTHON_VERSION}.txt"
11+
fi
12+
513
mkdir -p /tmp/ray-deps
614

715
# Remove the GPU constraints
8-
cp python/requirements_compiled.txt /tmp/ray-deps/requirements_compiled.txt
9-
sed -e '/^--extra-index-url /d' -e '/^--find-links /d' /tmp/ray-deps/requirements_compiled.txt > /tmp/ray-deps/requirements_compiled.txt.tmp
10-
mv /tmp/ray-deps/requirements_compiled.txt.tmp /tmp/ray-deps/requirements_compiled.txt
16+
cp "python/${FILENAME}" "/tmp/ray-deps/${FILENAME}"
17+
sed -e '/^--extra-index-url /d' -e '/^--find-links /d' "/tmp/ray-deps/${FILENAME}" > "/tmp/ray-deps/${FILENAME}.tmp"
18+
mv "/tmp/ray-deps/${FILENAME}.tmp" "/tmp/ray-deps/${FILENAME}"

cpp/src/ray/runtime/object/native_object_store.cc

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,37 @@ void NativeObjectStore::CheckException(const std::string &meta_str,
7676
throw RayWorkerException(std::move(data_str));
7777
} else if (meta_str == std::to_string(ray::rpc::ErrorType::ACTOR_DIED)) {
7878
throw RayActorException(std::move(data_str));
79-
} else if (meta_str == std::to_string(ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE) ||
80-
meta_str == std::to_string(ray::rpc::ErrorType::OBJECT_LOST) ||
81-
meta_str == std::to_string(ray::rpc::ErrorType::OWNER_DIED) ||
82-
meta_str == std::to_string(ray::rpc::ErrorType::OBJECT_DELETED)) {
83-
// TODO: Differentiate object errors.
84-
throw UnreconstructableException(std::move(data_str));
8579
} else if (meta_str == std::to_string(ray::rpc::ErrorType::TASK_EXECUTION_EXCEPTION)) {
8680
throw RayTaskException(std::move(data_str));
81+
} else if (meta_str == std::to_string(ray::rpc::ErrorType::OBJECT_LOST) ||
82+
meta_str == std::to_string(ray::rpc::ErrorType::OWNER_DIED) ||
83+
meta_str == std::to_string(ray::rpc::ErrorType::OBJECT_DELETED) ||
84+
meta_str ==
85+
std::to_string(ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_PUT) ||
86+
meta_str ==
87+
std::to_string(
88+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_RETRIES_DISABLED) ||
89+
meta_str ==
90+
std::to_string(
91+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_LINEAGE_EVICTED) ||
92+
meta_str ==
93+
std::to_string(ray::rpc::ErrorType::
94+
OBJECT_UNRECONSTRUCTABLE_MAX_ATTEMPTS_EXCEEDED) ||
95+
meta_str ==
96+
std::to_string(ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_BORROWED) ||
97+
meta_str == std::to_string(
98+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_LOCAL_MODE) ||
99+
meta_str ==
100+
std::to_string(
101+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_REF_NOT_FOUND) ||
102+
meta_str ==
103+
std::to_string(
104+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_TASK_CANCELLED) ||
105+
meta_str ==
106+
std::to_string(
107+
ray::rpc::ErrorType::OBJECT_UNRECONSTRUCTABLE_LINEAGE_DISABLED)) {
108+
// TODO: Differentiate object error
109+
throw UnreconstructableException(std::move(data_str));
87110
}
88111
}
89112

cpp/src/ray/util/process_helper.cc

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,16 +114,12 @@ void ProcessHelper::RayStart(CoreWorkerOptions::TaskExecutionCallback callback)
114114
ConfigInternal::Instance().plasma_store_socket_name =
115115
node_info.object_store_socket_name();
116116
ConfigInternal::Instance().node_manager_port = node_info.node_manager_port();
117+
ConfigInternal::Instance().UpdateSessionDir(node_info.session_dir());
117118
}
118119
RAY_CHECK(!ConfigInternal::Instance().raylet_socket_name.empty());
119120
RAY_CHECK(!ConfigInternal::Instance().plasma_store_socket_name.empty());
120121
RAY_CHECK(ConfigInternal::Instance().node_manager_port > 0);
121122

122-
if (ConfigInternal::Instance().worker_type == WorkerType::DRIVER) {
123-
auto session_dir = *global_state_accessor->GetInternalKV("session", "session_dir");
124-
ConfigInternal::Instance().UpdateSessionDir(session_dir);
125-
}
126-
127123
gcs::GcsClientOptions gcs_options =
128124
gcs::GcsClientOptions(bootstrap_ip,
129125
bootstrap_port,

0 commit comments

Comments
 (0)