Skip to content

Commit 66d1feb

Browse files
douglasdavis, lgray, and pre-commit-ci[bot]
authored
feat(draft): add report= argument for uproot.dask; trigger report collection (take 2!) (#1058)
* add report= to trigger dask-awkward creating graceful-failure-report
* add mock_empty
* whoops
* fix mock_empty
* backend is passed in upstream; use OSError
* Update src/uproot/_dask.py Co-authored-by: Lindsey Gray <[email protected]>
* use oop interface
* need allowed_exceptions
* time and functools
* whoops
* add to report; small fixes
* style: pre-commit fixes
* move some methods
* whoops
* None for success
* monotonic -> time
* ordering
* fixup duration stuff
* two impls...
* call_time needs to be outside wrapper
* rename argument; add function arg description to docstring
* add test
---------
Co-authored-by: Lindsey Gray <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent c5ff061 commit 66d1feb

File tree

3 files changed

+208
-28
lines changed

3 files changed

+208
-28
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ requires-python = ">=3.8"
5151
[project.optional-dependencies]
5252
dev = [
5353
"boost_histogram>=0.13",
54-
"dask-awkward>=2023.10.0",
54+
"dask-awkward>=2023.12.1",
5555
"dask[array]",
5656
"hist>=1.2",
5757
"pandas",

src/uproot/_dask.py

Lines changed: 182 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from __future__ import annotations
22

3+
import functools
34
import math
5+
import socket
6+
import time
47
from collections.abc import Callable, Iterable, Mapping
58

69
try:
@@ -38,6 +41,7 @@ def dask(
3841
allow_missing=False,
3942
open_files=True,
4043
form_mapping=None,
44+
allow_read_errors_with_report=False,
4145
**options,
4246
):
4347
"""
@@ -91,6 +95,10 @@ def dask(
9195
form_mapping (Callable[awkward.forms.Form] -> awkward.forms.Form | None): If not none
9296
and library="ak" then apply this remapping function to the awkward form of the input
9397
data. The form keys of the desired form should be available data in the input form.
98+
allow_read_errors_with_report (bool): If True, catch OSError exceptions and return an
99+
empty array for these nodes in the task graph. The return of this function then
100+
becomes a two element tuple, where the first return is the dask-awkward collection
101+
of interest and the second return is a report dask-awkward collection.
94102
options: See below.
95103
96104
Returns dask equivalents of the backends supported by uproot. If ``library='np'``,
@@ -259,6 +267,7 @@ def dask(
259267
interp_options,
260268
form_mapping,
261269
steps_per_file,
270+
allow_read_errors_with_report,
262271
)
263272
else:
264273
return _get_dak_array_delay_open(
@@ -274,6 +283,7 @@ def dask(
274283
interp_options,
275284
form_mapping,
276285
steps_per_file,
286+
allow_read_errors_with_report,
277287
)
278288
else:
279289
raise NotImplementedError()
@@ -890,6 +900,15 @@ class UprootReadMixin:
890900
form_mapping_info: ImplementsFormMappingInfo
891901
common_keys: frozenset[str]
892902
interp_options: dict[str, Any]
903+
allow_read_errors_with_report: bool
904+
905+
@property
def allowed_exceptions(self):
    """Exception types that are caught and reported instead of raised.

    Returns:
        tuple: A one-element tuple containing ``OSError``.
    """
    recoverable = (OSError,)
    return recoverable
908+
909+
@property
def return_report(self) -> bool:
    """Whether a report should be returned alongside the data.

    Returns:
        bool: The value of ``self.allow_read_errors_with_report``.
    """
    report_requested = self.allow_read_errors_with_report
    return report_requested
893912

894913
def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
895914
assert start <= stop
@@ -946,6 +965,15 @@ def mock(self) -> AwkArray:
946965
behavior=self.form_mapping_info.behavior,
947966
)
948967

968+
def mock_empty(self, backend) -> AwkArray:
    """Build a zero-length array of the expected form on ``backend``.

    Args:
        backend: Backend name forwarded to ``awkward.to_backend``.

    Returns:
        A high-level awkward array derived from
        ``self.expected_form.length_zero_array`` and carrying the behavior
        of ``self.form_mapping_info``.
    """
    awkward = uproot.extras.awkward()
    empty_layout = self.expected_form.length_zero_array(highlevel=False)
    return awkward.to_backend(
        empty_layout,
        backend=backend,
        highlevel=True,
        behavior=self.form_mapping_info.behavior,
    )
976+
949977
def prepare_for_projection(self) -> tuple[AwkArray, TypeTracerReport, dict]:
950978
awkward = uproot.extras.awkward()
951979
dask_awkward = uproot.extras.dask_awkward()
@@ -1011,6 +1039,53 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
10111039
raise NotImplementedError
10121040

10131041

1042+
def _report_failure(exception, call_time, *args, **kwargs):
    """Build a one-record awkward Array describing a failed call.

    Args:
        exception: The exception that was caught; its type name and string
            form are stored in the record.
        call_time: Value stored under ``"call_time"``.
        *args: Positional arguments of the failed call, stored as ``repr``
            strings.
        **kwargs: Keyword arguments of the failed call, stored as
            ``[name, repr(value)]`` pairs.

    Returns:
        An ``awkward.Array`` with a single record. ``"duration"`` is ``None``
        and the ``"fqdn"`` / ``"hostname"`` fields come from the ``socket``
        module.
    """
    awkward = uproot.extras.awkward()
    record = {
        "call_time": call_time,
        "duration": None,
        "args": list(map(repr, args)),
        "kwargs": [[name, repr(value)] for name, value in kwargs.items()],
        "exception": type(exception).__name__,
        "message": str(exception),
        "fqdn": socket.getfqdn(),
        "hostname": socket.gethostname(),
    }
    return awkward.Array([record])
1058+
1059+
1060+
def _report_success(duration, *args, **kwargs):
    """Build a one-record awkward Array describing a successful call.

    Args:
        duration: Value stored under ``"duration"``.
        *args: Positional arguments of the call, stored as ``repr`` strings.
        **kwargs: Keyword arguments of the call, stored as
            ``[name, repr(value)]`` pairs.

    Returns:
        An ``awkward.Array`` with a single record. The ``"call_time"``,
        ``"exception"``, ``"message"``, ``"fqdn"`` and ``"hostname"`` fields
        are all ``None``.
    """
    awkward = uproot.extras.awkward()
    record = {
        "call_time": None,
        "duration": duration,
        "args": list(map(repr, args)),
        "kwargs": [[name, repr(value)] for name, value in kwargs.items()],
        "exception": None,
        "message": None,
        "fqdn": None,
        "hostname": None,
    }
    return awkward.Array([record])
1076+
1077+
1078+
def with_duration(f):
    """Wrap ``f`` so that each call also reports how long it took.

    Args:
        f (Callable): The function to time.

    Returns:
        Callable: A wrapper with the same call signature as ``f`` that
        returns ``(result, duration)``, where ``duration`` is the elapsed
        time in fractional seconds. Exceptions raised by ``f`` propagate
        unchanged; no duration is produced in that case.
    """

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        # perf_counter is the clock intended for measuring short intervals;
        # time.monotonic can be too coarse on some platforms, making quick
        # calls appear to take exactly 0.0 seconds.
        start = time.perf_counter()
        result = f(*args, **kwargs)
        return result, time.perf_counter() - start

    return wrapper
1087+
1088+
10141089
class _UprootRead(UprootReadMixin):
10151090
def __init__(
10161091
self,
@@ -1020,13 +1095,15 @@ def __init__(
10201095
base_form: Form,
10211096
expected_form: Form,
10221097
form_mapping_info: ImplementsFormMappingInfo,
1098+
allow_read_errors_with_report: bool,
10231099
) -> None:
10241100
self.ttrees = ttrees
10251101
self.common_keys = frozenset(common_keys)
10261102
self.interp_options = interp_options
10271103
self.base_form = base_form
10281104
self.expected_form = expected_form
10291105
self.form_mapping_info = form_mapping_info
1106+
self.allow_read_errors_with_report = allow_read_errors_with_report
10301107

10311108
def project_keys(self: T, keys: frozenset[str]) -> T:
10321109
return _UprootRead(
@@ -1036,11 +1113,39 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
10361113
self.base_form,
10371114
self.expected_form,
10381115
self.form_mapping_info,
1116+
self.allow_read_errors_with_report,
10391117
)
10401118

1041-
def __call__(self, i_start_stop) -> AwkArray:
1119+
def __call__(self, i_start_stop):
    """Read one partition, optionally paired with a report record.

    Args:
        i_start_stop: A 3-tuple ``(i, start, stop)``; ``i`` indexes
            ``self.ttrees`` and ``start`` / ``stop`` are forwarded to
            ``_call_impl``.

    Returns:
        When ``self.return_report`` is false, the result of ``_call_impl``.
        Otherwise a ``(array, report)`` tuple: on success the data and a
        success report; if one of ``self.allowed_exceptions`` is raised, an
        empty array from ``mock_empty`` and a failure report.
    """
    i, start, stop = i_start_stop

    # Plain mode: no report requested, so exceptions propagate to the caller.
    if not self.return_report:
        return self._call_impl(i, start, stop)

    # The wall-clock timestamp is taken before the attempt so that it is
    # available to the failure report.
    call_time = time.time_ns()
    try:
        timed_read = with_duration(self._call_impl)
        result, duration = timed_read(i, start, stop)
        success_report = _report_success(duration, self.ttrees[i], start, stop)
        return result, success_report
    except self.allowed_exceptions as err:
        empty = self.mock_empty(backend="cpu")
        failure_report = _report_failure(
            err, call_time, self.ttrees[i], start, stop
        )
        return empty, failure_report
1147+
1148+
def _call_impl(self, i, start, stop):
10441149
return self.read_tree(self.ttrees[i], start, stop)
10451150

10461151

@@ -1055,6 +1160,7 @@ def __init__(
10551160
base_form: Form,
10561161
expected_form: Form,
10571162
form_mapping_info: ImplementsFormMappingInfo,
1163+
allow_read_errors_with_report: bool,
10581164
) -> None:
10591165
self.custom_classes = custom_classes
10601166
self.allow_missing = allow_missing
@@ -1064,15 +1170,11 @@ def __init__(
10641170
self.base_form = base_form
10651171
self.expected_form = expected_form
10661172
self.form_mapping_info = form_mapping_info
1173+
self.allow_read_errors_with_report = allow_read_errors_with_report
10671174

1068-
def __call__(self, blockwise_args) -> AwkArray:
1069-
(
1070-
file_path,
1071-
object_path,
1072-
i_step_or_start,
1073-
n_steps_or_stop,
1074-
is_chunk,
1075-
) = blockwise_args
1175+
def _call_impl(
1176+
self, file_path, object_path, i_step_or_start, n_steps_or_stop, is_chunk
1177+
):
10761178
ttree = uproot._util.regularize_object_path(
10771179
file_path,
10781180
object_path,
@@ -1105,6 +1207,50 @@ def __call__(self, blockwise_args) -> AwkArray:
11051207

11061208
return self.read_tree(ttree, start, stop)
11071209

1210+
def __call__(self, blockwise_args):
    """Open and read one partition, optionally paired with a report record.

    Args:
        blockwise_args: A 5-tuple ``(file_path, object_path,
            i_step_or_start, n_steps_or_stop, is_chunk)`` that is forwarded
            to ``_call_impl``.

    Returns:
        When ``self.return_report`` is false, the result of ``_call_impl``.
        Otherwise a ``(array, report)`` tuple: on success the data and a
        success report; if one of ``self.allowed_exceptions`` is raised, an
        empty array from ``mock_empty`` and a failure report.
    """
    (
        file_path,
        object_path,
        i_step_or_start,
        n_steps_or_stop,
        is_chunk,
    ) = blockwise_args
    read_args = (
        file_path,
        object_path,
        i_step_or_start,
        n_steps_or_stop,
        is_chunk,
    )

    # Plain mode: no report requested, so exceptions propagate to the caller.
    if not self.return_report:
        return self._call_impl(*read_args)

    # The wall-clock timestamp is taken before the attempt so that it is
    # available to the failure report.
    call_time = time.time_ns()
    try:
        timed_read = with_duration(self._call_impl)
        result, duration = timed_read(*read_args)
        success_report = _report_success(duration, *read_args)
        return result, success_report
    except self.allowed_exceptions as err:
        empty = self.mock_empty(backend="cpu")
        failure_report = _report_failure(err, call_time, *read_args)
        return empty, failure_report
1253+
11081254
def project_keys(self: T, keys: frozenset[str]) -> T:
11091255
return _UprootOpenAndRead(
11101256
self.custom_classes,
@@ -1115,6 +1261,7 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
11151261
self.base_form,
11161262
self.expected_form,
11171263
self.form_mapping_info,
1264+
self.allow_read_errors_with_report,
11181265
)
11191266

11201267

@@ -1151,6 +1298,7 @@ def _get_dak_array(
11511298
interp_options,
11521299
form_mapping,
11531300
steps_per_file,
1301+
allow_read_errors_with_report,
11541302
):
11551303
dask_awkward = uproot.extras.dask_awkward()
11561304
awkward = uproot.extras.awkward()
@@ -1306,15 +1454,18 @@ def real_filter_branch(branch):
13061454
else:
13071455
expected_form, form_mapping_info = form_mapping(base_form)
13081456

1457+
fn = _UprootRead(
1458+
ttrees,
1459+
common_keys,
1460+
interp_options,
1461+
base_form=base_form,
1462+
expected_form=expected_form,
1463+
form_mapping_info=form_mapping_info,
1464+
allow_read_errors_with_report=allow_read_errors_with_report,
1465+
)
1466+
13091467
return dask_awkward.from_map(
1310-
_UprootRead(
1311-
ttrees,
1312-
common_keys,
1313-
interp_options,
1314-
base_form=base_form,
1315-
expected_form=expected_form,
1316-
form_mapping_info=form_mapping_info,
1317-
),
1468+
fn,
13181469
partition_args,
13191470
divisions=tuple(divisions),
13201471
label="from-uproot",
@@ -1334,6 +1485,7 @@ def _get_dak_array_delay_open(
13341485
interp_options,
13351486
form_mapping,
13361487
steps_per_file,
1488+
allow_read_errors_with_report,
13371489
):
13381490
dask_awkward = uproot.extras.dask_awkward()
13391491
awkward = uproot.extras.awkward()
@@ -1396,17 +1548,20 @@ def _get_dak_array_delay_open(
13961548
else:
13971549
expected_form, form_mapping_info = form_mapping(base_form)
13981550

1551+
fn = _UprootOpenAndRead(
1552+
custom_classes,
1553+
allow_missing,
1554+
real_options,
1555+
common_keys,
1556+
interp_options,
1557+
base_form=base_form,
1558+
expected_form=expected_form,
1559+
form_mapping_info=form_mapping_info,
1560+
allow_read_errors_with_report=allow_read_errors_with_report,
1561+
)
1562+
13991563
return dask_awkward.from_map(
1400-
_UprootOpenAndRead(
1401-
custom_classes,
1402-
allow_missing,
1403-
real_options,
1404-
common_keys,
1405-
interp_options,
1406-
base_form=base_form,
1407-
expected_form=expected_form,
1408-
form_mapping_info=form_mapping_info,
1409-
),
1564+
fn,
14101565
partition_args,
14111566
divisions=None if divisions is None else tuple(divisions),
14121567
label="from-uproot",
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pytest
2+
import skhep_testdata
3+
4+
import uproot
5+
6+
7+
dask = pytest.importorskip("dask")
8+
dask_awkward = pytest.importorskip("dask_awkward")
9+
10+
11+
def test_with_report():
    """Two readable files plus one missing path yield a three-entry report."""
    files = [
        skhep_testdata.data_path("uproot-Zmumu.root") + ":events",
        skhep_testdata.data_path("uproot-Zmumu-uncompressed.root") + ":events",
        "/some/file/that/doesnt/exist",
    ]
    collection, report = uproot.dask(
        files,
        library="ak",
        open_files=False,
        allow_read_errors_with_report=True,
    )
    _, creport = dask.compute(collection, report)

    # The first two inputs are good files, so no exception is recorded.
    for good_index in (0, 1):
        assert creport[good_index].exception is None
    # The third input does not exist, so the failure is recorded in the report.
    assert creport[2].exception == "FileNotFoundError"

0 commit comments

Comments
 (0)