Skip to content

Commit d0fb5b9

Browse files
ariostasmrzimuCopilot
authored
fix: make inherited fields accessible (#1589)
* Fixed inherited fields being unreachable * Fixed arrays * Added test * fix: resolve duplicated field names in RNTuple by adding prefixes for inheritance (#1594) * fix: resolve duplicated field names in RNTuple by adding prefixes for inheritance * fix: enhance unique field name generation in RNTuple by refining prefix handling * remove type annotations * add protection in name-generation loop * invalidate sub-fields' path cache * remove not-needed `record_names=None` * Recursively invalidate path cache Co-authored-by: Andres Rios Tascon <ariostas@gmail.com> --------- Co-authored-by: Andres Rios Tascon <ariostas@gmail.com> * Remove unused import Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Use explicit check instead of assertion Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Use normalized name as starting point Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Mingrun Li <74824770+mrzimu@users.noreply.github.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent e9f59c6 commit d0fb5b9

File tree

3 files changed

+281
-51
lines changed

3 files changed

+281
-51
lines changed

src/uproot/behaviors/RNTuple.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -465,16 +465,15 @@ def fields(self):
465465
if f.parent_field_id == i
466466
]
467467
else:
468-
fields = [
469-
rntuple.all_fields[i]
470-
for i, f in enumerate(rntuple.field_records)
471-
if f.parent_field_id == self._fid
472-
and f.parent_field_id != i
473-
and not rntuple.all_fields[i].is_ignored
474-
]
475-
# If the child field is anonymous, we return the grandchildren
476-
if len(fields) == 1 and fields[0].is_anonymous:
477-
fields = fields[0].fields
468+
fields = []
469+
for i, f in enumerate(rntuple.field_records):
470+
if f.parent_field_id != self._fid or f.parent_field_id == i:
471+
continue
472+
if rntuple.all_fields[i].is_anonymous:
473+
# for anonymous fields, we use their children instead
474+
fields.extend(rntuple.all_fields[i].fields)
475+
else:
476+
fields.append(rntuple.all_fields[i])
478477
self._fields = fields
479478
return self._fields
480479

@@ -489,7 +488,7 @@ def path(self):
489488
if isinstance(self, uproot.behaviors.RNTuple.RNTuple):
490489
return "."
491490
# For some anonymous fields, the path is not available
492-
if self.is_anonymous or self.is_ignored:
491+
if self.is_anonymous or self.in_variant:
493492
return None
494493
if self._path is None:
495494
path = self.name
@@ -1357,7 +1356,11 @@ def iteritems(
13571356
and (filter_typename is no_filter or filter_typename(field.typename))
13581357
and (filter_field is no_filter or filter_field(field))
13591358
):
1360-
if field.is_anonymous or (ignore_duplicates and field.name in keys_set):
1359+
if (
1360+
field.is_anonymous
1361+
or field.in_variant
1362+
or (ignore_duplicates and field.name in keys_set)
1363+
):
13611364
pass
13621365
else:
13631366
keys_set.add(field.name)
@@ -1373,7 +1376,7 @@ def iteritems(
13731376
):
13741377
k2 = (
13751378
f"{field.name}.{k1}"
1376-
if full_paths and not field.is_anonymous
1379+
if full_paths and not (field.is_anonymous or field.in_variant)
13771380
else k1
13781381
)
13791382
if filter_name is no_filter or _filter_name_deep(

src/uproot/models/RNTuple.py

Lines changed: 149 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
import re
1111
import struct
1212
import sys
13-
from collections import defaultdict
13+
from collections import Counter, defaultdict
14+
from itertools import groupby
1415
from typing import NamedTuple
1516

1617
import awkward as ak
@@ -160,6 +161,101 @@ def all_fields(self):
160161
"""
161162
if self._all_fields is None:
162163
self._all_fields = [RField(i, self) for i in range(len(self.field_records))]
164+
165+
# Collect fields that have the same path
166+
def _sort_key(f):
167+
return f.path if f.path else ""
168+
169+
sorted_by_path = sorted(self._all_fields, key=_sort_key)
170+
for path, group in groupby(sorted_by_path, key=_sort_key):
171+
if path == "":
172+
continue
173+
174+
group_list = list(group)
175+
if len(group_list) < 2:
176+
continue
177+
178+
# Step 1: Generate ancestor stack and remove top common parts
179+
ancestor_stacks = []
180+
for field in group_list:
181+
f = field
182+
tmp_stack = []
183+
while isinstance(f, RField):
184+
tmp_stack.append(f)
185+
f = f.parent
186+
ancestor_stacks.append(tmp_stack)
187+
188+
while all(
189+
stack[-1] == ancestor_stacks[0][-1] for stack in ancestor_stacks[1:]
190+
):
191+
for stack in ancestor_stacks:
192+
stack.pop()
193+
194+
# Step 2: Generate ancestor name stacks (e.g. ['member', 'Base', 'Child'])
195+
ancestor_name_stacks = []
196+
for name_stack in ancestor_stacks:
197+
ancestor_name_stacks.append(
198+
[name_stack[0].name]
199+
+ [field.record.type_name for field in name_stack[1:]]
200+
)
201+
202+
# Step 3: Generate unique field names by adding prefixes like `Base::` until the names are unique
203+
def _generate_fieldname(name_stack, n_prefix):
204+
"""
205+
['member'] -> 'member'
206+
['member', 'Base', 'Child', 'GrandChild'], n_prefix=0 -> 'member'
207+
['member', 'Base', 'Child', 'GrandChild'], n_prefix=1 -> 'GrandChild::member'
208+
['member', 'Base', 'Child', 'GrandChild'], n_prefix=2 -> 'GrandChild::Child::member'
209+
"""
210+
if len(name_stack) < 2:
211+
return name_stack[0]
212+
213+
field_name = name_stack[0]
214+
215+
n_prefix = min(n_prefix, len(name_stack) - 1)
216+
prefixes = name_stack[::-1][:n_prefix]
217+
218+
if prefixes:
219+
return "::".join(prefixes) + "::" + field_name
220+
else:
221+
return field_name
222+
223+
n_prefixes = [0 for _ in range(len(group_list))]
224+
new_names = [
225+
_generate_fieldname(name_stack, n_prefix)
226+
for name_stack, n_prefix in zip(
227+
ancestor_name_stacks, n_prefixes, strict=True
228+
)
229+
]
230+
231+
name_counter = Counter(new_names)
232+
while len(new_names) != len(name_counter):
233+
for i in range(len(n_prefixes)):
234+
# For non-unique names, add one more prefix and regenerate the name
235+
if name_counter[new_names[i]] > 1:
236+
n_prefixes[i] += 1
237+
238+
if n_prefixes[i] >= len(ancestor_name_stacks[i]):
239+
raise RuntimeError(
240+
"Ran out of prefixes to add, but names are still not unique."
241+
)
242+
243+
new_names[i] = _generate_fieldname(
244+
ancestor_name_stacks[i], n_prefixes[i]
245+
)
246+
name_counter = Counter(new_names)
247+
248+
# Step 4: Rename fields
249+
for field, new_name in zip(group_list, new_names, strict=True):
250+
field._name = new_name
251+
252+
# invalidate path cache
253+
to_invalidate = [field]
254+
while to_invalidate:
255+
f = to_invalidate.pop()
256+
f._path = None
257+
to_invalidate.extend(f.fields)
258+
163259
return self._all_fields
164260

165261
def _prepare_header_chunk(self):
@@ -527,7 +623,7 @@ def field_form(self, this_id, keys, ak_add_doc=False):
527623
or key == self.all_fields[i].path
528624
for key in keys
529625
)
530-
or self.all_fields[i].is_anonymous
626+
or self.all_fields[i].in_variant
531627
):
532628
recordlist.append(
533629
self.field_form(i, keys, ak_add_doc=ak_add_doc)
@@ -566,23 +662,33 @@ def field_form(self, this_id, keys, ak_add_doc=False):
566662
)
567663
elif structural_role == uproot.const.RNTupleFieldRole.RECORD:
568664
newids = []
569-
if this_id in self._related_ids:
570-
newids = self._related_ids[this_id]
665+
to_check = [this_id]
666+
while to_check:
667+
current_id = to_check.pop()
668+
if current_id in self._related_ids:
669+
children = self._related_ids[current_id]
670+
for child in children:
671+
if self.all_fields[child].is_anonymous:
672+
to_check.append(child)
673+
else:
674+
newids.append(child)
571675
# go find N in the rest, N is the # of fields in struct
572676
recordlist = []
573677
namelist = []
678+
record_names = []
574679
for i in newids:
575680
if (
576681
any(
577682
key.startswith(f"{self.all_fields[i].path}.")
578683
or key == self.all_fields[i].path
579684
for key in keys
580685
)
581-
or self.all_fields[i].is_anonymous
686+
or self.all_fields[i].in_variant
582687
):
583688
recordlist.append(self.field_form(i, keys, ak_add_doc=ak_add_doc))
584-
namelist.append(field_records[i].field_name)
585-
if all(re.fullmatch(r"_[0-9]+", name) is not None for name in namelist):
689+
namelist.append(self.all_fields[i].name)
690+
record_names.append(field_records[i].field_name)
691+
if all(re.fullmatch(r"_[0-9]+", name) is not None for name in record_names):
586692
namelist = None
587693
return ak.forms.RecordForm(
588694
recordlist, namelist, form_key="whatever", parameters=parameters
@@ -1680,7 +1786,8 @@ def __init__(self, fid, ntuple):
16801786
self._lookup = None
16811787
self._path = None
16821788
self._is_anonymous = None
1683-
self._is_ignored = None
1789+
self._in_variant = None
1790+
self._name = None
16841791

16851792
def __repr__(self):
16861793
if len(self) == 0:
@@ -1693,15 +1800,19 @@ def name(self):
16931800
"""
16941801
Name of the ``RField``.
16951802
"""
1696-
# We rename subfields of tuples to match Awkward
1697-
name = self._ntuple.field_records[self._fid].field_name
1698-
if (
1699-
not self.top_level
1700-
and self.parent.record.struct_role == uproot.const.RNTupleFieldRole.RECORD
1701-
and re.fullmatch(r"_[0-9]+", name) is not None
1702-
):
1703-
name = name[1:]
1704-
return name
1803+
if self._name is None:
1804+
# We rename subfields of tuples to match Awkward
1805+
name = self._ntuple.field_records[self._fid].field_name
1806+
if (
1807+
not self.top_level
1808+
and self.parent.record.struct_role
1809+
== uproot.const.RNTupleFieldRole.RECORD
1810+
and re.fullmatch(r"_[0-9]+", name) is not None
1811+
):
1812+
name = name[1:]
1813+
1814+
self._name = name
1815+
return self._name
17051816

17061817
@property
17071818
def description(self):
@@ -1729,12 +1840,11 @@ def is_anonymous(self):
17291840
"""
17301841
There are some anonymous fields in the RNTuple specification that we hide from the user
17311842
to simplify the interface. These are fields named `_0` that are children of a collection,
1732-
variant, or atomic field.
1733-
1734-
All children fields of variants are ignored, since they cannot be accessed directly
1735-
in a consistent manner. They can only be accessed through the parent variant field.
1843+
variant, or atomic field. Fields that encode hierarchy (i.e. fields named `:_[0-9]+` that
1844+
are children of a record) are also considered anonymous.
17361845
"""
17371846
if self._is_anonymous is None:
1847+
# children of collections, variants, or atomic fields
17381848
self._is_anonymous = not self.top_level and (
17391849
self.parent.record.struct_role
17401850
in (
@@ -1747,29 +1857,30 @@ def is_anonymous(self):
17471857
and self.record.field_name == "_0"
17481858
)
17491859
)
1750-
field = self
1751-
while not field.top_level:
1752-
field = field.parent
1753-
if field.record.struct_role == uproot.const.RNTupleFieldRole.VARIANT:
1754-
self._is_anonymous = True
1755-
break
1756-
return self._is_anonymous
1757-
1758-
@property
1759-
def is_ignored(self):
1760-
"""
1761-
There are some fields in the RNTuple specification named `:_i` (for `i=0,1,2,...`)
1762-
that encode class hierarchy. These are not useful in Uproot, so they are ignored.
1763-
"""
1764-
if self._is_ignored is None:
1765-
self._is_ignored = (
1860+
# fields that encode hierarchy
1861+
self._is_anonymous |= (
17661862
not self.top_level
17671863
and self.parent.record.struct_role
17681864
== uproot.const.RNTupleFieldRole.RECORD
17691865
and re.fullmatch(r":_[0-9]+", self.name) is not None
17701866
)
1867+
return self._is_anonymous
17711868

1772-
return self._is_ignored
1869+
@property
1870+
def in_variant(self):
1871+
"""
1872+
All children fields of variants need special treatment, since they cannot be accessed directly
1873+
in a consistent manner. They can only be accessed through the parent variant field.
1874+
"""
1875+
if self._in_variant is None:
1876+
self._in_variant = False
1877+
field = self
1878+
while not field.top_level:
1879+
field = field.parent
1880+
if field.record.struct_role == uproot.const.RNTupleFieldRole.VARIANT:
1881+
self._in_variant = True
1882+
break
1883+
return self._in_variant
17731884

17741885
@property
17751886
def parent(self):

0 commit comments

Comments
 (0)