1010import re
1111import struct
1212import sys
13- from collections import defaultdict
13+ from collections import Counter , defaultdict
14+ from itertools import groupby
1415from typing import NamedTuple
1516
1617import awkward as ak
@@ -160,6 +161,101 @@ def all_fields(self):
160161 """
161162 if self ._all_fields is None :
162163 self ._all_fields = [RField (i , self ) for i in range (len (self .field_records ))]
164+
165+ # Collect fields that have the same path
166+ def _sort_key (f ):
167+ return f .path if f .path else ""
168+
169+ sorted_by_path = sorted (self ._all_fields , key = _sort_key )
170+ for path , group in groupby (sorted_by_path , key = _sort_key ):
171+ if path == "" :
172+ continue
173+
174+ group_list = list (group )
175+ if len (group_list ) < 2 :
176+ continue
177+
178+ # Step 1: Generate ancestor stack and remove top common parts
179+ ancestor_stacks = []
180+ for field in group_list :
181+ f = field
182+ tmp_stack = []
183+ while isinstance (f , RField ):
184+ tmp_stack .append (f )
185+ f = f .parent
186+ ancestor_stacks .append (tmp_stack )
187+
188+ while all (
189+ stack [- 1 ] == ancestor_stacks [0 ][- 1 ] for stack in ancestor_stacks [1 :]
190+ ):
191+ for stack in ancestor_stacks :
192+ stack .pop ()
193+
194+ # Step 2: Generate ancestor name stacks (e.g. ['member', 'Base', 'Child'])
195+ ancestor_name_stacks = []
196+ for name_stack in ancestor_stacks :
197+ ancestor_name_stacks .append (
198+ [name_stack [0 ].name ]
199+ + [field .record .type_name for field in name_stack [1 :]]
200+ )
201+
202+ # Step 3: Generate unique field names by adding prefixes like `Base::` until the names are unique
203+ def _generate_fieldname (name_stack , n_prefix ):
204+ """
205+ ['member'] -> 'member'
206+ ['member', 'Base', 'Child', 'GrandChild'], n_prefix=0 -> 'member'
207+ ['member', 'Base', 'Child', 'GrandChild'], n_prefix=1 -> 'GrandChild::member'
208+ ['member', 'Base', 'Child', 'GrandChild'], n_prefix=2 -> 'GrandChild::Child::member'
209+ """
210+ if len (name_stack ) < 2 :
211+ return name_stack [0 ]
212+
213+ field_name = name_stack [0 ]
214+
215+ n_prefix = min (n_prefix , len (name_stack ) - 1 )
216+ prefixes = name_stack [::- 1 ][:n_prefix ]
217+
218+ if prefixes :
219+ return "::" .join (prefixes ) + "::" + field_name
220+ else :
221+ return field_name
222+
223+ n_prefixes = [0 for _ in range (len (group_list ))]
224+ new_names = [
225+ _generate_fieldname (name_stack , n_prefix )
226+ for name_stack , n_prefix in zip (
227+ ancestor_name_stacks , n_prefixes , strict = True
228+ )
229+ ]
230+
231+ name_counter = Counter (new_names )
232+ while len (new_names ) != len (name_counter ):
233+ for i in range (len (n_prefixes )):
234+ # For non-unique names, add one more prefix and regenerate the name
235+ if name_counter [new_names [i ]] > 1 :
236+ n_prefixes [i ] += 1
237+
238+ if n_prefixes [i ] >= len (ancestor_name_stacks [i ]):
239+ raise RuntimeError (
240+ "Ran out of prefixes to add, but names are still not unique."
241+ )
242+
243+ new_names [i ] = _generate_fieldname (
244+ ancestor_name_stacks [i ], n_prefixes [i ]
245+ )
246+ name_counter = Counter (new_names )
247+
248+ # Step 4: Rename fields
249+ for field , new_name in zip (group_list , new_names , strict = True ):
250+ field ._name = new_name
251+
252+ # invalidate path cache
253+ to_invalidate = [field ]
254+ while to_invalidate :
255+ f = to_invalidate .pop ()
256+ f ._path = None
257+ to_invalidate .extend (f .fields )
258+
163259 return self ._all_fields
164260
165261 def _prepare_header_chunk (self ):
@@ -527,7 +623,7 @@ def field_form(self, this_id, keys, ak_add_doc=False):
527623 or key == self .all_fields [i ].path
528624 for key in keys
529625 )
530- or self .all_fields [i ].is_anonymous
626+ or self .all_fields [i ].in_variant
531627 ):
532628 recordlist .append (
533629 self .field_form (i , keys , ak_add_doc = ak_add_doc )
@@ -566,23 +662,33 @@ def field_form(self, this_id, keys, ak_add_doc=False):
566662 )
567663 elif structural_role == uproot .const .RNTupleFieldRole .RECORD :
568664 newids = []
569- if this_id in self ._related_ids :
570- newids = self ._related_ids [this_id ]
665+ to_check = [this_id ]
666+ while to_check :
667+ current_id = to_check .pop ()
668+ if current_id in self ._related_ids :
669+ children = self ._related_ids [current_id ]
670+ for child in children :
671+ if self .all_fields [child ].is_anonymous :
672+ to_check .append (child )
673+ else :
674+ newids .append (child )
571675 # go find N in the rest, N is the # of fields in struct
572676 recordlist = []
573677 namelist = []
678+ record_names = []
574679 for i in newids :
575680 if (
576681 any (
577682 key .startswith (f"{ self .all_fields [i ].path } ." )
578683 or key == self .all_fields [i ].path
579684 for key in keys
580685 )
581- or self .all_fields [i ].is_anonymous
686+ or self .all_fields [i ].in_variant
582687 ):
583688 recordlist .append (self .field_form (i , keys , ak_add_doc = ak_add_doc ))
584- namelist .append (field_records [i ].field_name )
585- if all (re .fullmatch (r"_[0-9]+" , name ) is not None for name in namelist ):
689+ namelist .append (self .all_fields [i ].name )
690+ record_names .append (field_records [i ].field_name )
691+ if all (re .fullmatch (r"_[0-9]+" , name ) is not None for name in record_names ):
586692 namelist = None
587693 return ak .forms .RecordForm (
588694 recordlist , namelist , form_key = "whatever" , parameters = parameters
@@ -1680,7 +1786,8 @@ def __init__(self, fid, ntuple):
16801786 self ._lookup = None
16811787 self ._path = None
16821788 self ._is_anonymous = None
1683- self ._is_ignored = None
1789+ self ._in_variant = None
1790+ self ._name = None
16841791
16851792 def __repr__ (self ):
16861793 if len (self ) == 0 :
@@ -1693,15 +1800,19 @@ def name(self):
16931800 """
16941801 Name of the ``RField``.
16951802 """
1696- # We rename subfields of tuples to match Awkward
1697- name = self ._ntuple .field_records [self ._fid ].field_name
1698- if (
1699- not self .top_level
1700- and self .parent .record .struct_role == uproot .const .RNTupleFieldRole .RECORD
1701- and re .fullmatch (r"_[0-9]+" , name ) is not None
1702- ):
1703- name = name [1 :]
1704- return name
1803+ if self ._name is None :
1804+ # We rename subfields of tuples to match Awkward
1805+ name = self ._ntuple .field_records [self ._fid ].field_name
1806+ if (
1807+ not self .top_level
1808+ and self .parent .record .struct_role
1809+ == uproot .const .RNTupleFieldRole .RECORD
1810+ and re .fullmatch (r"_[0-9]+" , name ) is not None
1811+ ):
1812+ name = name [1 :]
1813+
1814+ self ._name = name
1815+ return self ._name
17051816
17061817 @property
17071818 def description (self ):
@@ -1729,12 +1840,11 @@ def is_anonymous(self):
17291840 """
17301841 There are some anonymous fields in the RNTuple specification that we hide from the user
17311842 to simplify the interface. These are fields named `_0` that are children of a collection,
1732- variant, or atomic field.
1733-
1734- All children fields of variants are ignored, since they cannot be accessed directly
1735- in a consistent manner. They can only be accessed through the parent variant field.
1843+ variant, or atomic field. Fields that encode hierarchy (i.e. fields named `:_[0-9]+` that
1844+ are children of a record) are also considered anonymous.
17361845 """
17371846 if self ._is_anonymous is None :
1847+ # children of collections, variants, or atomic fields
17381848 self ._is_anonymous = not self .top_level and (
17391849 self .parent .record .struct_role
17401850 in (
@@ -1747,29 +1857,30 @@ def is_anonymous(self):
17471857 and self .record .field_name == "_0"
17481858 )
17491859 )
1750- field = self
1751- while not field .top_level :
1752- field = field .parent
1753- if field .record .struct_role == uproot .const .RNTupleFieldRole .VARIANT :
1754- self ._is_anonymous = True
1755- break
1756- return self ._is_anonymous
1757-
1758- @property
1759- def is_ignored (self ):
1760- """
1761- There are some fields in the RNTuple specification named `:_i` (for `i=0,1,2,...`)
1762- that encode class hierarchy. These are not useful in Uproot, so they are ignored.
1763- """
1764- if self ._is_ignored is None :
1765- self ._is_ignored = (
1860+ # fields that encode hierarchy
1861+ self ._is_anonymous |= (
17661862 not self .top_level
17671863 and self .parent .record .struct_role
17681864 == uproot .const .RNTupleFieldRole .RECORD
17691865 and re .fullmatch (r":_[0-9]+" , self .name ) is not None
17701866 )
1867+ return self ._is_anonymous
17711868
1772- return self ._is_ignored
1869+ @property
1870+ def in_variant (self ):
1871+ """
1872+ All children fields of variants need special treatment, since they cannot be accessed directly
1873+ in a consistent manner. They can only be accessed through the parent variant field.
1874+ """
1875+ if self ._in_variant is None :
1876+ self ._in_variant = False
1877+ field = self
1878+ while not field .top_level :
1879+ field = field .parent
1880+ if field .record .struct_role == uproot .const .RNTupleFieldRole .VARIANT :
1881+ self ._in_variant = True
1882+ break
1883+ return self ._in_variant
17731884
17741885 @property
17751886 def parent (self ):
0 commit comments