@@ -12,29 +12,28 @@ def __init__(self, slot):
1212 self .identifier = slot .identifier
1313 #: slot defining the ray
1414 self .slot = slot
15- #: list of RTDCBase (hierarchy children)
16- self .steps = []
17- #: corresponds to hashes of the applied filters
18- self .step_hashes = []
15+ #: segments of the filter ray, consisting of hash, previous, and
16+ #: next dataset
17+ self .segments = []
1918 # holds the filters (protected so that users use set_filters)
2019 self ._filters = []
2120 # used for testing (incremented when the ray is cut)
2221 self ._generation = 0
2322 # used for checking validity of the ray
2423 self ._slot_hash = "unset"
25- self ._root_child = None
2624
def __repr__(self):
    """Return a short description of the ray

    The string contains `self.identifier` and the hexadecimal
    `id` of this instance.
    """
    hex_id = hex(id(self))
    template = "<Pipeline Filter Ray '{}' at {}>"
    return template.format(self.identifier, hex_id)
3129
def _add_segment(self, ds, filt):
    """Add a filter segment and return its child dataset

    The filter state of `ds` is reset and `filt` is written to `ds`
    before the child is requested from `self._new_child`. The
    segment is stored in `self.segments` as the list
    ``[filt.hash, ds, child]``.
    """
    # the parent must be reset before the filter settings are written
    ds.reset_filter()
    filt.update_dataset(ds)
    next_ds = self._new_child(ds, filt)
    # hash, previous dataset, next dataset
    segment = [filt.hash, ds, next_ds]
    self.segments.append(segment)
    return next_ds
3837
3938 def _new_child (self , ds , filt = None , apply_filter = False ):
4039 identifier = self .slot .identifier
@@ -58,21 +57,7 @@ def filters(self):
5857 """
5958 return self ._filters
6059
61- @property
62- def root_child (self ):
63- """This is the first element in self.steps
64- (Will return a dataset even if self.steps is empty)
65- """
66- if self ._slot_hash != self .slot .hash :
67- # reset everything (e.g. emodulus recipe might have changed)
68- self .steps = []
69- self .step_hashes = []
70- self ._root_child = self ._new_child (self .slot .get_dataset (),
71- apply_filter = True )
72- self ._slot_hash = self .slot .hash
73- return self ._root_child
74-
75- def get_final_child (self , rtdc_ds = None , apply_filter = True ):
60+ def get_final_child (self , rtdc_ds = None , filters = None , apply_filter = True ):
7661 """Return the final ray child of `rtdc_ds`
7762
7863 If `rtdc_ds` is None, then the dataset of the current
@@ -86,19 +71,27 @@ def get_final_child(self, rtdc_ds=None, apply_filter=True):
8671 is applied to other data on disk e.g. when computing
8772 statistics. For regular use of the filter ray in a
8873 pipeline, use :func:`get_dataset`.
74+
75+ .. versionchanged:: 2.25.1
76+ The dataset returned is a clean child dataset without any
77+ filters defined.
78+
8979 """
90- filters = self .filters
80+ if filters is None :
81+ filters = self .filters
82+ external_filt = False
83+ else :
84+ external_filt = True
9185
9286 if rtdc_ds is None :
9387 # normal case
94- external = False
95- rtdc_ds = self .slot .get_dataset ()
96- ds = self .root_child
88+ external_ds = False
89+ ds = self .slot .get_dataset ()
9790 else :
9891 # ray is applied to other data
99- external = True
100- # do not modify rtdc_ds (create a child to work with)
101- ds = self ._new_child (rtdc_ds , apply_filter = True )
92+ external_ds = True
93+ # do not modify the original dataset (create a child to work with)
94+ ds = self ._new_child (rtdc_ds )
10295
10396 # Dear future self,
10497 #
@@ -107,48 +100,46 @@ def get_final_child(self, rtdc_ds=None, apply_filter=True):
107100 # Sincerely,
108101 # past self
109102
103+ filters = [f for f in filters if f .filter_used ]
104+
110105 if filters :
111106 # apply all filters
112107 for ii , filt in enumerate (filters ):
113108 # remember the previous hierarchy parent
114109 # (ds is always used for the next iteration)
115- prev_ds = ds
116- if external :
117- # do not touch self.steps or self.step_hashes
110+ if external_ds or external_filt :
111+ # do not touch self.segments
118112 filt .update_dataset (ds )
119113 ds = self ._new_child (ds , filt )
120- elif len (self .steps ) < ii + 1 :
121- # just create a new step
122- ds = self ._add_step (ds , filt )
123- elif filt .hash != self .step_hashes [ii ]:
114+ elif len (self .segments ) < ii + 1 :
115+ # just create a new segment
116+ ds = self ._add_segment (ds , filt )
117+ elif filt .hash != self .segments [ii ][ 0 ]:
124118 # the filter ray is changing here;
125- # cut it and add a new step
126- self .steps = self .steps [:ii ]
127- self .step_hashes = self .step_hashes [:ii ]
128- ds = self ._add_step (ds , filt )
119+ # trim it and add a new segment
120+ self .segments = self .segments [:ii ]
121+ ds = self ._add_segment (ds , filt )
129122 self ._generation += 1 # for testing
130123 else :
131- # the filters match so far
132- if len (self .steps ) > ii + 1 : # next child exists
133- ds = self .steps [ii + 1 ]
134- else : # next child does not exist
135- ds = self ._new_child (ds , filt )
136- # we now have the entire filter pipeline in self.steps
137- final_ds = prev_ds
124+ # reuse previous segment
125+ ds = self .segments [ii ][2 ]
126+ final_ds = ds
138127 else :
139- final_ds = rtdc_ds
128+ final_ds = ds
129+
130+ if not external_ds :
131+ ds .reset_filter ()
132+
140133 if apply_filter :
141134 final_ds .apply_filter ()
135+
142136 return final_ds
143137
144- def get_dataset (self , filters = None , apply_filter = True ):
138+ def get_dataset (self , apply_filter = True ):
145139 """Return the dataset that corresponds to applying these filters
146140
147141 Parameters
148142 ----------
149- filters: list of Filter or None
150- Filters used for computing the dataset hierarchy. If set
151- to None, the current filters in `self.filters` are used.
152143 apply_filter: bool
153144 Whether to apply all filters and update the metadata of
154145 the requested dataset. This should be True if you are
@@ -157,14 +148,11 @@ def get_dataset(self, filters=None, apply_filter=True):
157148 apply some more filters and then call `rejuvenate`
158149 yourself.
159150 """
160- if filters is not None :
161- # put the filters in place
162- self .set_filters (filters )
163151 # compute the final hierarchy child
164152 ds = self .get_final_child (apply_filter = apply_filter )
165153 return ds
166154
def set_filters(self, filters):
    """Set the filters of the current ray

    Parameters
    ----------
    filters: iterable of Filter
        All filters of the ray, including those that are not in
        use. Filters with `filter_used` set to False are skipped
        in `get_final_child` and not here.
    """
    # Store a shallow copy: the ray must not change when the caller
    # modifies its own list afterwards, and a one-shot iterable
    # (e.g. a generator) must survive repeated evaluation of the ray.
    self._filters = list(filters)
0 commit comments