@@ -94,9 +94,7 @@ def disjoint(self, other):
9494 def chunk (
9595 self ,
9696 data : Data ,
97- res_threshold = 100 ,
9897 pc_threshold = 600000 ,
99- depth_threshold = 6 ,
10098 ):
10199 """
102100 Split up a dataset into tiles based on the given thresholds. Unlike Scan
@@ -131,34 +129,57 @@ def chunk(
131129
132130 if self .bounds == self .root :
133131 self .root = chunk .bounds
132+ yield from chunk .filter (data , pc_threshold )
134133
135- filtered = []
136- curr = db .from_delayed (
137- [
138- dask .delayed (ch .filter )(
139- data , res_threshold , pc_threshold , depth_threshold , 1
140- )
141- for ch in chunk .split ()
142- ]
143- )
144- curr_depth = 1
134+ def filter (
135+ self ,
136+ data : Data ,
137+ pc_threshold = 600000 ,
138+ prev_estimate = 0 ,
139+ ):
140+ """
141+ Creates quad tree of chunks for this bounds, runs pdal quickinfo over
142+ this to determine if there are any points available. Uses a bottom
143+ resolution of 1km.
144+
145+ :param data: Data object containing point cloud details.
146+ :param pc_threshold: Point count threshold., defaults to 600000
147+ :param prev_estimate: Point count estimated for the parent tile; a
148+ tile whose estimate equals it is not split further., defaults to 0
149+ :return: Yields the Extents to use as work units (a generator, not
150+ a list).
151+ """
145152
146- logger = data .storageconfig .log
147- while curr .npartitions > 0 :
148- logger .debug (
149- f'Filtering { curr .npartitions } tiles at depth { curr_depth } '
150- )
151- n = curr .compute ()
152- to_add = [ne for ne in n if isinstance (ne , Extents )]
153- if to_add :
154- filtered = filtered + to_add
153+ pc = data .estimate_count (self .bounds )
155154
156- curr = db .from_delayed (
157- [ne for ne in n if not isinstance (ne , Extents )]
158- )
159- curr_depth += 1
155+ target_pc = pc_threshold
156+ minx , miny , maxx , maxy = self .bounds .get ()
160157
161- return filtered
158+ # is it empty?
159+ if not pc :
160+ yield self
161+ else :
162+ # has it hit the threshold yet?
163+ area = (maxx - minx ) * (maxy - miny )
164+ next_split_x = (maxx - minx ) / 2
165+ next_split_y = (maxy - miny ) / 2
166+
167+ # if the next split would put a side below the resolution, the point
168+ # count is at most the point threshold, or the estimate is unchanged
169+ # from the parent tile, then use this tile as the work unit.
170+ if next_split_x < self .resolution or next_split_y < self .resolution :
171+ yield self
172+ elif pc <= target_pc :
173+ yield self
174+ elif pc == prev_estimate :
175+ yield self
176+ else :
177+ for ch in self .split ():
178+ yield from ch .filter (
179+ data ,
180+ pc_threshold ,
181+ prev_estimate = pc
182+ )
162183
163184 def split (self ):
164185 """
@@ -202,63 +223,6 @@ def split(self):
202223 ]
203224 return exts
204225
205- def filter (
206- self ,
207- data : Data ,
208- res_threshold = 100 ,
209- pc_threshold = 600000 ,
210- depth_threshold = 6 ,
211- depth = 0 ,
212- ):
213- """
214- Creates quad tree of chunks for this bounds, runs pdal quickinfo over
215- this to determine if there are any points available. Uses a bottom
216- resolution of 1km.
217-
218- :param data: Data object containing point cloud details.
219- :param res_threshold: Resolution threshold., defaults to 100
220- :param pc_threshold: Point count threshold., defaults to 600000
221- :param depth_threshold: Tree depth threshold., defaults to 6
222- :param depth: Current tree depth., defaults to 0
223- :return: Returns a list of Extents.
224- """
225-
226- pc = data .estimate_count (self .bounds )
227- target_pc = pc_threshold
228- minx , miny , maxx , maxy = self .bounds .get ()
229-
230- # is it empty?
231- if not pc :
232- return []
233- else :
234- # has it hit the threshold yet?
235- area = (maxx - minx ) * (maxy - miny )
236- next_split_x = (maxx - minx ) / 2
237- next_split_y = (maxy - miny ) / 2
238-
239- # if the next split would put our area below the resolution, or if
240- # the point count is less than the threshold (600k) then use this
241- # tile as the work unit.
242- if next_split_x < self .resolution or next_split_y < self .resolution :
243- return [self ]
244- elif pc < target_pc :
245- return [self ]
246- elif area < res_threshold ** 2 or depth >= depth_threshold :
247- pc_per_cell = pc / (area / self .resolution ** 2 )
248- cell_estimate = ceil (target_pc / pc_per_cell )
249-
250- return self .get_leaf_children (cell_estimate )
251- else :
252- return [
253- dask .delayed (ch .filter )(
254- data ,
255- res_threshold ,
256- pc_threshold ,
257- depth_threshold ,
258- depth = depth + 1 ,
259- )
260- for ch in self .split ()
261- ]
262226
263227 def _find_dims (self , tile_size ):
264228 """
@@ -308,15 +272,13 @@ def get_leaf_children(self, tile_size):
308272 coords_list = np .array (
309273 [[* x , * y ] for x in dx for y in dy ], dtype = np .float64
310274 )
311- yield from [
312- Extents (
313- Bounds (minx , miny , maxx , maxy ),
314- self .resolution ,
315- self .alignment ,
316- self .root ,
317- )
318- for minx , maxx , miny , maxy in coords_list
319- ]
275+ for minx , maxx , miny , maxy in coords_list :
276+ yield Extents (
277+ Bounds (minx , miny , maxx , maxy ),
278+ self .resolution ,
279+ self .alignment ,
280+ self .root ,
281+ )
320282
321283 @staticmethod
322284 def from_storage (tdb_dir : str ):
0 commit comments