Skip to content

Commit b9f8b46

Browse files
committed
fixing chunking
1 parent 9e83cd3 commit b9f8b46

File tree

2 files changed

+19
-35
lines changed

2 files changed

+19
-35
lines changed

src/silvimetric/commands/shatter.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
from dask.delayed import delayed
1313
import dask.bag as db
14-
from dask.diagnostics import ProgressBar
1514
from dask import compute, persist
1615
from dask.distributed import futures_of, as_completed
1716

src/silvimetric/resources/extents.py

Lines changed: 19 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import math
22
import numpy as np
3+
import itertools
34

5+
from dask.distributed import get_client
46
from dask.delayed import delayed, Delayed
57
from dask import compute
68

@@ -97,48 +99,31 @@ def chunk(
9799
pc_threshold=600000,
98100
):
99101
"""
100-
Split up a dataset into tiles based on the given thresholds. Unlike Scan
101-
this will filter out any tiles that contain no points.
102+
Split up a dataset into tiles based on the point threshold. Unlike Scan
103+
this will not stop at a specific depth, but keep going until finding
104+
nodes that fit the point count threshold.
102105
103106
:param data: Incoming Data object to oeprate on.
104-
:param res_threshold: Resolution threshold., defaults to 100
105107
:param pc_threshold: Point count threshold., defaults to 600000
106-
:param depth_threshold: Tree depth threshold., defaults to 6
107108
:return: Return list of Extents that fit the criteria
108109
"""
109-
if self.root is not None:
110-
base_bbox = self.root.get()
111-
r = self.root
112-
else:
113-
base_bbox = self.bounds.get()
114-
r = self.bounds
115-
116-
bminx = base_bbox[0]
117-
bmaxy = base_bbox[3]
110+
def add_lists(l1, l2):
111+
return list(itertools.chain(l1,l2))
118112

119-
# make bounds in scale with the desired resolution
120-
minx = bminx + (self.x1 * self.resolution)
121-
maxx = bminx + (self.x2 * self.resolution)
122-
123-
miny = bmaxy - (self.y2 * self.resolution)
124-
maxy = bmaxy - (self.y1 * self.resolution)
125-
126-
chunk = Extents(
127-
Bounds(minx, miny, maxx, maxy), self.resolution, self.alignment, r
128-
)
113+
data = delayed(data)
114+
pc_threshold = delayed(pc_threshold)
115+
tasks = [self.filter(data, pc_threshold)]
129116

130-
if self.bounds == self.root:
131-
self.root = chunk.bounds
132-
tasks = [chunk.filter(data, pc_threshold)]
133117
while tasks:
134-
tasks = compute(*tasks)
135-
nt = []
136-
for t in tasks:
137-
if isinstance(t, Extents):
138-
yield t
139-
else:
140-
nt = nt + t
141-
tasks = nt
118+
batches = list(itertools.batched(tasks, 1000))
119+
for batch in batches:
120+
results = compute(batch)[0]
121+
tasks = []
122+
for r in results:
123+
if isinstance(r, Extents):
124+
yield r
125+
else:
126+
tasks = tasks + r
142127

143128
@delayed
144129
def filter(

0 commit comments

Comments
 (0)