Skip to content

Commit 0dc171b

Browse files
committed
tweaks
1 parent df21fb1 commit 0dc171b

File tree

7 files changed

+33
-24
lines changed

7 files changed

+33
-24
lines changed

src/silvimetric/commands/scan.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import dask
55
import math
66
import json
7-
import itertools
87

98
from dask.diagnostics import ProgressBar
109

@@ -52,15 +51,24 @@ def scan(
5251
extents = Extents.from_sub(tdb_dir, data.bounds)
5352
logger.info('Gathering initial chunks...')
5453
count = dask.delayed(data.estimate_count)(extents.bounds).persist()
55-
5654
cell_counts = extent_handle(
5755
extents, data, resolution, point_count, depth, log
5856
)
5957

60-
num_cells = np.sum(cell_counts).item()
61-
std = np.std(cell_counts)
62-
mean = np.mean(cell_counts)
63-
rec = int(mean)
58+
np_cell_counts = np.array(cell_counts)
59+
num_cells = np.sum(np_cell_counts).item()
60+
q1, q3 = np.percentile(np_cell_counts, [25,75])
61+
iqr = q3 - q1
62+
low_bounds = q1 - (1.5 * iqr)
63+
up_bounds = q3 + (1.5 * iqr)
64+
65+
adjusted = np_cell_counts[np_cell_counts > low_bounds]
66+
adjusted = adjusted[adjusted < up_bounds]
67+
68+
std = np.std(adjusted)
69+
mean = np.mean(adjusted)
70+
median = np.median(adjusted)
71+
rec = median
6472

6573
pc_info = dict(
6674
pc_info=dict(
@@ -75,6 +83,7 @@ def scan(
7583
num_tiles=len(cell_counts),
7684
mean=mean,
7785
std_dev=std,
86+
median=median,
7887
recommended=rec,
7988
)
8089
)

src/silvimetric/commands/shatter.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,12 @@ def kill_gracefully(signum, frame):
161161
if dc is not None:
162162
processes = []
163163
count = 0
164-
for leaf_bunch in itertools.batched(leaves, consolidate_count):
164+
for leaf in leaves:
165165
count = count + 1
166-
processes.append(dc.map(do_one, leaf_bunch, config=config, storage=storage))
166+
processes.append(dc.submit(do_one, leaf, config=config, storage=storage))
167+
if count % consolidate_count == 0:
168+
processes.append(dc.submit(storage.consolidate_shatter, timestamp=config.timestamp, key=f'consolidate_{count}'))
167169

168-
processes.append(dc.submit(storage.consolidate_shatter, config.timestamp))
169170
gathered = dc.gather(processes)
170171
point_count = 0
171172
for pc in gathered:
@@ -234,9 +235,8 @@ def shatter(config: ShatterConfig) -> int:
234235
if config.tile_size is not None:
235236
leaves = extents.get_leaf_children(config.tile_size)
236237
else:
237-
leaves = extents.chunk(data)
238+
leaves = itertools.chain(extents.chunk(data))
238239

239-
leaves = itertools.chain(leaves)
240240
# Begin main operations
241241
config.log.debug('Fetching and arranging data...')
242242
storage.save_shatter_meta(config)

src/silvimetric/resources/metrics/grid_metrics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,14 +52,14 @@ def _get_grid_metrics(elev_key='Z'):
5252

5353
# give profile_area separate pct_base so we can apply separate filters
5454
percentiles['profile_area'].attributes = [A[elev_key]]
55+
percentiles['iq'].attributes = [A[elev_key], A['Intensity']]
5556
# pct_base_copy = copy.deepcopy(pct_base)
5657
# pct_base_copy.name = 'pct_base_profile_area'
5758
# percentiles['profile_area'].dependencies = [pct_base_copy]
5859

5960
statistics['cumean'].attributes = [A[elev_key]]
6061
statistics['sqmean'].attributes = [A[elev_key]]
6162

62-
statistics['iq'].attributes = [A[elev_key], A['Intensity']]
6363
statistics['min'].attributes = [A[elev_key], A['Intensity']]
6464
statistics['max'].attributes = [A[elev_key], A['Intensity']]
6565
statistics['mode'].attributes = [A[elev_key], A['Intensity']]

src/silvimetric/resources/metrics/p_moments.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,28 +11,28 @@ def m_mean(data, *args):
1111

1212
def m_variance(data, *args):
1313
# copy FUSION's variance approach
14-
num = ((data - data.mean()) ** 2).sum()
1514
denom = (data.count() - 1)
1615
if denom == 0:
1716
return np.nan
17+
num = ((data - data.mean()) ** 2).sum()
1818
return num / denom
1919

2020

2121
def m_skewness(data, *args):
2222
# copy FUSION's approximation of skewness
23-
num = ((data - data.mean()) ** 3).sum()
2423
denom = ( (data.count() - 1) * np.std(data) ** 3)
2524
if denom == 0:
2625
return np.nan
26+
num = ((data - data.mean()) ** 3).sum()
2727
return num / denom
2828

2929

3030
def m_kurtosis(data, *args):
3131
# copy FUSION's approximation of kurtosis
32-
num = ((data - data.mean()) ** 4).sum()
3332
denom = ((data.count() - 1) * np.std(data) ** 4)
3433
if denom == 0:
3534
return np.nan
35+
num = ((data - data.mean()) ** 4).sum()
3636
return num / denom
3737

3838

src/silvimetric/resources/metrics/percentiles.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ def m_90m10(data, *args):
7575
def m_95m05(data, *args):
7676
return args[0][13] - args[0][1]
7777

78+
def m_iq(data, *args):
79+
q1 = args[0][4]
80+
q3 = args[0][10]
81+
return q3 - q1
82+
83+
7884
def m_profile_area(data, *args):
7985
# sanity check...must have valid heights/elevations
8086
p = np.percentile(data, range(0,100)).tolist()
@@ -125,4 +131,5 @@ def m_profile_area(data, *args):
125131
percentiles['p99'] = Metric('p99', np.float32, m_p99, [pct_base])
126132
percentiles['90m10'] = Metric('90m10', np.float32, m_90m10, [pct_base])
127133
percentiles['95m05'] = Metric('95m05', np.float32, m_95m05, [pct_base])
128-
percentiles['profile_area'] = Metric('profile_area', np.float32, m_profile_area)
134+
percentiles['profile_area'] = Metric('profile_area', np.float32, m_profile_area)
135+
percentiles['iq'] = Metric('iq', np.float32, m_iq, [pct_base])

src/silvimetric/resources/metrics/stats.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,6 @@ def m_cv(data, *args):
4242
return np.nan
4343
return stddev / mean
4444

45-
46-
def m_iq(data):
47-
return stats.iqr(data)
48-
49-
5045
def m_crr(data, *args):
5146
mean, minimum, maximum = args
5247
den = maximum - minimum
@@ -80,7 +75,6 @@ def m_mad_mode(data, *args):
8075
maximum = Metric('max', np.float32, m_max)
8176
stddev = Metric('stddev', np.float32, m_stddev)
8277
cv = Metric('cv', np.float32, m_cv, [stddev, mean])
83-
iq = Metric('iq', np.float32, m_iq)
8478
crr = Metric('canopy_relief_ratio', np.float32, m_crr, [mean, minimum, maximum])
8579
sqmean = Metric('sqmean', np.float32, m_sqmean)
8680
cumean = Metric('cumean', np.float32, m_cumean)
@@ -92,7 +86,6 @@ def m_mad_mode(data, *args):
9286
max=maximum,
9387
stddev=stddev,
9488
cv=cv,
95-
iq=iq,
9689
crr=crr,
9790
sqmean=sqmean,
9891
cumean=cumean,

tests/test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def test_chunking(
8989
autzen_storage.alignment,
9090
autzen_storage.root,
9191
)
92-
chs1 = ex.chunk(autzen_data, pc_threshold=600000)
92+
chs1 = ex.chunk(autzen_data, pc_threshold=100000)
9393
chs2 = ex.get_leaf_children(50)
9494
for c in chs1:
9595
assert isinstance(c, Extents)

0 commit comments

Comments (0)