
Commit 902f46e

1.4.1 (#123)
* 1.4.0
* Maintenance work and Extraction update
* add start_datetime and end_datetime to attributes in tiledb
* bump version
* adjust time test
1 parent cf19081 commit 902f46e

25 files changed: +391 -235 lines

src/silvimetric/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = '1.4.0'
+__version__ = '1.4.1'
 
 from .resources.bounds import Bounds
 from .resources.extents import Extents

src/silvimetric/cli/cli.py

Lines changed: 31 additions & 20 deletions
@@ -110,23 +110,22 @@ def cli(
 )
 @click.option(
     '--history',
-    type=bool,
     is_flag=True,
-    default=False,
     help='Show the history section of the output.',
 )
 @click.option(
     '--metadata',
-    type=bool,
     is_flag=True,
-    default=False,
     help='Show the metadata section of the output.',
 )
+@click.option(
+    '--metrics',
+    is_flag=True,
+    help='Show the metrics section of the output.',
+)
 @click.option(
     '--attributes',
-    type=bool,
     is_flag=True,
-    default=False,
     help='Show the attributes section of the output.',
 )
 @click.option(
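A note on the flag cleanup above: in Click, is_flag=True already implies a boolean option that defaults to False, so the deleted type=bool and default=False keywords were redundant. A minimal sketch with a hypothetical command (not part of this commit):

import click

@click.command()
@click.option('--metrics', is_flag=True, help='Show the metrics section.')
def demo(metrics):
    # metrics arrives as a plain bool: False unless --metrics is passed
    click.echo(f'metrics={metrics}')

if __name__ == '__main__':
    demo()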
@@ -144,7 +143,9 @@ def cli(
     '--name', type=str, default=None, help='Select processes with this name'
 )
 @click.pass_obj
-def info_cmd(app, bounds, date, dates, name, history, metadata, attributes):
+def info_cmd(
+    app, bounds, date, dates, name, history, metadata, attributes, metrics
+):
     import json
 
     if date is not None and dates is not None:
@@ -164,14 +165,27 @@ def info_cmd(app, bounds, date, dates, name, history, metadata, attributes):
         concise=True,
     )
 
-    if any([history, metadata, attributes]):
+    ms = [
+        {
+            'name': v['name'],
+            'dtype': v['dtype'],
+            'dependencies': [dep['name'] for dep in v['dependencies']],
+        }
+        for v in i['metadata']['metrics']
+    ]
+
+    i['metadata'].pop('metrics')
+    # print(metrics.keys())
+    if any([history, metadata, attributes, metrics]):
         filtered = {}
         if history:
             filtered['history'] = i['history']
         if metadata:
             filtered['metadata'] = i['metadata']
         if attributes:
             filtered['attributes'] = i['attributes']
+        if metrics:
+            filtered['metrics'] = ms
 
     app.log.info(json.dumps(filtered, indent=2))
 
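The new --metrics flag prints a compact summary instead of full metric definitions: each entry keeps only the metric's name, dtype, and the names of its dependencies. A self-contained sketch of that reduction, using hypothetical metadata:

metrics_metadata = [
    {'name': 'mean', 'dtype': 'float32', 'dependencies': [{'name': 'count'}]},
]
ms = [
    {
        'name': v['name'],
        'dtype': v['dtype'],
        'dependencies': [dep['name'] for dep in v['dependencies']],
    }
    for v in metrics_metadata
]
print(ms)  # [{'name': 'mean', 'dtype': 'float32', 'dependencies': ['count']}]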
@@ -211,7 +225,6 @@ def scan_cmd(
         app.workers,
         app.threads,
         app.watch,
-        app.log,
     )
     return scan.scan(
         app.tdb_dir,
@@ -269,7 +282,7 @@ def initialize_cmd(
     attributes: list[Attribute],
     resolution: float,
     metrics: list[Metric],
-    alignment: str
+    alignment: str,
 ):
     """Initialize silvimetrics DATABASE"""
 
@@ -281,7 +294,7 @@ def initialize_cmd(
         attrs=attributes,
         metrics=metrics,
         resolution=resolution,
-        alignment=alignment
+        alignment=alignment,
     )
     return initialize.initialize(storageconfig)
 
@@ -333,7 +346,6 @@ def shatter_cmd(app, pointcloud, bounds, report, tilesize, date, dates):
         app.workers,
         app.threads,
         app.watch,
-        app.log,
     )
 
     if date is not None and dates is not None:
@@ -404,14 +416,13 @@ def extract_cmd(app, attributes, metrics, outdir, bounds):
 
     # TODO only allow metrics and attributes to be added if they're present
     # in the storage config.
-    dask_handle(
-        app.dasktype,
-        app.scheduler,
-        app.workers,
-        app.threads,
-        app.watch,
-        app.log,
-    )
+    # dask_handle(
+    #     app.dasktype,
+    #     app.scheduler,
+    #     app.workers,
+    #     app.threads,
+    #     app.watch,
+    # )
 
     config = ExtractConfig(
         tdb_dir=app.tdb_dir,

src/silvimetric/cli/common.py

Lines changed: 1 addition & 2 deletions
@@ -136,7 +136,7 @@ def convert(self, value, param, ctx) -> list[Metric]:
         elif val == 'aad':
             metrics.update(list(aad.aad.values()))
         elif val == 'grid_metrics':
-            metrics.update(list(grid_metrics.values()))
+            metrics.update(list(grid_metrics.get_grid_metrics().values()))
         elif val == 'all':
             metrics.update(list(all_metrics.values()))
         else:
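The switch from a module-level grid_metrics dict to a get_grid_metrics() call suggests the dictionary is now built on demand rather than at import time. A hypothetical sketch of such a lazy accessor (the real construction lives in silvimetric and is not reproduced here):

from typing import Callable
import numpy as np

_grid_metrics: dict[str, Callable] | None = None

def get_grid_metrics() -> dict[str, Callable]:
    # Build the dictionary on first use instead of at import time.
    global _grid_metrics
    if _grid_metrics is None:
        _grid_metrics = {'mean': np.mean, 'max': np.max}
    return _grid_metrics

# Callers then use: metrics.update(list(get_grid_metrics().values()))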
@@ -158,7 +158,6 @@ def dask_handle(
     workers: int,
     threads: int,
     watch: bool,
-    log: Log,
 ) -> None:
     dask_config = {}
 

src/silvimetric/commands/extract.py

Lines changed: 72 additions & 55 deletions
@@ -1,15 +1,15 @@
 from pathlib import Path
-from itertools import chain
-
 
+from dask.diagnostics import ProgressBar
+from distributed.client import _get_global_client as get_client
 from typing_extensions import Union
 from osgeo import gdal, osr
 import dask
 import numpy as np
 import pandas as pd
 
 
-from .. import Storage, Extents, ExtractConfig
+from .. import Storage, Extents, ExtractConfig, Bounds, Graph
 
 np_to_gdal_types = {
     np.dtype(np.byte).str: gdal.GDT_Byte,
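Worth noting: _get_global_client is a private distributed API (the leading underscore signals no stability guarantee). It returns the active Client if one exists, else None, which is how extract() later decides between cluster execution and a local ProgressBar run. A minimal sketch of that check, assuming only the behavior this commit relies on:

from distributed.client import _get_global_client as get_client

def have_cluster() -> bool:
    # True when a dask.distributed Client is active in this process
    return get_client() is not None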
@@ -27,8 +27,7 @@
 
 
 def write_tif(
-    xsize: int,
-    ysize: int,
+    bounds: Bounds,
     data: np.ndarray,
     nan_val: float | int,
     name: str,
@@ -48,13 +47,14 @@ def write_tif(
     crs = config.crs
     srs = osr.SpatialReference()
     srs.ImportFromWkt(crs.to_wkt())
-    b = config.bounds
+    minx, miny, maxx, maxy = bounds.get()
+    ysize, xsize = data.shape
 
     transform = [
-        b.minx,
+        minx,
         config.resolution,
         0,
-        b.maxy,
+        maxy,
         0,
         -1 * config.resolution,
     ]
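The geotransform now comes from the bounds argument plus the array shape rather than from config-wide sizes. A quick sanity check of the north-up transform built above, with hypothetical numbers:

# GDAL north-up geotransform: [origin_x, pixel_w, 0, origin_y, 0, -pixel_h]
minx, maxy, resolution = 500000.0, 4700000.0, 30.0
transform = [minx, resolution, 0, maxy, 0, -resolution]

# Pixel (row=5, col=10) maps to map coordinates:
x = transform[0] + 10 * transform[1]  # 500300.0, east of minx
y = transform[3] + 5 * transform[5]   # 4699850.0, south of maxy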
@@ -70,8 +70,8 @@
     )
     tif.SetGeoTransform(transform)
     tif.SetProjection(srs.ExportToWkt())
-    tif.GetRasterBand(1).WriteArray(data)
     tif.GetRasterBand(1).SetNoDataValue(nan_val)
+    tif.GetRasterBand(1).WriteArray(data)
     tif.FlushCache()
     tif = None
 
@@ -98,17 +98,20 @@ def expl(x):
 
     # set index so we can apply to the whole dataset without needing to skip X
     # and Y then reset in the index because that's what metric.do expects
-    exploded = data_in.set_index(['X', 'Y']).apply(expl)[attrs].reset_index()
-    metric_data = dask.persist(
-        *[m.do(exploded) for m in storage.config.metrics]
-    )
+    data_in = data_in.set_index(['Y', 'X'])
+    exploded = data_in.apply(expl)[attrs].reset_index()
 
-    data_out = data_in.set_index(['X', 'Y']).join([m for m in metric_data])
-    return data_out
+    exploded.rename(columns={'X': 'xi', 'Y': 'yi'}, inplace=True)
+    graph = Graph(storage.config.metrics)
+    metric_data = graph.run(exploded)
+    #rename index from xi,yi to X,Y
+    metric_data.index = metric_data.index.rename(['Y','X'])
+
+    return metric_data
 
 
 def handle_overlaps(
-    config: ExtractConfig, storage: Storage, indices: np.ndarray
+    config: ExtractConfig, storage: Storage, extents: Extents
 ) -> pd.DataFrame:
     """
     Handle cells that have overlapping data. We have to re-run metrics over
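get_metrics now hands the exploded per-cell point lists to a Graph built from the configured metrics and runs them in one pass, instead of persisting each metric separately and joining. A minimal pandas sketch of the explode-then-aggregate pattern (hypothetical data; Graph itself is SilviMetric's metric runner and is not reproduced here):

import pandas as pd

cells = pd.DataFrame({
    'yi': [0, 0, 0],
    'xi': [0, 0, 1],
    'Z': [[1.0, 2.0], [3.0], [4.0, 5.0]],
})
exploded = cells.explode('Z')                 # one row per point
exploded['Z'] = exploded['Z'].astype(float)   # explode leaves object dtype
out = exploded.groupby(['yi', 'xi'])['Z'].mean()
print(out)
# yi  xi
# 0   0     2.0
#     1     4.5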
@@ -124,10 +127,10 @@ def handle_overlaps(
     ma_list = storage.getDerivedNames()
     att_list = [a.name for a in config.attrs]
 
-    minx = indices['x'].min()
-    maxx = indices['x'].max()
-    miny = indices['y'].min()
-    maxy = indices['y'].max()
+    minx = extents.x1
+    maxx = extents.x2
+    miny = extents.y1
+    maxy = extents.y2
 
     att_meta = {}
     att_meta['X'] = np.int32
@@ -137,44 +140,43 @@
         att_meta[a.name] = a.dtype
 
     with storage.open('r') as tdb:
-        # TODO this can be more efficient. Use count to find indices, then work
-        # with that smaller set from there. Working as is for now, but slow.
-        dit = tdb.query(
-            attrs=[*att_list, *ma_list],
+        storage.config.log.info('Looking for overlaps...')
+        data = tdb.query(
+            attrs=[*ma_list],
             order='F',
             coords=True,
-            return_incomplete=True,
-            use_arrow=False,
         ).df[minx:maxx, miny:maxy]
-        data = pd.DataFrame()
-
-        storage.config.log.info('Collecting database information...')
-        for d in dit:
-            if data.empty:
-                data = d
-            else:
-                data = pd.concat([data, d])
+        data = data
 
         # find values that are not unique, means they have multiple entries
-        data = data.set_index(['X', 'Y'])
+        data = data.set_index(['Y', 'X'])
         redo_indices = data.index[data.index.duplicated(keep='first')]
         if redo_indices.empty:
-            return data.reset_index()
+            storage.config.log.info('No overlapping data. Continuing...')
+            return data
 
-        # data with overlaps
         redo_data = (
-            data.loc[redo_indices][att_list]
-            .groupby(['X', 'Y'])
-            .agg(lambda x: list(chain(*x)))
+            tdb.query(
+                attrs=[*att_list],
+                order='F',
+                coords=True,
+                use_arrow=False,
+            )
+            .df[:, :]
+            .set_index(['Y', 'X'])
         )
+
+        # data with overlaps
+        redo_data = redo_data.loc[redo_indices]
+
         # data that has no overlaps
        clean_data = data.loc[data.index[~data.index.duplicated(False)]]
 
        storage.config.log.warning(
            'Overlapping data detected. Rerunning metrics over these cells...'
        )
        new_metrics = get_metrics(redo_data.reset_index(), storage)
-        return pd.concat([clean_data, new_metrics]).reset_index()
+        return pd.concat([clean_data, new_metrics])
 
 
 def extract(config: ExtractConfig) -> None:
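handle_overlaps now finds duplicated (Y, X) cells straight from the index and re-queries only the attribute data for those cells, instead of streaming the whole array in incomplete batches. A small sketch of the duplicate-index tests doing the heavy lifting, with hypothetical values:

import pandas as pd

df = pd.DataFrame({'Y': [0, 0, 1], 'X': [0, 0, 1], 'v': [1, 2, 3]})
df = df.set_index(['Y', 'X'])

# Cells written more than once (second and later occurrences):
redo = df.index[df.index.duplicated(keep='first')]     # [(0, 0)]
# Cells written exactly once (no occurrence duplicated at all):
clean = df.loc[df.index[~df.index.duplicated(False)]]  # row (1, 1) only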
@@ -197,28 +199,43 @@
         storage.config.alignment,
         root=root_bounds,
     )
-    i = e.get_indices()
-    xsize = e.x2
-    ysize = e.y2
 
     # figure out if there are any overlaps and handle them
-    final = handle_overlaps(config, storage, i).sort_values(['Y', 'X'])
+    final = handle_overlaps(config, storage, e)
+
+    xis = final.index.get_level_values(1).astype(np.int64)
+    yis = final.index.get_level_values(0).astype(np.int64)
+    new_idx = pd.MultiIndex.from_product(
+        (range(yis.min(), yis.max() + 1), range(xis.min(), xis.max() + 1))
+    ).rename(['Y','X'])
+    final = final.reindex(new_idx)
+
+    xs = root_bounds.minx + xis * config.resolution
+    ys = root_bounds.maxy - yis * config.resolution
+    final_bounds = Bounds(xs.min(), ys.min(), xs.max(), ys.max())
 
     # output metric data to tifs
     config.log.info(f'Writing rasters to {config.out_dir}')
+    futures = []
     for ma in ma_list:
         # TODO should output in sections so we don't run into memory problems
         dtype = final[ma].dtype
-        if dtype.kind in ['u', 'i']:
-            nan_val = np.iinfo(dtype).max
-        elif dtype.kind == 'f':
-            nan_val = np.nan
+        if dtype.kind == 'u':
+            nan_val = 0
+        elif dtype.kind in ['i', 'f']:
+            nan_val = -9999
         else:
-            raise ValueError('Invalid Raster data type {dtype}.')
+            nan_val = 0
+        unstacked = final[ma].unstack()
+        m_data = unstacked.to_numpy()
 
-        m_data = np.full(shape=(ysize, xsize), fill_value=nan_val, dtype=dtype)
-        a = final[['X', 'Y', ma]].to_numpy()
-        for x, y, md in a[:]:
-            m_data[int(y)][int(x)] = md
+        futures.append(
+            dask.delayed(write_tif)(final_bounds, m_data, nan_val, ma, config)
+        )
 
-        write_tif(xsize, ysize, m_data, nan_val, ma, config)
+    dc = get_client()
+    if dc is not None:
+        dask.compute(*futures)
+    else:
+        with ProgressBar():
+            dask.compute(*futures)
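The rewritten raster loop fills the metric index out to a dense grid and unstacks it, turning each metric into a 2-D array in one shot instead of a cell-by-cell fill; the write_tif calls are wrapped in dask.delayed and computed together, with a ProgressBar fallback when no distributed client is active. A pandas sketch of the reindex-and-unstack step, with hypothetical values:

import pandas as pd

idx = pd.MultiIndex.from_tuples([(0, 0), (1, 1)], names=['Y', 'X'])
final = pd.DataFrame({'mean_Z': [2.0, 4.5]}, index=idx)

# Fill in the missing (Y, X) cells so the grid is dense...
full = pd.MultiIndex.from_product((range(2), range(2))).rename(['Y', 'X'])
final = final.reindex(full)

# ...then pivot X into columns: rows are Y, columns are X.
m_data = final['mean_Z'].unstack().to_numpy()
# array([[2. , nan],
#        [nan, 4.5]])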

src/silvimetric/commands/info.py

Lines changed: 5 additions & 0 deletions
@@ -99,5 +99,10 @@ def info(
         info['history'] = history
     except KeyError:
         history = {}
+    # remove unnecessary keys for printing to logs
+    if concise:
+        info['metadata'].pop('attrs')
+        info['metadata'].pop('debug')
+        info['metadata'].pop('log')
 
     return info
