Skip to content

Commit 004b506

Browse files
authored
Adjustments for large datasets (#124)
* changes to cli to better accept list of attributes and cover grid_metrics generation better
* pinning pdal python version
* removing duplicates
* dense working
* adjusting tests now that we're no longer accepting duplicates in storage
* change tdb_dir to storage parameter in info so we don't remake a storage if it's not necessary
* moving extraction to use latest input for arrays with duplicate values
* update gitignore to include vscode workspace files
* dense working with configs, shatter, commands, extract, info
* removing vacuum after every extent, fixing index error from pdal python return
* updating environment to latest tiledb for mem fix
* debugged tiledb and bad access problems
* changes for tiling according to tiledb accurately
* move deletion to overwriting mode
* fix method usage in test_remote_creation
* removing itertools.batched for now so that sm works with python3.11
* using np.isclose to better compare floats
* pin jupyter-book to <2.0.0
1 parent 902f46e commit 004b506

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+1735
-1343
lines changed

.github/workflows/main.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
fail-fast: false
2727
matrix:
2828
os: [ubuntu-latest, windows-latest, macos-latest]
29-
python-version: ['3.10', '3.11', '3.12']
29+
python-version: ['3.11', '3.12', '3.13']
3030
steps:
3131
- name: Checkout
3232
uses: actions/checkout@v4

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ autzen-aligned.tdb/
3434
.DS_Store
3535

3636
docs/source/conf.py
37+
*.code-workspace

docs/doc-environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ channels:
22
- conda-forge
33

44
dependencies:
5-
- jupyter-book
5+
- jupyter-book<2.0.0
66
- sphinx > 6
77
- sphinx-notfound-page
88
- python>=3.10, <3.13

environment.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
name: silvimetric
1+
name: pdal-sm
22
channels:
33
- conda-forge
44
dependencies:
5-
- python>=3.10, <3.13
5+
- python>=3.11, <3.14
66
- pip
7-
- tiledb-py>=0.32.5
8-
- pdal
7+
- cloudpickle>=3.1.2
8+
- boto3
9+
- tiledb>=2.29.2
10+
- tiledb-py>=0.35.2
11+
- libpdal-core>=2.9.2
912
- python-pdal
1013
- numpy
11-
- shapely
1214
- dask>=2024.11.2
1315
- pyproj
1416
- gdal
@@ -17,4 +19,5 @@ dependencies:
1719
- python-json-logger
1820
- dill
1921
- pandas
20-
- typing_extensions
22+
- typing_extensions
23+
- fast-histogram

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ max-line-length = 80
6161
[tool.ruff.lint.per-file-ignores]
6262
"__init__.py" = ["E402", "F401"]
6363
"**/{tests,docs,tools}/*" = ["E402"]
64+
"**/{tests}/*" = ["D101"]
6465

6566
[tool.setuptools]
6667
package-dir = {"" = "src"}

src/silvimetric/cli/cli.py

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,6 @@
3030
@click.option(
3131
'--log-dir', default=None, help='Directory for log output', type=str
3232
)
33-
@click.option(
34-
'--progress', is_flag=True, default=True, type=bool, help='Report progress'
35-
)
3633
@click.option('--workers', type=int, help='Number of workers for Dask')
3734
@click.option(
3835
'--threads', type=int, help='Number of threads per worker for Dask'
@@ -66,7 +63,6 @@ def cli(
6663
database,
6764
debug,
6865
log_dir,
69-
progress,
7066
dasktype,
7167
scheduler,
7268
workers,
@@ -88,7 +84,6 @@ def cli(
8884
tdb_dir=database,
8985
log=log,
9086
debug=debug,
91-
progress=progress,
9287
scheduler=scheduler,
9388
dasktype=dasktype,
9489
workers=workers,
@@ -155,12 +150,15 @@ def info_cmd(
155150

156151
start_date = dates[0] if dates else date
157152
end_date = dates[1] if dates else date
153+
if start_date is None and end_date is None:
154+
info_dates=None
155+
else:
156+
info_dates = tuple(start_date, end_date)
158157

159158
i = info.info(
160159
app.tdb_dir,
161160
bounds=bounds,
162-
start_time=start_date,
163-
end_time=end_date,
161+
dates = info_dates,
164162
name=name,
165163
concise=True,
166164
)
@@ -175,7 +173,6 @@ def info_cmd(
175173
]
176174

177175
i['metadata'].pop('metrics')
178-
# print(metrics.keys())
179176
if any([history, metadata, attributes, metrics]):
180177
filtered = {}
181178
if history:
@@ -187,10 +184,10 @@ def info_cmd(
187184
if metrics:
188185
filtered['metrics'] = ms
189186

190-
app.log.info(json.dumps(filtered, indent=2))
187+
print(json.dumps(filtered, indent=2))
191188

192189
else:
193-
app.log.info(json.dumps(i, indent=2))
190+
print(json.dumps(i, indent=2))
194191
return
195192

196193

@@ -199,13 +196,6 @@ def info_cmd(
199196
@click.option(
200197
'--resolution', type=float, default=100, help='Summary pixel resolution'
201198
)
202-
@click.option(
203-
'--filter_empty',
204-
is_flag=True,
205-
type=bool,
206-
default=False,
207-
help='Remove empty space in computation. Will take extra time.',
208-
)
209199
@click.option(
210200
'--point_count', type=int, default=600000, help='Point count threshold.'
211201
)
@@ -215,7 +205,7 @@ def info_cmd(
215205
)
216206
@click.pass_obj
217207
def scan_cmd(
218-
app, resolution, point_count, pointcloud, bounds, depth, filter_empty
208+
app, resolution, point_count, pointcloud, bounds, depth
219209
):
220210
"""Scan point cloud, output information on it, and determine the optimal
221211
tile size."""
@@ -233,7 +223,6 @@ def scan_cmd(
233223
point_count,
234224
resolution,
235225
depth,
236-
filter_empty,
237226
log=app.log,
238227
)
239228

@@ -254,9 +243,9 @@ def scan_cmd(
254243
@click.option(
255244
'--attributes',
256245
'-a',
257-
multiple=True,
258246
type=AttrParamType(),
259-
help='List of attributes to include in Database',
247+
default=[],
248+
help='List of attributes to include in Database, eg. -a Z,Intensity',
260249
)
261250
@click.option(
262251
'--metrics',
@@ -268,6 +257,12 @@ def scan_cmd(
268257
@click.option(
269258
'--resolution', type=float, default=30.0, help='Summary pixel resolution'
270259
)
260+
@click.option(
261+
'--xsize', type=float, default=1000, help='TileDB X Tile size.'
262+
)
263+
@click.option(
264+
'--ysize', type=float, default=1000, help='TileDB Y Tile size.'
265+
)
271266
@click.option(
272267
'--alignment',
273268
type=str,
@@ -283,6 +278,8 @@ def initialize_cmd(
283278
resolution: float,
284279
metrics: list[Metric],
285280
alignment: str,
281+
xsize: int,
282+
ysize: int
286283
):
287284
"""Initialize silvimetrics DATABASE"""
288285

@@ -295,6 +292,8 @@ def initialize_cmd(
295292
metrics=metrics,
296293
resolution=resolution,
297294
alignment=alignment,
295+
xsize=xsize,
296+
ysize=ysize
298297
)
299298
return initialize.initialize(storageconfig)
300299

@@ -376,7 +375,7 @@ def shatter_cmd(app, pointcloud, bounds, report, tilesize, date, dates):
376375
report_path = f'reports/{config.name}.html'
377376
with performance_report(report_path):
378377
shatter.shatter(config)
379-
print(f'Writing report to {report_path}.')
378+
app.log.debug(f'Writing report to {report_path}.')
380379
else:
381380
shatter.shatter(config)
382381

@@ -385,10 +384,9 @@ def shatter_cmd(app, pointcloud, bounds, report, tilesize, date, dates):
385384
@click.option(
386385
'--attributes',
387386
'-a',
388-
multiple=True,
389387
type=AttrParamType(),
390388
default=[],
391-
help='List of attributes to include output',
389+
help='List of attributes to include output, eg -a Z,Intensity',
392390
)
393391
@click.option(
394392
'--metrics',
@@ -414,15 +412,13 @@ def shatter_cmd(app, pointcloud, bounds, report, tilesize, date, dates):
414412
def extract_cmd(app, attributes, metrics, outdir, bounds):
415413
"""Extract silvimetric metrics from DATABASE"""
416414

417-
# TODO only allow metrics and attributes to be added if they're present
418-
# in the storage config.
419-
# dask_handle(
420-
# app.dasktype,
421-
# app.scheduler,
422-
# app.workers,
423-
# app.threads,
424-
# app.watch,
425-
# )
415+
dask_handle(
416+
app.dasktype,
417+
app.scheduler,
418+
app.workers,
419+
app.threads,
420+
app.watch,
421+
)
426422

427423
config = ExtractConfig(
428424
tdb_dir=app.tdb_dir,
@@ -445,7 +441,7 @@ def extract_cmd(app, attributes, metrics, outdir, bounds):
445441
)
446442
@click.pass_obj
447443
def delete_cmd(app, task_id):
448-
manage.delete(tdb_dir=app.tdb_dir, name=task_id, log=app.log)
444+
manage.delete(storage=app.tdb_dir, name=task_id, log=app.log)
449445

450446

451447
@cli.command('restart')
@@ -458,7 +454,7 @@ def delete_cmd(app, task_id):
458454
)
459455
@click.pass_obj
460456
def restart_cmd(app, task_id):
461-
manage.restart(tdb_dir=app.tdb_dir, name=task_id, log=app.log)
457+
manage.restart(storage=app.tdb_dir, name=task_id, log=app.log)
462458

463459

464460
@cli.command('resume')
@@ -471,7 +467,7 @@ def restart_cmd(app, task_id):
471467
)
472468
@click.pass_obj
473469
def resume_cmd(app, task_id):
474-
manage.resume(tdb_dir=app.tdb_dir, name=task_id, log=app.log)
470+
manage.resume(storage=app.tdb_dir, name=task_id, log=app.log)
475471

476472

477473
if __name__ == '__main__':

0 commit comments

Comments
 (0)