Skip to content

Commit 88f548e

Browse files
authored
implement #129, add pandas DataFrame I/O for convenience (#130)
1 parent ee07fc2 commit 88f548e

File tree

3 files changed

+62
-4
lines changed

3 files changed

+62
-4
lines changed

.github/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ dependencies:
88
- pdal
99
- pytest
1010
- meshio
11+
- pandas

pdal/pipeline.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@
1111
except ModuleNotFoundError: # pragma: no cover
1212
Mesh = None
1313

14+
try:
15+
from pandas import DataFrame
16+
except ModuleNotFoundError: # pragma: no cover
17+
DataFrame = None
18+
1419
from . import drivers, libpdalpython
1520

1621
LogLevelToPDAL = {
@@ -28,13 +33,19 @@ def __init__(
2833
spec: Union[None, str, Sequence[Stage]] = None,
2934
arrays: Sequence[np.ndarray] = (),
3035
loglevel: int = logging.ERROR,
31-
json: Optional[str] = None
36+
json: Optional[str] = None,
37+
dataframes: Sequence[DataFrame] = (),
3238
):
39+
3340
if json:
3441
if spec and json:
3542
raise ValueError("provide 'spec' or 'json' arguments, not both")
3643
spec = json
3744

45+
# Convert our data frames to Numpy Structured Arrays
46+
if dataframes:
47+
arrays = [df.to_records() for df in dataframes]
48+
3849
super().__init__()
3950
self._stages: List[Stage] = []
4051
if spec:
@@ -112,6 +123,13 @@ def get_meshio(self, idx: int) -> Optional[Mesh]:
112123
[("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))],
113124
)
114125

126+
127+
def get_dataframe(self, idx: int) -> Optional[DataFrame]:
128+
if DataFrame is None:
129+
raise RuntimeError("Pandas support requires Pandas to be installed")
130+
131+
return DataFrame(self.arrays[idx])
132+
115133
def _get_json(self) -> str:
116134
return self.toJSON()
117135

test/test_pipeline.py

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def test_logging_filters_python(self, filename):
344344
reason="filters.python PDAL plugin is not available",
345345
)
346346
def test_filters_python(self):
347-
r = pdal.Reader("test/data/autzen-utm.las")
347+
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
348348
f = pdal.Filter.python(script=__file__, function="a_filter", module="anything")
349349
count = (r | f).execute()
350350
assert count == 1065
@@ -364,14 +364,14 @@ def test_only_readers(self):
364364
np.testing.assert_array_equal(np.concatenate([array1, array1]), array2)
365365

366366
def test_quickinfo(self):
367-
r = pdal.Reader("test/data/autzen-utm.las")
367+
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
368368
p = r.pipeline()
369369
info = p.quickinfo
370370
assert 'readers.las' in info.keys()
371371
assert info['readers.las']['num_points'] == 1065
372372

373373
def test_jsonkwarg(self):
374-
pipeline = pdal.Reader("test/data/autzen-utm.las").pipeline().toJSON()
374+
pipeline = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las")).pipeline().toJSON()
375375
r = pdal.Pipeline(json=pipeline)
376376
p = r.pipeline
377377
assert 'readers.las' in p
@@ -487,6 +487,45 @@ def test_meshio(self, filename):
487487
assert len(triangles) == 134
488488
assert triangles[0][0] == 29
489489

490+
class TestDataFrame:
491+
492+
@pytest.mark.skipif(
493+
not pdal.pipeline.DataFrame,
494+
reason="pandas is not available",
495+
)
496+
def test_fetch(self):
497+
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
498+
p = r.pipeline()
499+
p.execute()
500+
df = p.get_dataframe(0)
501+
assert df.size == 17040
502+
503+
def test_load(self):
504+
r = pdal.Reader(os.path.join(DATADIRECTORY,"autzen-utm.las"))
505+
p = r.pipeline()
506+
p.execute()
507+
data = p.arrays[0]
508+
df = pdal.pipeline.DataFrame
509+
dataframes = [df(data), df(data), df(data)]
510+
filter_intensity = """{
511+
"pipeline":[
512+
{
513+
"type":"filters.range",
514+
"limits":"Intensity[100:300)"
515+
}
516+
]
517+
}"""
518+
p = pdal.Pipeline(filter_intensity, dataframes = dataframes)
519+
p.execute()
520+
arrays = p.arrays
521+
assert len(arrays) == 3
522+
523+
# We copied the array three times. Sum the Intensity values
524+
# post filtering to see if we had our intended effect
525+
for data in arrays:
526+
assert len(data) == 387
527+
assert data["Intensity"].sum() == 57684
528+
490529

491530
class TestPipelineIterator:
492531
@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])

0 commit comments

Comments (0)