Skip to content

Commit f858350

Browse files
jf-geo and hobu authored
Add GeoDataFrame support to Pipeline (#173)
* Added GeoDataFrame support to pipeline.py. Added basic GeoPandas GeoDataFrame support: if GeoPandas is installed, users can read an array from an executed pipeline and get it back as a GeoDataFrame, with optional arguments to choose XY vs. XYZ point geometry and to set the CRS. DataFrames passed to the Pipeline constructor will have their "geometry" column dropped if present. * Update test_pipeline.py: added a test for GeoDataFrames. * Add geopandas to environment reqs. --------- Co-authored-by: Howard Butler <[email protected]>
1 parent b83d78d commit f858350

File tree

3 files changed

+80
-3
lines changed

3 files changed

+80
-3
lines changed

.github/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,4 @@ dependencies:
99
- pdal
1010
- pytest
1111
- meshio
12-
- pandas
12+
- geopandas

src/pdal/pipeline.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
except ModuleNotFoundError: # pragma: no cover
1818
DataFrame = None
1919

20+
try:
21+
from geopandas import GeoDataFrame, points_from_xy
22+
except ModuleNotFoundError: # pragma: no cover
23+
GeoDataFrame = points_from_xy = None
24+
2025
from . import drivers, libpdalpython
2126

2227
LogLevelToPDAL = {
@@ -45,7 +50,7 @@ def __init__(
4550

4651
# Convert our data frames to Numpy Structured Arrays
4752
if dataframes:
48-
arrays = [df.to_records() for df in dataframes]
53+
arrays = [df.to_records() if not "geometry" in df.columns else df.drop(columns=["geometry"]).to_records() for df in dataframes]
4954

5055
super().__init__()
5156
self._stages: List[Stage] = []
@@ -124,13 +129,26 @@ def get_meshio(self, idx: int) -> Optional[Mesh]:
124129
[("triangle", np.stack((mesh["A"], mesh["B"], mesh["C"]), 1))],
125130
)
126131

127-
128132
def get_dataframe(self, idx: int) -> Optional[DataFrame]:
    """Wrap the array at position ``idx`` of ``self.arrays`` in a pandas DataFrame.

    Raises:
        RuntimeError: if the module-level ``DataFrame`` name is ``None``,
            i.e. the optional pandas import failed.
    """
    if DataFrame is not None:
        return DataFrame(self.arrays[idx])

    raise RuntimeError("Pandas support requires Pandas to be installed")
133137

138+
def get_geodataframe(self, idx: int, xyz: bool = False, crs: Any = None) -> Optional[GeoDataFrame]:
    """Return the array at position ``idx`` as a GeoPandas GeoDataFrame of points.

    The array is first wrapped in a pandas DataFrame, then a point geometry
    column is built from its ``X`` and ``Y`` columns (plus ``Z`` when ``xyz``
    is true).

    Args:
        idx: Index into ``self.arrays``.
        xyz: Build 3D points from X, Y and Z instead of 2D points from X and Y.
        crs: Passed unchanged to the ``GeoDataFrame`` constructor; the default
            ``None`` leaves the result without a CRS.

    Raises:
        RuntimeError: if the module-level ``GeoDataFrame`` name is ``None``,
            i.e. the optional geopandas import failed.
    """
    if GeoDataFrame is None:
        raise RuntimeError("GeoPandas support requires GeoPandas to be installed")

    df = DataFrame(self.arrays[idx])
    dims = ("X", "Y", "Z") if xyz else ("X", "Y")
    geometry = points_from_xy(*(df[dim] for dim in dims))
    # No need to null out the locals before returning: they go out of scope
    # here, and the GeoDataFrame holds its own references to what it needs.
    return GeoDataFrame(df, geometry=geometry, crs=crs)
151+
134152
def _get_json(self) -> str:
135153
return self.toJSON()
136154

test/test_pipeline.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,65 @@ def test_load(self):
541541
assert data["Intensity"].sum() == 57684
542542

543543

544+
@pytest.mark.skipif(
    not pdal.pipeline.GeoDataFrame,
    reason="geopandas is not available",
)
class TestGeoDataFrame:
    """GeoDataFrame round trips: pipeline output -> GeoDataFrame and back in as input."""

    @staticmethod
    def _autzen_pipeline():
        """Build a reader pipeline for autzen-utm.las, execute it and return it."""
        pipeline = pdal.Reader(os.path.join(DATADIRECTORY, "autzen-utm.las")).pipeline()
        pipeline.execute()
        return pipeline

    def test_fetch(self):
        """get_geodataframe keeps every record and dimension and honours xyz/crs."""
        pipeline = self._autzen_pipeline()
        record_count = pipeline.arrays[0].shape[0]
        dimension_count = len(pipeline.arrays[0].dtype)

        plain = pipeline.get_geodataframe(0)
        with_z = pipeline.get_geodataframe(0, xyz=True)
        with_crs = pipeline.get_geodataframe(0, crs="EPSG:4326")

        # Shape: one row per point, one column per dimension plus the geometry.
        assert len(plain) == record_count
        assert len(plain.columns) == dimension_count + 1
        assert isinstance(plain, pdal.pipeline.GeoDataFrame)

        # Geometry: valid, non-empty, 2D by default and without a CRS.
        assert plain.geometry.is_valid.all()
        assert not plain.geometry.is_empty.any()
        assert plain.crs is None
        assert plain.geometry.z.isna().all()

        # Optional arguments.
        assert not with_z.geometry.z.isna().any()
        assert with_crs.crs.srs == "EPSG:4326"

    def test_load(self):
        """GeoDataFrames handed to the Pipeline constructor are accepted as input."""
        points = self._autzen_pipeline().arrays[0]
        xyz_points = pdal.pipeline.points_from_xy(points["X"], points["Y"], points["Z"])
        gdf = pdal.pipeline.GeoDataFrame(points, geometry=xyz_points)

        filter_intensity = """{
"pipeline":[
{
"type":"filters.range",
"limits":"Intensity[100:300)"
}
]
}"""
        filtered = pdal.Pipeline(filter_intensity, dataframes=[gdf] * 3)
        filtered.execute()
        results = filtered.arrays
        assert len(results) == 3

        # The same frame went in three times, so each output must show the
        # same post-filter point count and Intensity total.
        for result in results:
            assert len(result) == 387
            assert result["Intensity"].sum() == 57684
602+
544603
class TestPipelineIterator:
545604
@pytest.mark.parametrize("filename", ["sort.json", "sort.py"])
546605
def test_non_streamable(self, filename):

0 commit comments

Comments
 (0)