Skip to content

Commit 6924ce1

Browse files
hazard.io: avoid pickling geometries and compress hdf5 files
1 parent c3b5376, commit 6924ce1

File tree

1 file changed

+25
-6
lines changed

1 file changed

+25
-6
lines changed

climada/hazard/centroids/centr.py

Lines changed: 25 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -906,22 +906,33 @@ def write_hdf5(self, file_name, mode="w"):
906906
(path and) file name to write to.
907907
"""
908908
LOGGER.info("Writing %s", file_name)
909-
store = pd.HDFStore(file_name, mode=mode)
909+
xycols = []
910+
wkbcols = []
911+
store = pd.HDFStore(file_name, mode=mode, complevel=9)
910912
pandas_df = pd.DataFrame(self.gdf)
911913
for col in pandas_df.columns:
912914
if str(pandas_df[col].dtype) == "geometry":
913-
pandas_df[col] = np.asarray(self.gdf[col])
915+
try:
916+
pandas_df[col + ".x"] = self.gdf[col].x
917+
pandas_df[col + ".y"] = self.gdf[col].y
918+
pandas_df.drop(col, inplace=True)
919+
xycols.append(col)
920+
except ValueError:
921+
pandas_df[col] = self.gdf[col].to_wkb()
922+
wkbcols.append(col)
914923

915924
# Avoid pandas PerformanceWarning when writing HDF5 data
916925
with warnings.catch_warnings():
917926
warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning)
918927
# Write dataframe
919928
store.put("centroids", pandas_df)
920929

921-
store.get_storer("centroids").attrs.metadata = {
922-
"crs": CRS.from_user_input(self.crs).to_wkt()
923-
}
924-
930+
centroids_metadata = {"crs": CRS.from_user_input(self.crs).to_wkt()}
931+
if xycols:
932+
centroids_metadata["xy_columns"] = xycols
933+
if wkbcols:
934+
centroids_metadata["wkb_columns"] = wkbcols
935+
store.get_storer("centroids").attrs.metadata = centroids_metadata
925936
store.close()
926937

927938
@classmethod
@@ -951,6 +962,14 @@ def from_hdf5(cls, file_name):
951962
# the CRS was stored in '_crs'/'crs'
952963
crs = metadata.get("crs")
953964
gdf = gpd.GeoDataFrame(store["centroids"], crs=crs)
965+
for xycol in metadata.get("xy_columns", []):
966+
gdf[xycol] = gpd.points_from_xy(
967+
x=gdf[xycol + ".x"], y=gdf[xycol + ".y"], crs=crs
968+
)
969+
gdf.drop([xycol + ".x", xycol + ".y"], inplace=True)
970+
for wkbcol in metadata.get("wkb_columns", []):
971+
gdf[wkbcol] = gpd.GeoSeries.from_wkb(gdf[wkbcol], crs=crs)
972+
954973
except TypeError:
955974
with h5py.File(file_name, "r") as data:
956975
gdf = cls._gdf_from_legacy_hdf5(data.get("centroids"))

0 commit comments

Comments
 (0)