|
37 | 37 | from geopandas import GeoDataFrame, GeoSeries, points_from_xy |
38 | 38 | from mpl_toolkits.axes_grid1 import make_axes_locatable |
39 | 39 | from rasterio.warp import Resampling |
| 40 | +from xarray import DataArray |
40 | 41 |
|
41 | 42 | import climada.util.coordinates as u_coord |
42 | 43 | import climada.util.hdf5_handler as u_hdf5 |
@@ -1121,27 +1122,40 @@ def plot_basemap( |
1121 | 1122 | self.to_crs(crs_ori, inplace=True) |
1122 | 1123 | return axis |
1123 | 1124 |
|
1124 | | - def write_hdf5(self, file_name): |
| 1125 | + def write_hdf5(self, file_name, pickle_geometry=False): |
1125 | 1126 | """Write data frame and metadata in hdf5 format |
1126 | 1127 |
|
1127 | 1128 | Parameters |
1128 | 1129 | ---------- |
1129 | 1130 | file_name : str |
1130 | 1131 | (path and) file name to write to. |
| 1132 | + pickle_geometry : bool |
| 1133 | + flag indicating whether the "geometry" column of the Exposures' ``data`` is stored as
| 1134 | + pickled shapely objects instead of WKB bytes. Pickling is faster but less durable, because
| 1135 | + pickled data may become unreadable with future shapely versions.
| 1136 | + Default: False |
1131 | 1137 | """ |
1132 | 1138 | LOGGER.info("Writing %s", file_name) |
1133 | 1139 | store = pd.HDFStore(file_name, mode="w") |
1134 | | - pandas_df = pd.DataFrame(self.gdf) |
| 1140 | + pandas_df = pd.DataFrame(self.data) |
| 1141 | + wkb_data = {} |
1135 | 1142 | for col in pandas_df.columns: |
1136 | 1143 | if str(pandas_df[col].dtype) == "geometry": |
1137 | | - pandas_df[col] = np.asarray(self.gdf[col]) |
| 1144 | + if pickle_geometry: |
| 1145 | + pandas_df[col] = np.asarray(self.data[col]) |
| 1146 | + else: |
| 1147 | + wkb_data[col] = to_wkb_store(self.data[col])
| 1148 | + pandas_df = pandas_df.drop(columns=[col])
1138 | 1149 |
|
1139 | 1150 | # Avoid pandas PerformanceWarning when writing HDF5 data |
1140 | 1151 | with warnings.catch_warnings(): |
1141 | 1152 | warnings.simplefilter("ignore", category=pd.errors.PerformanceWarning) |
1142 | 1153 | # Write dataframe |
1143 | 1154 | store.put("exposures", pandas_df) |
1144 | 1155 |
|
| 1156 | + if wkb_data: |
| 1157 | + store.put("wkb_data", pd.DataFrame(wkb_data))
| 1158 | + |
1145 | 1159 | var_meta = {} |
1146 | 1160 | for var in type(self)._metadata: |
1147 | 1161 | var_meta[var] = getattr(self, var) |
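For orientation, a minimal usage sketch of the new flag. The toy exposure and the file names are hypothetical and not part of this change set; it only assumes that the Exposures constructor accepts a GeoDataFrame, as it does in current CLIMADA.

import geopandas as gpd
from shapely.geometry import Point
from climada.entity import Exposures

# Hypothetical toy exposures: three points with a "value" column
gdf = gpd.GeoDataFrame(
    {"value": [1.0, 2.0, 3.0]},
    geometry=[Point(7.5, 46.2), Point(7.6, 46.3), Point(7.7, 46.4)],
    crs="EPSG:4326",
)
exp = Exposures(gdf)

# Default: the geometry column is written as WKB bytes (durable across shapely versions)
exp.write_hdf5("exposures_wkb.h5")

# Opt-in: geometries are pickled instead, faster to write but tied to the shapely version
exp.write_hdf5("exposures_pickled.h5", pickle_geometry=True)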
@@ -1184,7 +1198,14 @@ def from_hdf5(cls, file_name): |
1184 | 1198 | crs = metadata.get("crs", metadata.get("_crs")) |
1185 | 1199 | if crs is None and metadata.get("meta"): |
1186 | 1200 | crs = metadata["meta"].get("crs") |
1187 | | - exp = cls(store["exposures"], crs=crs) |
| 1201 | + data = pd.DataFrame(store["exposures"]) |
| 1202 | + try: |
| 1203 | + wkb_data = store.get("wkb_data") |
| 1204 | + except KeyError: |
| 1205 | + wkb_data = {} |
| 1206 | + for col, val in wkb_data.items(): |
| 1207 | + data[col] = from_wkb_store(val) |
| 1208 | + exp = cls(data, crs=crs) |
1188 | 1209 | for key, val in metadata.items(): |
1189 | 1210 | if key in type(exp)._metadata: # pylint: disable=protected-access |
1190 | 1211 | setattr(exp, key, val) |
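The reading side calls from_wkb_store, which is not part of this diff. A minimal counterpart sketch, assuming to_wkb_store returns plain WKB byte strings per geometry as above; the name is taken from the call site, the body is an assumption.

from geopandas import GeoSeries

def from_wkb_store(wkb_column):
    # wkb_column: a pandas Series or array of WKB byte strings, as written by
    # to_wkb_store; decode it back into shapely geometries.
    return GeoSeries.from_wkb(wkb_column)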
@@ -1553,6 +1574,21 @@ def _read_mat_optional(exposures, data, var_names): |
1553 | 1574 | pass |
1554 | 1575 |
|
1555 | 1576 |
|
| 1577 | +def to_wkb_store(geometry):
| 1578 | +    """Convert a geometry column to its WKB byte representation for HDF5 storage.
| 1579 | +
| 1580 | +    Parameters
| 1581 | +    ----------
| 1582 | +    geometry : GeoSeries
| 1583 | +        geometry column to convert
| 1584 | +
| 1585 | +    Returns
| 1586 | +    -------
| 1587 | +    np.ndarray
| 1588 | +        WKB byte strings, one entry per geometry
| 1589 | +    """
| 1590 | +    return geometry.to_wkb().to_numpy()
| 1591 | + |
1556 | 1592 | def _read_mat_metadata(exposures, data, file_name, var_names): |
1557 | 1593 | """Fill metadata in DataFrame object""" |
1558 | 1594 | try: |
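As a quick sanity check of the encoding pair, independent of this diff and using only geopandas' public to_wkb/from_wkb methods, geometries survive a WKB round trip unchanged:

from geopandas import GeoSeries
from shapely.geometry import Point

geoms = GeoSeries([Point(7.5, 46.2), Point(7.6, 46.3)], crs="EPSG:4326")
wkb_bytes = geoms.to_wkb()                                 # Series of WKB byte strings
restored = GeoSeries.from_wkb(wkb_bytes, crs=geoms.crs)    # decode back to shapely geometries
assert restored.geom_equals(geoms).all()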
|