Skip to content

Commit 9312b30

Browse files
abandon shapely pickling
1 parent 347f1f9 commit 9312b30

File tree

3 files changed

+45
-57
lines changed

3 files changed

+45
-57
lines changed

CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ Removed:
2222

2323
- `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
2424
- World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
25-
- `Exposures.write_hdf5` pickles geometry data in WKB format by default, and not as `shapely` objects anymore. There is now a flag to keep the previous behavior.
25+
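- `Exposures.write_hdf5` now always pickles geometry data in WKB format, which is faster and more durable than pickling `shapely` objects; the option to store pickled `shapely` objects has been removed. [#1051](https://github.com/CLIMADA-project/climada_python/pull/1051)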
2626

2727
### Fixed
2828

climada/entity/exposures/base.py

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,31 +1122,22 @@ def plot_basemap(
11221122
self.to_crs(crs_ori, inplace=True)
11231123
return axis
11241124

1125-
def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
1125+
def write_hdf5(self, file_name):
11261126
"""Write data frame and metadata in hdf5 format
11271127
11281128
Parameters
11291129
----------
11301130
file_name : str
11311131
(path and) file name to write to.
1132-
pickle_geometry_as_shapely : bool
1133-
flag, indicating whether the "geometry" of the `Exposures` `data` will be stored as
1134-
pickled shapely objects instead of wkb bytes. This has been the case for earlier
1135-
CLIMADA versions, up to 6.0, and is perhaps faster but less durable,
1136-
because pickled data may eventually become unreadable for future shapely versions.
1137-
Default: False
11381132
"""
11391133
LOGGER.info("Writing %s", file_name)
11401134
store = pd.HDFStore(file_name, mode="w")
11411135
pandas_df = pd.DataFrame(self.gdf)
11421136
wkb_columns = []
11431137
for col in pandas_df.columns:
11441138
if str(pandas_df[col].dtype) == "geometry":
1145-
if pickle_geometry_as_shapely:
1146-
pandas_df[col] = np.asarray(self.gdf[col])
1147-
else:
1148-
pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
1149-
wkb_columns.append(col)
1139+
pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
1140+
wkb_columns.append(col)
11501141

11511142
# Avoid pandas PerformanceWarning when writing HDF5 data
11521143
with warnings.catch_warnings():

climada/entity/exposures/test/test_base.py

Lines changed: 41 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -390,51 +390,48 @@ def test_io_hdf5_pass(self):
390390
# PerformanceWarning would result in test failure here
391391
import warnings
392392

393-
for pickle_geometry_as_shapely in [False, True]:
394-
with warnings.catch_warnings():
395-
warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
396-
exp.write_hdf5(
397-
file_name, pickle_geometry_as_shapely=pickle_geometry_as_shapely
398-
)
399-
400-
exp_read = Exposures.from_hdf5(file_name)
401-
402-
self.assertEqual(exp.ref_year, exp_read.ref_year)
403-
self.assertEqual(exp.value_unit, exp_read.value_unit)
404-
self.assertEqual(exp.description, exp_read.description)
405-
np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
406-
np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
407-
np.testing.assert_array_equal(exp.value, exp_read.value)
408-
np.testing.assert_array_equal(
409-
exp.data["deductible"].values, exp_read.data["deductible"].values
410-
)
411-
np.testing.assert_array_equal(
412-
exp.data["cover"].values, exp_read.data["cover"].values
413-
)
414-
np.testing.assert_array_equal(
415-
exp.data["region_id"].values, exp_read.data["region_id"].values
416-
)
417-
np.testing.assert_array_equal(
418-
exp.data["category_id"].values, exp_read.data["category_id"].values
419-
)
420-
np.testing.assert_array_equal(
421-
exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
422-
)
423-
np.testing.assert_array_equal(
424-
exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
425-
)
426-
np.testing.assert_array_equal(
427-
exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
428-
)
429-
np.testing.assert_array_equal(
430-
exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
431-
)
393+
with warnings.catch_warnings():
394+
warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
395+
exp.write_hdf5(file_name=file_name)
396+
397+
exp_read = Exposures.from_hdf5(file_name)
398+
399+
self.assertEqual(exp.ref_year, exp_read.ref_year)
400+
self.assertEqual(exp.value_unit, exp_read.value_unit)
401+
self.assertEqual(exp.description, exp_read.description)
402+
np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
403+
np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
404+
np.testing.assert_array_equal(exp.value, exp_read.value)
405+
np.testing.assert_array_equal(
406+
exp.data["deductible"].values, exp_read.data["deductible"].values
407+
)
408+
np.testing.assert_array_equal(
409+
exp.data["cover"].values, exp_read.data["cover"].values
410+
)
411+
np.testing.assert_array_equal(
412+
exp.data["region_id"].values, exp_read.data["region_id"].values
413+
)
414+
np.testing.assert_array_equal(
415+
exp.data["category_id"].values, exp_read.data["category_id"].values
416+
)
417+
np.testing.assert_array_equal(
418+
exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
419+
)
420+
np.testing.assert_array_equal(
421+
exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
422+
)
423+
np.testing.assert_array_equal(
424+
exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
425+
)
426+
np.testing.assert_array_equal(
427+
exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
428+
)
432429

433-
self.assertTrue(
434-
u_coord.equal_crs(exp.crs, exp_read.crs),
435-
f"{exp.crs} and {exp_read.crs} are different",
436-
)
437-
self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
430+
self.assertTrue(
431+
u_coord.equal_crs(exp.crs, exp_read.crs),
432+
f"{exp.crs} and {exp_read.crs} are different",
433+
)
434+
self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
438435

439436

440437
class TestAddSea(unittest.TestCase):

0 commit comments

Comments
 (0)