abandon shapely pickling

emanuel-schmid · emanuel-schmid · commit 9312b3044d45 · 2025-05-08T10:44:35.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -22,7 +22,7 @@ Removed:
 
 - `Hazard.local_exceedance_intensity`, `Hazard.local_return_period` and `Impact.local_exceedance_impact`, `Impact.local_return_period`, using the `climada.util.interpolation` module: New default (no binning), binning on decimals, and faster implementation [#1012](https://github.com/CLIMADA-project/climada_python/pull/1012)
 - World Bank indicator data is now downloaded directly from their API via the function `download_world_bank_indicator`, instead of relying on the `pandas-datareader` package [#1033](https://github.com/CLIMADA-project/climada_python/pull/1033)
-- `Exposures.write_hdf5` pickles geometry data in WKB format by default, and not as `shapely` objects anymore. There is now a flag to keep the previous behavior.
+- `Exposures.write_hdf5` stores geometry data as WKB bytes instead of pickled `shapely` objects, which is faster and more durable across `shapely` versions. [#1051](https://github.com/CLIMADA-project/climada_python/pull/1051)
 
 ### Fixed
 
diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py
@@ -1122,31 +1122,22 @@ def plot_basemap(
         self.to_crs(crs_ori, inplace=True)
         return axis
 
-    def write_hdf5(self, file_name, pickle_geometry_as_shapely=False):
+    def write_hdf5(self, file_name):
         """Write data frame and metadata in hdf5 format
 
         Parameters
         ----------
         file_name : str
             (path and) file name to write to.
-        pickle_geometry_as_shapely : bool
-            flag, indicating whether the "geometry" of the Exposures` `data` will be stored as
-            pickled shapely objects instead of wkb bytes. This has been the case for earlier
-            CLIMADA version, up to 6.0, and is perhaps faster but less durable,
-            because pickled data may evantually get unreadable for future shapely versions.
-            Default: False
         """
         LOGGER.info("Writing %s", file_name)
         store = pd.HDFStore(file_name, mode="w")
         pandas_df = pd.DataFrame(self.gdf)
         wkb_columns = []
         for col in pandas_df.columns:
             if str(pandas_df[col].dtype) == "geometry":
-                if pickle_geometry_as_shapely:
-                    pandas_df[col] = np.asarray(self.gdf[col])
-                else:
-                    pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
-                    wkb_columns.append(col)
+                pandas_df[col] = gpd.GeoSeries(pandas_df[col]).to_wkb()
+                wkb_columns.append(col)
 
         # Avoid pandas PerformanceWarning when writing HDF5 data
         with warnings.catch_warnings():
diff --git a/climada/entity/exposures/test/test_base.py b/climada/entity/exposures/test/test_base.py
@@ -390,51 +390,48 @@ def test_io_hdf5_pass(self):
         # PerformanceWarning would result in test failure here
         import warnings
 
-        for pickle_geometry_as_shapely in [False, True]:
-            with warnings.catch_warnings():
-                warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
-                exp.write_hdf5(
-                    file_name, pickle_geometry_as_shapely=pickle_geometry_as_shapely
-                )
-
-            exp_read = Exposures.from_hdf5(file_name)
-
-            self.assertEqual(exp.ref_year, exp_read.ref_year)
-            self.assertEqual(exp.value_unit, exp_read.value_unit)
-            self.assertEqual(exp.description, exp_read.description)
-            np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
-            np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
-            np.testing.assert_array_equal(exp.value, exp_read.value)
-            np.testing.assert_array_equal(
-                exp.data["deductible"].values, exp_read.data["deductible"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["cover"].values, exp_read.data["cover"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["region_id"].values, exp_read.data["region_id"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["category_id"].values, exp_read.data["category_id"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
-            )
-            np.testing.assert_array_equal(
-                exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
-            )
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", category=pd.errors.PerformanceWarning)
+            exp.write_hdf5(file_name=file_name)
+
+        exp_read = Exposures.from_hdf5(file_name)
+
+        self.assertEqual(exp.ref_year, exp_read.ref_year)
+        self.assertEqual(exp.value_unit, exp_read.value_unit)
+        self.assertEqual(exp.description, exp_read.description)
+        np.testing.assert_array_equal(exp.latitude, exp_read.latitude)
+        np.testing.assert_array_equal(exp.longitude, exp_read.longitude)
+        np.testing.assert_array_equal(exp.value, exp_read.value)
+        np.testing.assert_array_equal(
+            exp.data["deductible"].values, exp_read.data["deductible"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["cover"].values, exp_read.data["cover"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["region_id"].values, exp_read.data["region_id"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["category_id"].values, exp_read.data["category_id"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["impf_TC"].values, exp_read.data["impf_TC"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["centr_TC"].values, exp_read.data["centr_TC"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["impf_FL"].values, exp_read.data["impf_FL"].values
+        )
+        np.testing.assert_array_equal(
+            exp.data["centr_FL"].values, exp_read.data["centr_FL"].values
+        )
 
-            self.assertTrue(
-                u_coord.equal_crs(exp.crs, exp_read.crs),
-                f"{exp.crs} and {exp_read.crs} are different",
-            )
-            self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
+        self.assertTrue(
+            u_coord.equal_crs(exp.crs, exp_read.crs),
+            f"{exp.crs} and {exp_read.crs} are different",
+        )
+        self.assertTrue(u_coord.equal_crs(exp.gdf.crs, exp_read.gdf.crs))
 
 
 class TestAddSea(unittest.TestCase):