
Commit f95b267

Improve performance of LitPop (#720)
* Improve handling of region_id in LitPop
* Assume data from LitPop is gridded when determining region IDs.
* Add option to pass 'region_id' parameter to LitPop.from_shape.
* Streamline gridpoints_core_calc
* Avoid copying data by using np.asarray.
* Streamline computations by relying on numpy broadcasting.
* Avoid rounding the same data twice
* Fix linter issue and typo
* Normalize string delimiters
* Fix typo
* Add suggestions from GitHub discussions
* Rename return value in local function.
* Remove named argument call in `LitPop.from_shape`.
* Improve `region_id` parameter documentation.
* Fix return variable name in gridpoints_core_calc
* Update CHANGELOG.md
1 parent 54dd0fc commit f95b267
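
The user-facing change is the new `region_id` argument of `LitPop.from_shape` (see the litpop.py diff below). A minimal usage sketch, assuming a local CLIMADA installation with the LitPop input data available; the polygon, the total value, and the numeric ISO code 756 (Switzerland) are illustrative placeholders, not taken from this commit:

    from shapely.geometry import Polygon
    from climada.entity import LitPop

    # Illustrative rectangle in lon/lat degrees (not from the commit)
    shape = Polygon([(8.0, 46.8), (8.6, 46.8), (8.6, 47.2), (8.0, 47.2)])

    # With region_id given, this single code is written to every point of the
    # resulting GeoDataFrame and the per-point country lookup is skipped.
    exp = LitPop.from_shape(shape, total_value=1e9, region_id=756)

    # With region_id=None (the default), the region ID of every coordinate is
    # determined automatically, at a slight computational cost.
    exp_auto = LitPop.from_shape(shape, total_value=1e9)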

File tree: 3 files changed (+85, −57 lines)

- CHANGELOG.md
- climada/entity/exposures/litpop/litpop.py
- climada/entity/exposures/test/test_litpop.py


CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -39,6 +39,8 @@ Removed:
 - Use `myst-nb` for parsing Jupyter Notebooks for the documentation instead of `nbsphinx` [#712](https://github.com/CLIMADA-project/climada_python/pull/712)
 - Installation guide now recommends installing CLIMADA directly via `conda install` [#714](https://github.com/CLIMADA-project/climada_python/pull/714)
 - `Exposures.affected_total_value` now takes a hazard intensity threshold as argument. Affected values are only those for which at least one event exceeds the threshold. (previously, all exposures points with an assigned centroid were considered affected) [#702](https://github.com/CLIMADA-project/climada_python/pull/702)
+- Add option to pass region ID to `LitPop.from_shape` [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
+- Slightly improved performance on `LitPop`-internal computations [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
 
 ### Fixed
 
climada/entity/exposures/litpop/litpop.py

Lines changed: 81 additions & 53 deletions
@@ -460,8 +460,18 @@ def set_custom_shape(self, *args, **kwargs):
         self.__dict__ = LitPop.from_shape(*args, **kwargs).__dict__
 
     @classmethod
-    def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_unit='USD',
-                   reference_year=DEF_REF_YEAR, gpw_version=GPW_VERSION, data_dir=SYSTEM_DIR):
+    def from_shape(
+        cls,
+        shape,
+        total_value,
+        res_arcsec=30,
+        exponents=(1,1),
+        value_unit='USD',
+        region_id=None,
+        reference_year=DEF_REF_YEAR,
+        gpw_version=GPW_VERSION,
+        data_dir=SYSTEM_DIR,
+    ):
         """init LitPop exposure object for a custom shape.
         Requires user input regarding the total value to be disaggregated.
@@ -488,6 +498,12 @@ def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_un
             Defining power with which lit (nightlights) and pop (gpw) go into LitPop.
         value_unit : str
             Unit of exposure values. The default is USD.
+        region_id : int, optional
+            The numeric ISO 3166 region associated with the shape. If set to a value,
+            this single value will be set for every coordinate in the ``GeoDataFrame``
+            of the resulting ``LitPop`` instance. If ``None`` (default), the region ID
+            for every coordinate will be determined automatically (at a slight
+            computational cost).
         reference_year : int, optional
             Reference year for data sources. Default: CONFIG.exposures.def_ref_year
         gpw_version : int, optional
@@ -512,10 +528,15 @@ def from_shape(cls, shape, total_value, res_arcsec=30, exponents=(1,1), value_un
                                       'GeoSeries. Loop over elements of series outside method.')
 
         tag = Tag()
-        litpop_gdf, _ = _get_litpop_single_polygon(shape, reference_year,
-                                                   res_arcsec, data_dir,
-                                                   gpw_version, exponents,
-                                                   )
+        litpop_gdf, _ = _get_litpop_single_polygon(
+            shape,
+            reference_year,
+            res_arcsec,
+            data_dir,
+            gpw_version,
+            exponents,
+            region_id
+        )
 
         # disaggregate total value proportional to LitPop values:
         if isinstance(total_value, (float, int)):
@@ -673,8 +694,9 @@ def _get_litpop_single_polygon(polygon, reference_year, res_arcsec, data_dir,
         Defining power with which lit (nightlights) and pop (gpw) go into LitPop. To get
         nightlights^3 without population count: (3, 0). To use population count alone: (0, 1).
     region_id : int, optional
-        if provided, region_id of gdf is set to value.
-        The default is None, this implies that region_id is not set.
+        if provided, region_id of gdf is set to value. The default is ``None``, in which
+        case the region is determined automatically for every location of the resulting
+        GeoDataFrame.
     verbose : bool, optional
         Enable verbose logging about the used GPW version and reference year as well as about the
         boundary case where no grid points from the GPW grid are contained in the specified
@@ -755,18 +777,19 @@ def _get_litpop_single_polygon(polygon, reference_year, res_arcsec, data_dir,
                                            meta_out['width'],
                                            meta_out['height'])
     # init GeoDataFrame from data and coordinates:
-    gdf = geopandas.GeoDataFrame({'value': litpop_array.flatten()}, crs=meta_out['crs'],
-                                 geometry=geopandas.points_from_xy(
-                                     np.round_(lon.flatten(), decimals = 8, out = None),
-                                     np.round_(lat.flatten(), decimals = 8, out = None)
-                                     )
-                                 )
-    gdf['latitude'] = np.round_(lat.flatten(), decimals = 8, out = None)
-    gdf['longitude'] = np.round_(lon.flatten(), decimals = 8, out = None)
+    latitude = np.round(lat.flatten(), decimals=8)
+    longitude = np.round(lon.flatten(), decimals=8)
+    gdf = geopandas.GeoDataFrame(
+        {"value": litpop_array.flatten(), "latitude": latitude, "longitude": longitude},
+        crs=meta_out['crs'],
+        geometry=geopandas.points_from_xy(longitude, latitude),
+    )
     if region_id is not None: # set region_id
         gdf['region_id'] = region_id
     else:
-        gdf['region_id'] = u_coord.get_country_code(gdf.latitude, gdf.longitude)
+        gdf['region_id'] = u_coord.get_country_code(
+            gdf.latitude, gdf.longitude, gridded=True
+        )
     # remove entries outside polygon with `dropna` and return GeoDataFrame:
     return gdf.dropna(), meta_out
 
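
The `else` branch above is where the speed-up announced in the commit message ("Assume data from LitPop is gridded when determining region IDs") comes in: with `region_id` given, a single constant is assigned; otherwise `get_country_code` is called with `gridded=True`. A standalone sketch of the same branch, in which only the sample 2x2 grid is invented, while the calls mirror the diff:

    import numpy as np
    import geopandas
    import climada.util.coordinates as u_coord

    # Illustrative 2x2 grid of points (lat/lon in degrees)
    latitude = np.round(np.array([47.0, 47.0, 47.1, 47.1]), decimals=8)
    longitude = np.round(np.array([8.0, 8.1, 8.0, 8.1]), decimals=8)
    gdf = geopandas.GeoDataFrame(
        {"value": np.ones(4), "latitude": latitude, "longitude": longitude},
        crs="EPSG:4326",
        geometry=geopandas.points_from_xy(longitude, latitude),
    )

    region_id = None  # or e.g. 756 to skip the lookup entirely
    if region_id is not None:
        gdf["region_id"] = region_id  # one constant value, no lookup cost
    else:
        # gridded=True lets the lookup exploit the regular grid of LitPop points
        gdf["region_id"] = u_coord.get_country_code(
            gdf.latitude, gdf.longitude, gridded=True
        )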
@@ -953,8 +976,10 @@ def reproject_input_data(data_array_list, meta_list,
     meta_out['width'] = data_out_list[-1].shape[1]
     return data_out_list, meta_out
 
-def gridpoints_core_calc(data_arrays, offsets=None, exponents=None,
-                         total_val_rescale=None):
+
+def gridpoints_core_calc(
+    data_arrays, offsets=None, exponents=None, total_val_rescale=None
+):
     """
     Combines N dense numerical arrays by point-wise multipilcation and
     optionally rescales to new total value:
@@ -992,47 +1017,50 @@ def gridpoints_core_calc(data_arrays, offsets=None, exponents=None,
 
     Returns
     -------
-    result_array : np.array of same shape as arrays in data_arrays
+    np.array of same shape as arrays in data_arrays
         Results from calculation described above.
     """
-    # convert input data to arrays
-    # check integrity of data_array input (length and type):
-    try:
-        data_arrays = [np.array(data) for data in data_arrays]
-        if len(list({data.shape for data in data_arrays})) > 1:
-            raise ValueError("Elements in data_arrays don't agree in shape.")
-    except AttributeError as err:
-        raise TypeError("data_arrays or contained elements have wrong type.") from err
-
-    # if None, defaults for offsets and exponents are set:
-    if offsets is None:
-        offsets = np.zeros(len(data_arrays))
-    if exponents is None:
-        exponents = np.ones(len(data_arrays))
-    if np.min(offsets) < 0:
-        raise ValueError("offset values < 0 are not allowed, because negative offset "
-                         "values produce undesired negative disaggregation values.")
-    if np.min(exponents) < 0:
-        raise ValueError("exponents < 0 are not allowed, because negative exponents "
-                         "produce 'inf' values where the initial value is 0.")
+    # Convert input data to arrays
+    # NOTE: Setting dtype will cause an error if shapes are not compatible
+    data = np.asarray(data_arrays, dtype=float)
+
+    def prepare_input(input_data, default_value, name):
+        """Prepare the offset and exposure inputs
+
+        If they are ``None``, use the default values. If not, cast them to an array and
+        ensure that the array has only one dimension. Finally, check if the values are
+        positive.
+        """
+        if input_data is None:
+            return default_value
+
+        array = np.asarray(input_data, dtype=float)
+        if array.ndim > 1:
+            raise ValueError(f"Provide {name} as 1D array or list of scalars")
+        if np.any(array < 0):
+            raise ValueError(
+                f"{name} values < 0 are not allowed, because negative {name} "
+                "values produce undesired negative disaggregation values."
+            )
+        return array
+
+    offsets = prepare_input(offsets, 0, "offsets")
+    exponents = prepare_input(exponents, 1, "exponents")
+
     # Steps 1-3: arrays are multiplied after application of offets and exponents:
-    # (arrays are converted to float to prevent ValueError:
-    # "Integers to negative integer powers are not allowed.")
-    result_array = np.power(np.array(data_arrays[0]+offsets[0], dtype=float),
-                            exponents[0])
-    for i in np.arange(1, len(data_arrays)):
-        result_array = np.multiply(result_array,
-                                   np.power(np.array(data_arrays[i]+offsets[i], dtype=float),
-                                            exponents[i])
-                                   )
-
-    # Steps 4+5: if total value for rescaling is provided, result_array is normalized and
+    # NOTE: Transpose so that broadcasting over last dimension works. Computing the
+    #   product over the last axis is also much faster. Then transpose back.
+    data = (data.T + offsets) ** exponents
+    data = data.prod(axis=-1).T
+
+    # Steps 4+5: if total value for rescaling is provided, result is normalized and
     # scaled with this total value (total_val_rescale):
     if isinstance(total_val_rescale, (float, int)):
-        return np.divide(result_array, result_array.sum()) * total_val_rescale
+        return data / data.sum() * total_val_rescale
     if total_val_rescale is not None:
         raise TypeError("total_val_rescale must be int or float.")
-    return result_array
+    return data
+
 
 # The following functions are only required if calc_admin1 is True,
 # not for core LitPop. They are maintained here mainly for backward compatibility
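
The loop-free core above is the main performance change of this commit. A small self-contained sketch showing that the broadcasted expression reproduces the old point-wise loop; the two sample layers, offsets, and exponents are invented for illustration:

    import numpy as np

    # Two illustrative 2x3 layers, e.g. "lit" and "pop"
    data_arrays = [np.array([[1., 2., 3.], [4., 5., 6.]]),
                   np.array([[2., 2., 2.], [3., 3., 3.]])]
    offsets = np.array([0.0, 1.0])
    exponents = np.array([3.0, 1.0])

    # Old approach: explicit Python loop over the N arrays
    result_loop = np.power(data_arrays[0] + offsets[0], exponents[0])
    for i in range(1, len(data_arrays)):
        result_loop = result_loop * np.power(data_arrays[i] + offsets[i], exponents[i])

    # New approach: stack once, broadcast offsets/exponents over the last axis
    data = np.asarray(data_arrays, dtype=float)  # shape (N, 2, 3)
    data = (data.T + offsets) ** exponents       # transposed shape (3, 2, N)
    result_bcast = data.prod(axis=-1).T          # product over N, transpose back

    assert np.allclose(result_loop, result_bcast)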
climada/entity/exposures/test/test_litpop.py

Lines changed: 2 additions & 4 deletions
@@ -205,13 +205,11 @@ def test_gridpoints_core_calc_input_errors(self):
             lp.gridpoints_core_calc(data_arrays_demo(4))
 
         # wrong format:
-        with self.assertRaises(TypeError):
+        with self.assertRaises(ValueError):
             lp.gridpoints_core_calc(data, exponents=['a', 'b'])
         data.append('hello i am a string')
         with self.assertRaises(ValueError):
             lp.gridpoints_core_calc(data)
-        with self.assertRaises(TypeError):
-            lp.gridpoints_core_calc(777)
 
     def test_gridpoints_core_calc_default_1(self):
         """test function gridpoints_core_calc, i.e. core data combination
@@ -374,4 +372,4 @@ def test_get_value_unit_pass(self):
 if __name__ == "__main__":
     TESTS = unittest.TestLoader().loadTestsFromTestCase(TestLitPop)
     # TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestUncertainty))
-    unittest.TextTestRunner(verbosity=2).run(TESTS)
+    unittest.TextTestRunner(verbosity=2).run(TESTS)
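
The expected exception in the first test changes because `gridpoints_core_calc` now casts its inputs with `np.asarray(..., dtype=float)`, so non-numeric exponents and mismatched array shapes both fail as `ValueError` during that cast. A numpy-only sketch of the two failure modes exercised by the updated test:

    import numpy as np

    # Non-numeric exponents fail while casting to float ...
    try:
        np.asarray(['a', 'b'], dtype=float)
    except ValueError as err:
        print("exponents:", err)  # e.g. could not convert string to float: 'a'

    # ... and so does a data list whose elements do not share shape and type
    try:
        np.asarray([np.ones((2, 2)), 'hello i am a string'], dtype=float)
    except ValueError as err:
        print("data_arrays:", err)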