Skip to content

Commit e9f2803

Browse files
author
Chahan Kropf
committed
Merge branch 'develop' into bugfix/lines_poly_impf
2 parents 1646381 + 912127e commit e9f2803

File tree

10 files changed

+472
-145
lines changed

10 files changed

+472
-145
lines changed

CHANGELOG.md

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,20 @@ Removed:
1717
### Added
1818

1919
- `Impact.impact_at_reg` method for aggregating impacts per country or custom region [#642](https://github.com/CLIMADA-project/climada_python/pull/642)
20+
- `Impact.match_centroids` convenience method for matching (hazard) centroids to impact objects [#602](https://github.com/CLIMADA-project/climada_python/pull/602)
21+
- `climada.util.coordinates.match_centroids` method for matching (hazard) centroids to GeoDataFrames [#602](https://github.com/CLIMADA-project/climada_python/pull/602)
2022

21-
### Changed
23+
### Changed
2224

2325
- Improved error messages from `climada.CONFIG` in case of missing configuration values [#670](https://github.com/CLIMADA-project/climada_python/pull/670)
26+
- Refactored `Exposure.assign_centroids` using a new util function `u_coord.match_centroids` [#602](https://github.com/CLIMADA-project/climada_python/pull/602)
27+
- Renamed `climada.util.coordinate.assign_grid_points` to `match_grid_points` and `climada.util.coordinates.assign_coordinates` to `match_coordinates`
28+
[#602](https://github.com/CLIMADA-project/climada_python/pull/602)
29+
- Modified the method to disaggregate lines in the lines_polygons_handler utility module in order to better conserve the total length of all lines on average [#679](https://github.com/CLIMADA-project/climada_python/pull/679).
2430

2531
### Fixed
26-
- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666)
32+
33+
- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666)
2734

2835
### Deprecated
2936

climada/engine/impact.py

100755100644
Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1672,7 +1672,7 @@ def _selected_events_idx(self, event_ids, event_names, dates, nb_events):
16721672
return sel_ev
16731673

16741674
def _selected_exposures_idx(self, coord_exp):
1675-
assigned_idx = u_coord.assign_coordinates(self.coord_exp, coord_exp, threshold=0)
1675+
assigned_idx = u_coord.match_coordinates(self.coord_exp, coord_exp, threshold=0)
16761676
sel_exp = (assigned_idx >= 0).nonzero()[0]
16771677
if sel_exp.size == 0:
16781678
LOGGER.warning("No exposure coordinates match the selection.")
@@ -1783,6 +1783,38 @@ def stack_attribute(attr_name: str) -> np.ndarray:
17831783
**kwargs,
17841784
)
17851785

1786+
def match_centroids(self, hazard, distance='euclidean',
1787+
threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD):
1788+
"""
1789+
Finds the closest hazard centroid for each impact coordinate.
1790+
Creates a temporary GeoDataFrame and uses ``u_coord.match_centroids()``.
1791+
See there for details and parameters
1792+
1793+
Parameters
1794+
----------
1795+
hazard : Hazard
1796+
Hazard to match (with raster or vector centroids).
1797+
distance : str, optional
1798+
Distance to use in case of vector centroids.
1799+
Possible values are "euclidean", "haversine" and "approx".
1800+
Default: "euclidean"
1801+
threshold : float
1802+
If the distance (in km) to the nearest neighbor exceeds `threshold`,
1803+
the index `-1` is assigned.
1804+
Set `threshold` to 0, to disable nearest neighbor matching.
1805+
Default: 100 (km)
1806+
1807+
Returns
1808+
-------
1809+
np.array
1810+
array of closest Hazard centroids, aligned with the Impact's `coord_exp` array
1811+
"""
1812+
1813+
return u_coord.match_centroids(
1814+
self._build_exp().gdf,
1815+
hazard.centroids,
1816+
distance=distance,
1817+
threshold=threshold)
17861818

17871819
@dataclass
17881820
class ImpactFreqCurve():

climada/engine/test/test_impact.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,20 @@ def test__exp_build_event(self):
888888
self.assertEqual(exp.value_unit, imp.unit)
889889
self.assertEqual(exp.ref_year, 0)
890890

891+
class TestMatchCentroids(unittest.TestCase):
892+
893+
def test_match_centroids(self):
894+
"Test that hazard centroids get assigned correctly"
895+
exp = ENT.exposures
896+
exp.assign_centroids(HAZ)
897+
fake_eai_exp = np.arange(len(exp.gdf))
898+
fake_at_event = np.arange(HAZ.size)
899+
fake_aai_agg = np.sum(fake_eai_exp)
900+
imp = Impact.from_eih(exp, ENT.impact_funcs, HAZ,
901+
fake_at_event, fake_eai_exp, fake_aai_agg)
902+
imp_centr = imp.match_centroids(HAZ)
903+
np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)
904+
891905

892906
class TestImpactH5IO(unittest.TestCase):
893907
"""Tests for reading and writing Impact from/to H5 files"""
@@ -1106,4 +1120,5 @@ def write_tag(group, tag_kwds):
11061120
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpact))
11071121
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactH5IO))
11081122
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactConcat))
1123+
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestAssignCentroids))
11091124
unittest.TextTestRunner(verbosity=2).run(TESTS)

climada/entity/exposures/base.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,11 @@ def assign_centroids(self, hazard, distance='euclidean',
369369
threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD,
370370
overwrite=True):
371371
"""Assign for each exposure coordinate closest hazard coordinate.
372+
The Exposures ``gdf`` will be altered by this method. It will have an additional
373+
(or modified) column named ``centr_[hazard.HAZ_TYPE]`` after the call.
374+
375+
Uses the utility function ``u_coord.match_centroids``. See there for details
376+
and parameters.
372377
373378
The value -1 is used for distances larger than ``threshold`` in point distances.
374379
In case of raster hazards the value -1 is used for centroids outside of the raster.
@@ -392,9 +397,10 @@ def assign_centroids(self, hazard, distance='euclidean',
392397
393398
See Also
394399
--------
395-
climada.util.coordinates.assign_coordinates
400+
climada.util.coordinates.match_grid_points: method to associate centroids to
401+
exposure points when centroids is a raster
402+
climada.util.coordinates.match_coordinates:
396403
method to associate centroids to exposure points
397-
398404
Notes
399405
-----
400406
The default order of use is:
@@ -422,18 +428,16 @@ def assign_centroids(self, hazard, distance='euclidean',
422428

423429
LOGGER.info('Matching %s exposures with %s centroids.',
424430
str(self.gdf.shape[0]), str(hazard.centroids.size))
431+
425432
if not u_coord.equal_crs(self.crs, hazard.centroids.crs):
426433
raise ValueError('Set hazard and exposure to same CRS first!')
427-
if hazard.centroids.meta:
428-
assigned = u_coord.assign_grid_points(
429-
self.gdf.longitude.values, self.gdf.latitude.values,
430-
hazard.centroids.meta['width'], hazard.centroids.meta['height'],
431-
hazard.centroids.meta['transform'])
432-
else:
433-
assigned = u_coord.assign_coordinates(
434-
np.stack([self.gdf.latitude.values, self.gdf.longitude.values], axis=1),
435-
hazard.centroids.coord, distance=distance, threshold=threshold)
436-
self.gdf[centr_haz] = assigned
434+
# Note: equal_crs is tested here, rather than within match_centroids(),
435+
# because exp.gdf.crs may not be defined, but exp.crs must be defined.
436+
437+
assigned_centr = u_coord.match_centroids(self.gdf, hazard.centroids,
438+
distance=distance, threshold=threshold)
439+
self.gdf[centr_haz] = assigned_centr
440+
437441

438442
def set_geometry_points(self, scheduler=None):
439443
"""Set geometry attribute of GeoDataFrame with Points from latitude and

climada/hazard/base.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,7 +1452,7 @@ def select_tight(self, buffer=NEAREST_NEIGHBOR_THRESHOLD/ONE_LAT_KM,
14521452
See also
14531453
--------
14541454
self.select: Method to select centroids by lat/lon extent
1455-
util.coordinates.assign_coordinates: algorithm to match centroids.
1455+
util.coordinates.match_coordinates: algorithm to match centroids.
14561456
14571457
"""
14581458

@@ -2320,7 +2320,7 @@ def append(self, *others):
23202320
# map individual centroids objects to union
23212321
centroids = Centroids.union(*[haz.centroids for haz in haz_list])
23222322
hazcent_in_cent_idx_list = [
2323-
u_coord.assign_coordinates(haz.centroids.coord, centroids.coord, threshold=0)
2323+
u_coord.match_coordinates(haz.centroids.coord, centroids.coord, threshold=0)
23242324
for haz in haz_list_nonempty
23252325
]
23262326

@@ -2408,7 +2408,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
24082408
Centroids instance on which to map the hazard.
24092409
threshold: int or float
24102410
Threshold (in km) for mapping haz.centroids not in centroids.
2411-
Argument is passed to climada.util.coordinates.assign_coordinates.
2411+
Argument is passed to climada.util.coordinates.match_coordinates.
24122412
Default: 100 (km)
24132413
24142414
Returns
@@ -2423,7 +2423,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
24232423
24242424
See Also
24252425
--------
2426-
util.coordinates.assign_coordinates: algorithm to match centroids.
2426+
util.coordinates.match_coordinates: algorithm to match centroids.
24272427
24282428
"""
24292429
# define empty hazard
@@ -2432,7 +2432,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
24322432

24332433
# indices for mapping matrices onto common centroids
24342434
if centroids.meta:
2435-
new_cent_idx = u_coord.assign_grid_points(
2435+
new_cent_idx = u_coord.match_grid_points(
24362436
self.centroids.lon, self.centroids.lat,
24372437
centroids.meta['width'], centroids.meta['height'],
24382438
centroids.meta['transform'])
@@ -2441,7 +2441,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
24412441
"the raster defined by centroids.meta."
24422442
" Please choose a larger raster.")
24432443
else:
2444-
new_cent_idx = u_coord.assign_coordinates(
2444+
new_cent_idx = u_coord.match_coordinates(
24452445
self.centroids.coord, centroids.coord, threshold=threshold
24462446
)
24472447
if -1 in new_cent_idx:

climada/util/coordinates.py

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,12 @@ def get_region_gridpoints(countries=None, regions=None, resolution=150,
924924
lat, lon = [ar.ravel() for ar in [lat, lon]]
925925
return lat, lon
926926

927-
def assign_grid_points(x, y, grid_width, grid_height, grid_transform):
927+
def assign_grid_points(*args, **kwargs):
928+
"""This function has been renamed, use ``match_grid_points`` instead."""
929+
LOGGER.warning("This function has been renamed, use match_grid_points instead.")
930+
return match_grid_points(*args, **kwargs)
931+
932+
def match_grid_points(x, y, grid_width, grid_height, grid_transform):
928933
"""To each coordinate in `x` and `y`, assign the closest centroid in the given raster grid
929934
930935
Make sure that your grid specification is relative to the same coordinate reference system as
@@ -961,7 +966,12 @@ def assign_grid_points(x, y, grid_width, grid_height, grid_transform):
961966
assigned[(y_i < 0) | (y_i >= grid_height)] = -1
962967
return assigned
963968

964-
def assign_coordinates(coords, coords_to_assign, distance="euclidean",
969+
def assign_coordinates(*args, **kwargs):
970+
"""This function has been renamed, use ``match_coordinates`` instead."""
971+
LOGGER.warning("This function has been renamed, use match_coordinates instead.")
972+
return match_coordinates(*args, **kwargs)
973+
974+
def match_coordinates(coords, coords_to_assign, distance="euclidean",
965975
threshold=NEAREST_NEIGHBOR_THRESHOLD, **kwargs):
966976
"""To each coordinate in `coords`, assign a matching coordinate in `coords_to_assign`
967977
@@ -1057,6 +1067,71 @@ def assign_coordinates(coords, coords_to_assign, distance="euclidean",
10571067
coords_to_assign, coords[not_assigned_idx_mask], threshold, **kwargs)
10581068
return assigned_idx
10591069

1070+
def match_centroids(coord_gdf, centroids, distance='euclidean',
1071+
threshold=NEAREST_NEIGHBOR_THRESHOLD):
1072+
"""Assign to each gdf coordinate point its closest centroids's coordinate.
1073+
If disatances > threshold in points' distances, -1 is returned.
1074+
If centroids are in a raster and coordinate point is outside of it ``-1`` is assigned
1075+
1076+
Parameters
1077+
----------
1078+
coord_gdf : gpd.GeoDataFrame
1079+
GeoDataframe with defined latitude/longitude column and crs
1080+
centroids : Centroids
1081+
(Hazard) centroids to match (as raster or vector centroids).
1082+
distance : str, optional
1083+
Distance to use in case of vector centroids.
1084+
Possible values are "euclidean", "haversine" and "approx".
1085+
Default: "euclidean"
1086+
threshold : float, optional
1087+
If the distance (in km) to the nearest neighbor exceeds `threshold`,
1088+
the index `-1` is assigned.
1089+
Set `threshold` to 0, to disable nearest neighbor matching.
1090+
Default: ``NEAREST_NEIGHBOR_THRESHOLD`` (100km)
1091+
1092+
See Also
1093+
--------
1094+
climada.util.coordinates.match_grid_points: method to associate centroids to
1095+
coordinate points when centroids is a raster
1096+
climada.util.coordinates.match_coordinates: method to associate centroids to
1097+
coordinate points
1098+
1099+
Notes
1100+
-----
1101+
The default order of use is:
1102+
1103+
1. if centroid raster is defined, assign the closest raster point
1104+
to each gdf coordinate point.
1105+
2. if no raster, assign centroids to the nearest neighbor using
1106+
euclidian metric
1107+
1108+
Both cases can introduce innacuracies for coordinates in lat/lon
1109+
coordinates as distances in degrees differ from distances in meters
1110+
on the Earth surface, in particular for higher latitude and distances
1111+
larger than 100km. If more accuracy is needed, please use 'haversine'
1112+
distance metric. This however is slower for (quasi-)gridded data,
1113+
and works only for non-gridded data.
1114+
"""
1115+
1116+
try:
1117+
if not equal_crs(coord_gdf.crs, centroids.crs):
1118+
raise ValueError('Set hazard and GeoDataFrame to same CRS first!')
1119+
except AttributeError:
1120+
# If the coord_gdf has no crs defined (or no valid geometry column),
1121+
# no error is raised and it is assumed that the user set the crs correctly
1122+
pass
1123+
1124+
if centroids.meta:
1125+
assigned = match_grid_points(
1126+
coord_gdf.longitude.values, coord_gdf.latitude.values,
1127+
centroids.meta['width'], centroids.meta['height'],
1128+
centroids.meta['transform'])
1129+
else:
1130+
assigned = match_coordinates(
1131+
np.stack([coord_gdf.latitude.values, coord_gdf.longitude.values], axis=1),
1132+
centroids.coord, distance=distance, threshold=threshold)
1133+
return assigned
1134+
10601135
@numba.njit
10611136
def _dist_sqr_approx(lats1, lons1, cos_lats1, lats2, lons2):
10621137
"""Compute squared equirectangular approximation distance. Values need

climada/util/lines_polys_handler.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -930,8 +930,19 @@ def _line_to_pnts(gdf_lines, res, to_meters):
930930
else:
931931
line_lengths = gdf_lines.length
932932

933+
# Add warning if lines are too short w.r.t. resolution
934+
failing_res_check_count = len(line_lengths[line_lengths > 10*res])
935+
if failing_res_check_count > 0:
936+
LOGGER.warning(
937+
"%d lines with a length < 10*resolution were found. "
938+
"Each of these lines is disaggregate to one point. "
939+
"Reaggregatint values will thus likely lead to overestimattion. "
940+
"Consider chosing a smaller resolution or filter out the short lines. ",
941+
failing_res_check_count
942+
)
943+
933944
line_fractions = [
934-
np.linspace(0, 1, num=_pnts_per_line(length, res))
945+
_line_fraction(length, res)
935946
for length in line_lengths
936947
]
937948

@@ -951,6 +962,29 @@ def _line_to_pnts(gdf_lines, res, to_meters):
951962
return gdf_points
952963

953964

965+
def _line_fraction(length, res):
966+
"""
967+
Compute the fraction in which to divide a line of given length at given resolution
968+
969+
Parameters
970+
----------
971+
length : float
972+
Length of a string
973+
res : float
974+
Resolution (length of a string element)
975+
976+
Returns
977+
-------
978+
np.ndarray
979+
Array of the fraction at which to divide the string of given length
980+
into points at the chosen resolution.
981+
982+
"""
983+
nb_points = _pnts_per_line(length, res)
984+
eff_res = 1 / nb_points
985+
start = eff_res / 2
986+
return np.arange(start, 1, eff_res)
987+
954988
def _pnts_per_line(length, res):
955989
"""Calculate number of points fitting along a line, given a certain
956990
resolution (spacing) res between points.
@@ -965,7 +999,7 @@ def _pnts_per_line(length, res):
965999
int
9661000
Number of points along line
9671001
"""
968-
return int(np.ceil(length / res) + 1)
1002+
return int(max(np.round(length / res), 1))
9691003

9701004

9711005
def _swap_geom_cols(gdf, geom_to, new_geom):

0 commit comments

Comments
 (0)