Commit 0d69269

Merge pull request #2 from mmyrte/master
StormEurope class and utility
2 parents 0640683 + 01f9d42 commit 0d69269


7 files changed (+254, -18 lines)


climada/hazard/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@
 from .tag import *
 from .trop_cyclone import *
 from .tc_tracks import *
+from .storm_europe import *
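A minimal import check (not part of the commit): with the wildcard import added above and __all__ = ['StormEurope'] in the new module, the class becomes reachable from the hazard package directly.

from climada.hazard import StormEurope

haz = StormEurope()
print(haz.ssi)               # empty numpy array until footprints are read
print(haz.intensity_thres)   # 15, the storage threshold in m/s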

climada/hazard/centroids/base.py

Lines changed: 9 additions & 2 deletions
@@ -21,8 +21,10 @@
 
 FILE_EXT = {'.mat': 'MAT',
             '.xls': 'XLS',
-            '.xlsx': 'XLS'
-            }
+            '.xlsx': 'XLS',
+            '.csv': 'CSV',
+            }
+
 """ Supported files format to read from """
 
 class Centroids(object):
@@ -213,6 +215,11 @@ def lon(self):
         """ Get longitude from coord array """
         return self.coord[:, 1]
 
+    @property
+    def size(self):
+        """ Get count of centroids """
+        return self.id.size
+
     @staticmethod
     def get_sup_file_format():
         """ Get supported file extensions that can be read.

climada/hazard/centroids/source.py

Lines changed: 33 additions & 2 deletions
@@ -33,6 +33,12 @@
               }
 """ Excel variable names """
 
+DEF_VAR_CSV = {'lat': 'X',
+               'lon': 'Y',
+               'region_id': 'iso_n3',
+               }
+""" CSV variable names """
+
 LOGGER = logging.getLogger(__name__)
 
 def read_excel(centroids, file_name, var_names):
@@ -111,6 +117,31 @@ def read_att_mat(centroids, cent, num_try, var_names):
     except KeyError:
         pass
 
+def read_csv(centroids, file_name, var_names):
+    """ Read csv centroids representations. Currently only supports lat/lon
+    and region_id.
+    """
+    # TODO iterate over additional variables in var_names
+    if var_names is None:
+        var_names = DEF_VAR_CSV
+
+    cent_pd = pd.read_csv(file_name)
+
+    centroids.id = np.array(cent_pd.index)
+    centroids.coord = GridPoints(
+        np.array(cent_pd[[
+            var_names['lat'],
+            var_names['lon'],
+        ]])
+    )
+    centroids.region_id = np.array(
+        cent_pd[[var_names['region_id']]]
+    )
+
+    centroids.tag.file_name = file_name
+    centroids.tag.description = 'Read from csv'
+
 READ_SET = {'XLS': (DEF_VAR_EXCEL, read_excel),
-            'MAT': (DEF_VAR_MAT, read_mat)
-            }
+            'MAT': (DEF_VAR_MAT, read_mat),
+            'CSV': (DEF_VAR_CSV, read_csv),
+            }
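Usage sketch (not part of the commit): the new reader can also be called directly. The file name 'centroids.csv' is a placeholder; its columns must match DEF_VAR_CSV ('X', 'Y', 'iso_n3') unless a var_names mapping is supplied.

from climada.hazard.centroids.base import Centroids
from climada.hazard.centroids.source import read_csv

cent = Centroids()
read_csv(cent, 'centroids.csv', var_names=None)   # None falls back to DEF_VAR_CSV
print(cent.size)           # count of centroids, via the new property in base.py
print(cent.region_id[:5])  # iso_n3 codes read from the CSV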

climada/hazard/storm_europe.py

Lines changed: 172 additions & 0 deletions
@@ -0,0 +1,172 @@
+"""
+Define StormEurope class.
+"""
+
+__all__ = ['StormEurope']
+
+import logging
+import numpy as np
+import xarray as xr
+import pandas as pd
+from scipy import sparse
+
+from climada.hazard.base import Hazard
+from climada.hazard.centroids.base import Centroids
+from climada.hazard.tag import Tag as TagHazard
+from climada.util.files_handler import get_file_names, to_list
+from climada.util.constants import WISC_CENTROIDS
+
+LOGGER = logging.getLogger(__name__)
+
+HAZ_TYPE = 'WS'
+""" Hazard type acronym for Winter Storm """
+
+
+class StormEurope(Hazard):
+    """Contains European winter storm events.
+
+    Attributes:
+        ssi (float): Storm Severity Index, as recorded in the footprint
+            files; this is _not_ the same as that computed by the Matlab
+            climada version.
+            cf. Lamb and Frydendahl (1991)
+            "Historic Storms of the North Sea, British Isles and
+            Northwest Europe", ISBN: 978-0-521-37522-1
+            SSI = v [m/s] ^ 3 * duration [h] * area [km^2 or m^2]
+    """
+    intensity_thres = 15
+    """ intensity threshold for storage in m/s """
+
+    vars_opt = Hazard.vars_opt.union({'ssi'})
+    """Name of the variables that aren't needed to compute the impact."""
+
+    def __init__(self):
+        """Empty constructor."""
+        Hazard.__init__(self, HAZ_TYPE)
+        self.ssi = np.array([], int)
+
+    def read_footprints(self, path, description=None,
+                        ref_raster=None, centroids=None,
+                        files_omit='fp_era20c_1990012515_701_0.nc'):
+        """Clear instance and read WISC footprints. Assumes that all
+        footprints have the same coordinates as the first file listed/first
+        file in dir.
+
+        Parameters:
+            path (str, list(str)): A location in the filesystem. Either a
+                path to a single netCDF WISC footprint, or a folder
+                containing only footprints, or a globbing pattern to one
+                or more footprints.
+            description (str, optional): description of the events, defaults
+                to 'WISC historical hazard set'
+            ref_raster (str, optional): Reference netCDF file from which to
+                construct a new barebones Centroids instance. Defaults to
+                the first file in path.
+            centroids (Centroids, optional): A Centroids struct, overriding
+                ref_raster
+            files_omit (str, list(str), optional): List of files to omit;
+                defaults to one duplicate storm present in the WISC set as
+                of 2018-09-10.
+        """
+
+        self.clear()
+
+        file_names = get_file_names(path)
+
+        if ref_raster is not None and centroids is None:
+            centroids = self._centroids_from_nc(ref_raster)
+        elif ref_raster is not None and centroids is not None:
+            LOGGER.warning('Overriding ref_raster with centroids')
+        else:
+            centroids = self._centroids_from_nc(file_names[0])
+
+        files_omit = to_list(files_omit)
+
+        for fn in file_names:
+            if any(fo in fn for fo in files_omit):
+                LOGGER.info("Omitting file %s", fn)
+                continue
+            self.append(self._read_one_nc(fn, centroids))
+
+        self.tag = TagHazard(
+            HAZ_TYPE, 'Hazard set not saved, too large to pickle',
+            description='WISC historical hazard set.'
+        )
+        if description is not None:
+            self.tag.description = description
+
+    @classmethod
+    def _read_one_nc(cls, file_name, centroids):
+        """ Read a single WISC footprint. Assumes a time dimension of length
+        1. Omits a footprint if another file with the same timestamp has
+        already been read.
+
+        Parameters:
+            file_name (str): Absolute or relative path to *.nc
+            centroids (Centroids): Centr. instance that matches the
+                coordinates used in the *.nc, only validated by size.
+        """
+        nc = xr.open_dataset(file_name)
+
+        if centroids.size != (nc.sizes['latitude'] * nc.sizes['longitude']):
+            raise ValueError('Number of centroids and grid size don\'t match.')
+
+        # xarray does not penalise repeated assignments, see
+        # http://xarray.pydata.org/en/stable/data-structures.html
+        stacked = nc.max_wind_gust.stack(
+            intensity=('latitude', 'longitude', 'time')
+        )
+        stacked = stacked.where(stacked > cls.intensity_thres)
+        stacked = stacked.fillna(0)
+
+        # fill in values from netCDF
+        new_haz = StormEurope()
+        new_haz.event_name = [nc.storm_name]
+        new_haz.date = np.array([
+            _datetime64_toordinal(nc.time.data[0])
+        ])
+        new_haz.intensity = sparse.csr_matrix(stacked)
+        new_haz.ssi = np.array([float(nc.ssi)])
+        new_haz.time_bounds = np.array(nc.time_bounds)
+
+        # fill in default values
+        new_haz.centroids = centroids
+        new_haz.units = 'm/s'
+        new_haz.event_id = np.array([1])
+        new_haz.frequency = np.array([1])
+        new_haz.fraction = new_haz.intensity.copy().tocsr()
+        new_haz.fraction.data.fill(1)
+        new_haz.orig = np.array([True])
+
+        nc.close()
+        return new_haz
+
+    @staticmethod
+    def _centroids_from_nc(file_name):
+        """ Construct Centroids from the grid described by 'latitude'
+        and 'longitude' variables in a netCDF file.
+        """
+        nc = xr.open_dataset(file_name)
+        lats = nc.latitude.data
+        lons = nc.longitude.data
+        ct = Centroids()
+        ct.coord = np.array([
+            np.repeat(lats, len(lons)),
+            np.tile(lons, len(lats)),
+        ]).T
+        ct.id = np.arange(0, len(ct.coord))
+        ct.tag.description = 'Centroids constructed from: ' + file_name
+
+        return ct
+
+    def plot_ssi(self):
+        """ Ought to plot the SSI versus the xs_freq, which presumably is the
+        excess frequency. """
+        pass
+
+
+def _datetime64_toordinal(datetime):
+    """ Converts from a numpy datetime64 object to an ordinal date.
+    See https://stackoverflow.com/a/21916253 for the horrible details. """
+    return pd.to_datetime(datetime.tolist()).toordinal()
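Usage sketch (not part of the commit): reading a set of WISC footprints through the new class. The glob pattern is illustrative; only the read_footprints signature and the populated attributes are taken from the code above.

from climada.hazard.storm_europe import StormEurope

storms = StormEurope()
storms.read_footprints('data/wisc/fp_era20c_*.nc',
                       description='WISC era20c subset')
print(storms.event_name)       # one storm name per footprint file
print(storms.ssi)              # per-event Storm Severity Index from the files
print(storms.intensity.shape)  # (n_events, n_centroids), sparse CSR in m/s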

climada/util/files_handler.py

Lines changed: 20 additions & 10 deletions
@@ -11,6 +11,7 @@
 import math
 import requests
 import tqdm
+import glob
 
 LOGGER = logging.getLogger(__name__)
 
@@ -74,11 +75,14 @@ def to_list(num_exp, values, val_name):
     return val_list
 
 def get_file_names(file_name):
-    """Return list of files contained.
+    """ Return list of files contained. Supports globbing.
 
     Parameters:
-        file_name (str or list(str)): file name, or list of file names or name
-            of the folder containing the files
+        file_name (str or list(str)): Either a single string or a list of
+            strings that are either
+            - a file path
+            - or the path of the folder containing the files
+            - or a globbing pattern.
 
     Returns:
         list
@@ -92,12 +96,18 @@ def get_file_names(file_name):
     return file_list
 
 def _process_one_file_name(name, file_list):
-    """Apend to input list the file contained in name"""
-    if os.path.splitext(name)[1] == '':
-        tmp_files = os.listdir(name)
-        # append only files (absolute path), not folders
+    """ Append to input list the file contained in name.
+    Tries globbing if name is neither dir nor file.
+    """
+    if os.path.isdir(name):
+        tmp_files = glob.glob(os.path.join(name, '*'))
         for file in tmp_files:
-            if os.path.splitext(file)[1] != '':
-                file_list.append(os.path.join(name, file))
-    else:
+            if os.path.isfile(file):
+                file_list.append(file)
+    if os.path.isfile(name):
         file_list.append(name)
+    else:
+        tmp_files = sorted(glob.glob(name))
+        for file in tmp_files:
+            if os.path.isfile(file):
+                file_list.append(file)
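For reference, a short sketch (not part of the commit) of the three input forms get_file_names now accepts; the paths are illustrative.

from climada.util.files_handler import get_file_names

get_file_names('data/demo/storm.nc')   # single file -> ['data/demo/storm.nc']
get_file_names('data/demo')            # folder -> all regular files inside, no subfolders
get_file_names('data/demo/fp_*.nc')    # glob pattern -> sorted list of matching files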

climada/util/test/test_files.py

Lines changed: 18 additions & 4 deletions
@@ -6,7 +6,7 @@
 import unittest
 
 from climada.util.files_handler import to_list, get_file_names, download_file
-from climada.util.constants import DATA_DIR
+from climada.util.constants import DATA_DIR, GLB_CENTROIDS_MAT, ENT_TEMPLATE_XLS
 
 class TestDownloadUrl(unittest.TestCase):
     """Test download_file function """
@@ -53,16 +53,17 @@ def test_list_wrong_length_fail(self):
         self.assertIn("Provide one or 3 values.", cm.output[0])
 
 class TestGetFileNames(unittest.TestCase):
-    """Test get_file_names function"""
+    """ Test get_file_names function. Only works with actually existing
+    files and directories. """
     def test_one_file_copy(self):
         """If input is one file name, return a list with this file name"""
-        file_name = "test.mat"
+        file_name = GLB_CENTROIDS_MAT
         out = get_file_names(file_name)
         self.assertEqual([file_name], out)
 
     def test_several_file_copy(self):
         """If input is a list with several file names, return the same list"""
-        file_name = ["test1.mat", "test2.mat", "test3.mat", "test4.mat"]
+        file_name = [GLB_CENTROIDS_MAT, ENT_TEMPLATE_XLS]
         out = get_file_names(file_name)
         self.assertEqual(file_name, out)
@@ -79,6 +80,19 @@ def test_folder_contents(self):
         for file in out:
             self.assertNotEqual('', os.path.splitext(file)[1])
 
+    def test_globbing(self):
+        """ If input is a glob pattern, return a list of matching visible
+        files; omit folders.
+        """
+        file_name = os.path.join(DATA_DIR, 'demo/')
+        out = get_file_names(file_name + '*')
+
+        tmp_files = os.listdir(file_name)
+        tmp_files = [file_name + f for f in tmp_files]
+        tmp_files = [f for f in tmp_files if not os.path.isdir(f)
+                     and not os.path.basename(f).startswith('.')]
+        self.assertEqual(sorted(tmp_files), out)
+
 # Execute Tests
 TESTS = unittest.TestLoader().loadTestsFromTestCase(TestToStrList)
 TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGetFileNames))
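A quick way to run just the updated test case (not part of the commit; assumes the climada package is importable from the repository root):

import unittest

SUITE = unittest.TestLoader().loadTestsFromName(
    'climada.util.test.test_files.TestGetFileNames')
unittest.TextTestRunner(verbosity=2).run(SUITE)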

requirements/env_climada.yml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ dependencies:
   - h5py=2.7.1
   - jsonschema=2.6.0
   - matplotlib=2.2.2
+  - netcdf4=1.4.0
   - numba=0.37.0
   - numpy=1.14.2
   - pandas=0.23.0
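A small sanity check (not part of the commit) that the newly pinned netCDF backend is importable and usable by xarray; 'fp_example.nc' stands in for any WISC footprint file.

import netCDF4
import xarray as xr

print(netCDF4.__version__)   # expected '1.4.0' per the pin above
nc = xr.open_dataset('fp_example.nc', engine='netcdf4')
print(nc.sizes)              # should include 'latitude', 'longitude', 'time'
nc.close()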
