Commit 0d69269

Merge pull request #2 from mmyrte/master
StormEurope class and utility
2 parents 0640683 + 01f9d42 commit 0d69269


7 files changed (+254, -18 lines)


climada/hazard/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -6,3 +6,4 @@
 from .tag import *
 from .trop_cyclone import *
 from .tc_tracks import *
+from .storm_europe import *
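A minimal import check (not part of the commit): with the wildcard import added above and __all__ = ['StormEurope'] in the new module, the class becomes reachable from the hazard package directly.

from climada.hazard import StormEurope

haz = StormEurope()
print(haz.ssi)               # empty numpy array until footprints are read
print(haz.intensity_thres)   # 15, the storage threshold in m/s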

climada/hazard/centroids/base.py

Lines changed: 9 additions & 2 deletions
@@ -21,8 +21,10 @@
 
 FILE_EXT = {'.mat': 'MAT',
             '.xls': 'XLS',
-            '.xlsx': 'XLS'
-            }
+            '.xlsx': 'XLS',
+            '.csv': 'CSV',
+            }
+
 """ Supported files format to read from """
 
 class Centroids(object):
@@ -213,6 +215,11 @@ def lon(self):
         """ Get longitude from coord array """
         return self.coord[:, 1]
 
+    @property
+    def size(self):
+        """ Get count of centroids """
+        return self.id.size
+
     @staticmethod
     def get_sup_file_format():
         """ Get supported file extensions that can be read.

climada/hazard/centroids/source.py

Lines changed: 33 additions & 2 deletions
@@ -33,6 +33,12 @@
               }
 """ Excel variable names """
 
+DEF_VAR_CSV = {'lat': 'X',
+               'lon': 'Y',
+               'region_id': 'iso_n3',
+               }
+""" CSV variable names """
+
 LOGGER = logging.getLogger(__name__)
 
 def read_excel(centroids, file_name, var_names):
@@ -111,6 +117,31 @@ def read_att_mat(centroids, cent, num_try, var_names):
     except KeyError:
         pass
 
+def read_csv(centroids, file_name, var_names):
+    """ Read csv centroids representations. Currently only supports lat/lon
+    and region_id.
+    """
+    # TODO iterate over additional variables in var_names
+    if var_names is None:
+        var_names = DEF_VAR_CSV
+
+    cent_pd = pd.read_csv(file_name)
+
+    centroids.id = np.array(cent_pd.index)
+    centroids.coord = GridPoints(
+        np.array(cent_pd[[
+            var_names['lat'],
+            var_names['lon'],
+        ]])
+    )
+    centroids.region_id = np.array(
+        cent_pd[[var_names['region_id']]]
+    )
+
+    centroids.tag.file_name = file_name
+    centroids.tag.description = 'Read from csv'
+
 READ_SET = {'XLS': (DEF_VAR_EXCEL, read_excel),
-            'MAT': (DEF_VAR_MAT, read_mat)
-            }
+            'MAT': (DEF_VAR_MAT, read_mat),
+            'CSV': (DEF_VAR_CSV, read_csv),
+            }
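Usage sketch (not part of the commit): the new reader can also be called directly. The file name 'centroids.csv' is a placeholder; its columns must match DEF_VAR_CSV ('X', 'Y', 'iso_n3') unless a var_names mapping is supplied.

from climada.hazard.centroids.base import Centroids
from climada.hazard.centroids.source import read_csv

cent = Centroids()
read_csv(cent, 'centroids.csv', var_names=None)   # None falls back to DEF_VAR_CSV
print(cent.size)           # count of centroids, via the new property in base.py
print(cent.region_id[:5])  # iso_n3 codes read from the CSV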

climada/hazard/storm_europe.py

Lines changed: 172 additions & 0 deletions
@@ -0,0 +1,172 @@
+"""
+Define StormEurope class.
+"""
+
+__all__ = ['StormEurope']
+
+import logging
+import numpy as np
+import xarray as xr
+import pandas as pd
+from scipy import sparse
+
+from climada.hazard.base import Hazard
+from climada.hazard.centroids.base import Centroids
+from climada.hazard.tag import Tag as TagHazard
+from climada.util.files_handler import get_file_names, to_list
+from climada.util.constants import WISC_CENTROIDS
+
+LOGGER = logging.getLogger(__name__)
+
+HAZ_TYPE = 'WS'
+""" Hazard type acronym for Winter Storm """
+
+
+class StormEurope(Hazard):
+    """Contains European winter storm events.
+
+    Attributes:
+        ssi (float): Storm Severity Index, as recorded in the footprint
+            files; this is _not_ the same as that computed by the Matlab
+            climada version.
+            cf. Lamb and Frydendahl (1991)
+            "Historic Storms of the North Sea, British Isles and
+            Northwest Europe", ISBN: 978-0-521-37522-1
+            SSI = v [m/s] ^ 3 * duration [h] * area [km^2 or m^2]
+    """
+    intensity_thres = 15
+    """ intensity threshold for storage in m/s """
+
+    vars_opt = Hazard.vars_opt.union({'ssi'})
+    """Name of the variables that aren't needed to compute the impact."""
+
+    def __init__(self):
+        """Empty constructor."""
+        Hazard.__init__(self, HAZ_TYPE)
+        self.ssi = np.array([], int)
+
+    def read_footprints(self, path, description=None,
+                        ref_raster=None, centroids=None,
+                        files_omit='fp_era20c_1990012515_701_0.nc'):
+        """Clear instance and read WISC footprints. Assumes that all
+        footprints have the same coordinates as the first file listed/first
+        file in dir.
+
+        Parameters:
+            path (str, list(str)): A location in the filesystem. Either a
+                path to a single netCDF WISC footprint, or a folder
+                containing only footprints, or a globbing pattern to one
+                or more footprints.
+            description (str, optional): description of the events, defaults
+                to 'WISC historical hazard set'
+            ref_raster (str, optional): Reference netCDF file from which to
+                construct a new barebones Centroids instance. Defaults to
+                the first file in path.
+            centroids (Centroids, optional): A Centroids struct, overriding
+                ref_raster
+            files_omit (str, list(str), optional): List of files to omit;
+                defaults to one duplicate storm present in the WISC set as
+                of 2018-09-10.
+        """
+
+        self.clear()
+
+        file_names = get_file_names(path)
+
+        if ref_raster is not None and centroids is None:
+            centroids = self._centroids_from_nc(ref_raster)
+        elif ref_raster is not None and centroids is not None:
+            LOGGER.warning('Overriding ref_raster with centroids')
+        else:
+            centroids = self._centroids_from_nc(file_names[0])
+
+        files_omit = to_list(files_omit)
+
+        for fn in file_names:
+            if any(fo in fn for fo in files_omit):
+                LOGGER.info("Omitting file %s", fn)
+                continue
+            self.append(self._read_one_nc(fn, centroids))
+
+        self.tag = TagHazard(
+            HAZ_TYPE, 'Hazard set not saved, too large to pickle',
+            description='WISC historical hazard set.'
+        )
+        if description is not None:
+            self.tag.description = description
+
+    @classmethod
+    def _read_one_nc(cls, file_name, centroids):
+        """ Read a single WISC footprint. Assumes a time dimension of length
+        1. Omits a footprint if another file with the same timestamp has
+        already been read.
+
+        Parameters:
+            file_name (str): Absolute or relative path to *.nc
+            centroids (Centroids): Centr. instance that matches the
+                coordinates used in the *.nc, only validated by size.
+        """
+        nc = xr.open_dataset(file_name)
+
+        if centroids.size != (nc.sizes['latitude'] * nc.sizes['longitude']):
+            raise ValueError('Number of centroids and grid size don\'t match.')
+
+        # xarray does not penalise repeated assignments, see
+        # http://xarray.pydata.org/en/stable/data-structures.html
+        stacked = nc.max_wind_gust.stack(
+            intensity=('latitude', 'longitude', 'time')
+        )
+        stacked = stacked.where(stacked > cls.intensity_thres)
+        stacked = stacked.fillna(0)
+
+        # fill in values from netCDF
+        new_haz = StormEurope()
+        new_haz.event_name = [nc.storm_name]
+        new_haz.date = np.array([
+            _datetime64_toordinal(nc.time.data[0])
+        ])
+        new_haz.intensity = sparse.csr_matrix(stacked)
+        new_haz.ssi = np.array([float(nc.ssi)])
+        new_haz.time_bounds = np.array(nc.time_bounds)
+
+        # fill in default values
+        new_haz.centroids = centroids
+        new_haz.units = 'm/s'
+        new_haz.event_id = np.array([1])
+        new_haz.frequency = np.array([1])
+        new_haz.fraction = new_haz.intensity.copy().tocsr()
+        new_haz.fraction.data.fill(1)
+        new_haz.orig = np.array([True])
+
+        nc.close()
+        return new_haz
+
+    @staticmethod
+    def _centroids_from_nc(file_name):
+        """ Construct Centroids from the grid described by 'latitude'
+        and 'longitude' variables in a netCDF file.
+        """
+        nc = xr.open_dataset(file_name)
+        lats = nc.latitude.data
+        lons = nc.longitude.data
+        ct = Centroids()
+        ct.coord = np.array([
+            np.repeat(lats, len(lons)),
+            np.tile(lons, len(lats)),
+        ]).T
+        ct.id = np.arange(0, len(ct.coord))
+        ct.tag.description = 'Centroids constructed from: ' + file_name
+
+        return ct
+
+    def plot_ssi(self):
+        """ Ought to plot the SSI versus the xs_freq, which presumably is the
+        excess frequency. """
+        pass
+
+
+def _datetime64_toordinal(datetime):
+    """ Converts from a numpy datetime64 object to an ordinal date.
+    See https://stackoverflow.com/a/21916253 for the horrible details. """
+    return pd.to_datetime(datetime.tolist()).toordinal()
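Usage sketch (not part of the commit): reading a set of WISC footprints through the new class. The glob pattern is illustrative; only the read_footprints signature and the populated attributes are taken from the code above.

from climada.hazard.storm_europe import StormEurope

storms = StormEurope()
storms.read_footprints('data/wisc/fp_era20c_*.nc',
                       description='WISC era20c subset')
print(storms.event_name)       # one storm name per footprint file
print(storms.ssi)              # per-event Storm Severity Index from the files
print(storms.intensity.shape)  # (n_events, n_centroids), sparse CSR in m/s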

climada/util/files_handler.py

Lines changed: 20 additions & 10 deletions
@@ -11,6 +11,7 @@
 import math
 import requests
 import tqdm
+import glob
 
 LOGGER = logging.getLogger(__name__)
 
@@ -74,11 +75,14 @@ def to_list(num_exp, values, val_name):
     return val_list
 
 def get_file_names(file_name):
-    """Return list of files contained.
+    """ Return list of files contained. Supports globbing.
 
     Parameters:
-        file_name (str or list(str)): file name, or list of file names or name
-            of the folder containing the files
+        file_name (str or list(str)): Either a single string or a list of
+            strings that are either
+            - a file path
+            - or the path of the folder containing the files
+            - or a globbing pattern.
 
     Returns:
         list
@@ -92,12 +96,18 @@ def get_file_names(file_name):
     return file_list
 
 def _process_one_file_name(name, file_list):
-    """Apend to input list the file contained in name"""
-    if os.path.splitext(name)[1] == '':
-        tmp_files = os.listdir(name)
-        # append only files (absolute path), not folders
+    """ Append to input list the file contained in name.
+    Tries globbing if name is neither dir nor file.
+    """
+    if os.path.isdir(name):
+        tmp_files = glob.glob(os.path.join(name, '*'))
         for file in tmp_files:
-            if os.path.splitext(file)[1] != '':
-                file_list.append(os.path.join(name, file))
-    else:
+            if os.path.isfile(file):
+                file_list.append(file)
+    if os.path.isfile(name):
         file_list.append(name)
+    else:
+        tmp_files = sorted(glob.glob(name))
+        for file in tmp_files:
+            if os.path.isfile(file):
+                file_list.append(file)
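For reference, a short sketch (not part of the commit) of the three input forms get_file_names now accepts; the paths are illustrative.

from climada.util.files_handler import get_file_names

get_file_names('data/demo/storm.nc')   # single file -> ['data/demo/storm.nc']
get_file_names('data/demo')            # folder -> all regular files inside, no subfolders
get_file_names('data/demo/fp_*.nc')    # glob pattern -> sorted list of matching files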

climada/util/test/test_files.py

Lines changed: 18 additions & 4 deletions
@@ -6,7 +6,7 @@
 import unittest
 
 from climada.util.files_handler import to_list, get_file_names, download_file
-from climada.util.constants import DATA_DIR
+from climada.util.constants import DATA_DIR, GLB_CENTROIDS_MAT, ENT_TEMPLATE_XLS
 
 class TestDownloadUrl(unittest.TestCase):
     """Test download_file function """
@@ -53,16 +53,17 @@ def test_list_wrong_length_fail(self):
         self.assertIn("Provide one or 3 values.", cm.output[0])
 
 class TestGetFileNames(unittest.TestCase):
-    """Test get_file_names function"""
+    """ Test get_file_names function. Only works with actually existing
+    files and directories. """
     def test_one_file_copy(self):
         """If input is one file name, return a list with this file name"""
-        file_name = "test.mat"
+        file_name = GLB_CENTROIDS_MAT
         out = get_file_names(file_name)
         self.assertEqual([file_name], out)
 
     def test_several_file_copy(self):
         """If input is a list with several file names, return the same list"""
-        file_name = ["test1.mat", "test2.mat", "test3.mat", "test4.mat"]
+        file_name = [GLB_CENTROIDS_MAT, ENT_TEMPLATE_XLS]
         out = get_file_names(file_name)
         self.assertEqual(file_name, out)
@@ -79,6 +80,19 @@ def test_folder_contents(self):
         for file in out:
             self.assertNotEqual('', os.path.splitext(file)[1])
 
+    def test_globbing(self):
+        """ If input is a glob pattern, return a list of matching visible
+        files; omit folders.
+        """
+        file_name = os.path.join(DATA_DIR, 'demo/')
+        out = get_file_names(file_name + '*')
+
+        tmp_files = os.listdir(file_name)
+        tmp_files = [file_name + f for f in tmp_files]
+        tmp_files = [f for f in tmp_files if not os.path.isdir(f)
+                     and not os.path.basename(f).startswith('.')]
+        self.assertEqual(sorted(tmp_files), out)
+
 # Execute Tests
 TESTS = unittest.TestLoader().loadTestsFromTestCase(TestToStrList)
 TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestGetFileNames))
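A quick way to run just the updated test case (not part of the commit; assumes the climada package is importable from the repository root):

import unittest

SUITE = unittest.TestLoader().loadTestsFromName(
    'climada.util.test.test_files.TestGetFileNames')
unittest.TextTestRunner(verbosity=2).run(SUITE)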

requirements/env_climada.yml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ dependencies:
   - h5py=2.7.1
   - jsonschema=2.6.0
   - matplotlib=2.2.2
+  - netcdf4=1.4.0
   - numba=0.37.0
   - numpy=1.14.2
   - pandas=0.23.0
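A small sanity check (not part of the commit) that the newly pinned netCDF backend is importable and usable by xarray; 'fp_example.nc' stands in for any WISC footprint file.

import netCDF4
import xarray as xr

print(netCDF4.__version__)   # expected '1.4.0' per the pin above
nc = xr.open_dataset('fp_example.nc', engine='netcdf4')
print(nc.sizes)              # should include 'latitude', 'longitude', 'time'
nc.close()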
