5 changes: 4 additions & 1 deletion doc/whats-new.rst
@@ -24,7 +24,7 @@ Breaking changes
- Remove support for Python 2. This is the first version of xarray that is
Python 3 only. (:issue:`1876`).
By `Joe Hamman <https://github.com/jhamman>`_.
- The `compat` argument to `Dataset` and the `encoding` argument to
- The `compat` argument to `Dataset` and the `encoding` argument to
`DataArray` are deprecated and will be removed in a future release.
(:issue:`1188`)
By `Maximilian Roos <https://github.com/max-sixty>`_.
@@ -62,6 +62,9 @@ Enhancements
- :py:meth:`pandas.Series.dropna` is now supported for a
:py:class:`pandas.Series` indexed by a :py:class:`~xarray.CFTimeIndex`
(:issue:`2688`). By `Spencer Clark <https://github.com/spencerkclark>`_.
- Variables are now unpacked using the dtype of ``scale_factor`` and ``add_offset``, when
  these attributes are present in a dataset, following the
  `CF conventions <http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch08.html>`_.
By `Daoud Jahdou <https://github.com/daoudjahdou>`_.

Bug fixes
~~~~~~~~~
39 changes: 35 additions & 4 deletions xarray/coding/variables.py
@@ -189,8 +189,36 @@ def _scale_offset_decoding(data, scale_factor, add_offset, dtype):
return data


def _choose_float_dtype(dtype, has_offset):
def _choose_float_dtype(dtype, scale_factor, add_offset):
"""Return a float dtype that can losslessly represent `dtype` values."""
# Implementing the CF conventions
# http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch08.html:
# Detail:
# If the scale_factor and add_offset attributes are of the same
# data type as the
# associated variable, the unpacked data is assumed to be of the same
# data type as the packed data. However, if the scale_factor
# and add_offset attributes are of a different data type
# from the variable (containing the packed data)
# then the unpacked data should match
# the type of these attributes, which must both be of type float or both
# be of type double. An additional restriction in this case is that
Member comment:

What should we do if add_offset/scale_factor are not floats?

Right now in your implementation, if scale_factor is mistakenly np.int64(10) then the data would get decoded as int64, which seems like a bad idea (e.g., if the data is some float type). I think we should probably ignore the type of scale_factor/add_offset in these cases instead.

@magau replied (Feb 21, 2019):

Hi @shoyer! Sorry for my delayed reply... First of all I'd like to thank and congratulate you for your efficiency, and say that I'm very glad to see that you've taken my considerations about this topic into account.

About the comment above I'd say that, since the CFScaleOffsetCoder will always multiply the raw data by the scale_factor, if the scale_factor is an integer the decoded values will always result in integers too (unless the raw data dtype is float, as you've mentioned).
Even in these cases I'd return the decoded values with the same dtype as the scale_factor / add_offset, which is the expected behavior (data providers must be aware...). Users can always use open_dataset with decode_cf=False and call the decode_cf_variable function later, after fixing the variable.attrs['scale_factor / add_offset'] dtypes.
A possible solution is to add scale_factor and add_offset as arguments of the open_dataset function, to overwrite the original ones; these would receive labeled values like scale_factor={'var1': whatever, 'var2': whatever} and add_offset={'var1': whatever, 'var2': whatever}. This would also resolve the #2304 issue (with a better approach, in my opinion).

By the way, I've also noted that CFScaleOffsetCoder.encode changes the values.dtype before applying the inverse operation (i.e. dividing by the scale_factor), which will result in truncated values. I've already faced this situation with my own netCDF encoding processes, and would advise you to only change the dtype after applying the transformation, and to use the numpy.round function (for integer encoding) to avoid truncation artifacts.

Once again, sorry for my extensive comments and for not opening an issue, as I should have, but my intention is for you to evaluate my considerations, make your own decisions with your colleagues, and keep up the nice work you have all been doing. 👍

Cheers!
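
To illustrate the rounding advice above, a rough stand-alone sketch of packing that casts only after the transform (hypothetical helper, not the coder's actual code):

import numpy as np

def pack_values(values, scale_factor, add_offset, target_dtype):
    # apply the inverse transform in floating point first
    packed = (np.asarray(values, dtype=np.float64) - add_offset) / scale_factor
    # round before casting so integer packing does not silently truncate
    if np.issubdtype(np.dtype(target_dtype), np.integer):
        packed = np.round(packed)
    # change the dtype only at the very end
    return packed.astype(target_dtype)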

# the variable containing the packed data must
# be of type byte, short or int.
# It is not advised to unpack an int into a float as there
# is a potential precision loss.

# We return the scale_factor dtype first,
# since multiplication takes priority
# over addition.
Member comment:

I'm a little nervous about making this assumption. The NumPy casting rules for a * x + b would give the result the largest dtype of any of a, x and b. Maybe we should similarly pick whichever of scale_factor, data and add_offset has the largest float dtype?
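
A minimal sketch of that promotion rule, assuming we always want a float result (the name and signature here are illustrative, not the final implementation):

import numpy as np

def largest_float_dtype(dtype, scale_factor, add_offset):
    # collect the dtypes that would take part in scale_factor * data + add_offset
    candidates = [np.dtype(dtype)]
    for attr in (scale_factor, add_offset):
        if attr is not None:
            candidates.append(np.asarray(attr).dtype)
    # let NumPy's promotion rules pick the widest type, then force a float
    # result so an integer scale_factor cannot yield an integer dtype
    result = np.result_type(*candidates)
    if not np.issubdtype(result, np.floating):
        result = np.dtype(np.float64)
    return np.dtype(result)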

if dtype is not type(scale_factor) and \
Member comment:

Do we actually need to check whether the type of scale_factor matches dtype? I think the logic works the same either way.

isinstance(scale_factor, np.generic):
Member comment:

nit: per PEP8, please prefer using parentheses () to split across multiple lines instead of \

return np.dtype(scale_factor)

if dtype is not type(add_offset) and \
isinstance(add_offset, np.generic):
return np.dtype(add_offset)

# Keep float32 as-is. Upcast half-precision to single-precision,
# because float16 is "intended for storage but not computation"
if dtype.itemsize <= 4 and np.issubdtype(dtype, np.floating):
@@ -201,7 +229,7 @@ def _choose_float_dtype(dtype, has_offset):
# but a large integer offset could lead to loss of precision.
# Sensitivity analysis can be tricky, so we just use a float64
# if there's any offset at all - better unoptimised than wrong!
if not has_offset:
if not add_offset:
return np.float32
# For all other types and circumstances, we just use float64.
# (safe because eg. complex numbers are not supported in NetCDF)
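
For reference, the decode path ultimately applies the standard CF unpacking transform; a minimal stand-alone sketch of that transform (not the coder's exact code):

import numpy as np

def unpack(packed, scale_factor=1, add_offset=0, dtype=np.float64):
    # decoded = packed * scale_factor + add_offset, computed in the chosen float dtype
    data = np.asarray(packed).astype(dtype)
    if scale_factor != 1:
        data *= scale_factor
    if add_offset != 0:
        data += add_offset
    return data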
@@ -219,7 +247,9 @@ def encode(self, variable, name=None):
dims, data, attrs, encoding = unpack_for_encoding(variable)

if 'scale_factor' in encoding or 'add_offset' in encoding:
Member comment:

Alternative logic for this clause that I think might work:

if 'scale_factor' in encoding or 'add_offset' in encoding:
    scale_factor = encoding.pop('scale_factor', 1)
    add_offset = encoding.pop('add_offset', 0)
    if 'dtype' in encoding:
        dtype = pop_to(encoding, attrs, 'dtype', name=name)
    else:
        dtype = _choose_encoding_float_dtype(data.dtype, add_offset != 0)

    # save scale_factor and add_offset with dtype matching the decoded
    # data, per CF conventions
    safe_setitem(attrs, 'scale_factor', data.dtype.type(scale_factor), name)
    safe_setitem(attrs, 'add_offset', data.dtype.type(add_offset), name)

    # apply scale/offset encoding
    data = data.astype(dtype=dtype, copy=True)
    if add_offset:
        data -= add_offset
    if scale_factor != 1:
        # multiplication is faster than division
        data *= 1/scale_factor

The idea here is to always write the scale_factor and add_offset attributes according to CF conventions, which ensures that data always gets read out the right way. This does remove some user flexibility, but xarray is OK with being opinionated about how it writes metadata -- it is not a general purpose tool for writing completely flexible netCDF files.
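
As a quick illustration of what that suggestion would write (values are hypothetical):

import numpy as np

# hypothetical decoded variable: float32 in memory, to be packed as int16
decoded = np.array([10.0, 10.5, 11.0], dtype=np.float32)
scale_factor, add_offset = 0.01, 10.0

# per the suggestion, the attributes are written with the decoded dtype
attrs = {'scale_factor': decoded.dtype.type(scale_factor),
         'add_offset': decoded.dtype.type(add_offset)}
# attrs['scale_factor'] and attrs['add_offset'] are now np.float32, so on
# decode the CF rule maps the int16 packed data back to float32.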

dtype = _choose_float_dtype(data.dtype, 'add_offset' in encoding)
scale_factor = pop_to(attrs, encoding, 'scale_factor', name=name)
add_offset = pop_to(attrs, encoding, 'add_offset', name=name)
dtype = _choose_float_dtype(data.dtype, scale_factor, add_offset)
data = data.astype(dtype=dtype, copy=True)
if 'add_offset' in encoding:
Member comment:

This line (and the line below for scale_factor) needs to be updated to use the add_offset variable if it isn't None rather than checking for it in encoding again. (pop_to removes an item from the dict in which it's found.)

data -= pop_to(encoding, attrs, 'add_offset', name=name)
@@ -234,7 +264,8 @@ def decode(self, variable, name=None):
if 'scale_factor' in attrs or 'add_offset' in attrs:
scale_factor = pop_to(attrs, encoding, 'scale_factor', name=name)
add_offset = pop_to(attrs, encoding, 'add_offset', name=name)
dtype = _choose_float_dtype(data.dtype, 'add_offset' in attrs)
dtype = _choose_float_dtype(data.dtype, scale_factor, add_offset)

transform = partial(_scale_offset_decoding,
scale_factor=scale_factor,
add_offset=add_offset,
2 changes: 0 additions & 2 deletions xarray/conventions.py
@@ -270,7 +270,6 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True,
Whether to stack characters into bytes along the last dimension of this
array. Passed as an argument because we need to look at the full
dataset to figure out if this is appropriate.

Returns
-------
out : Variable
@@ -430,7 +429,6 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.

Returns
-------
decoded : Dataset
1 change: 0 additions & 1 deletion xarray/core/dtypes.py
@@ -30,7 +30,6 @@ def __eq__(self, other):
INF = AlwaysGreaterThan()
NINF = AlwaysLessThan()


# Pairs of types that, if both found, should be promoted to object dtype
# instead of following NumPy's own type-promotion rules. These type promotion
# rules match pandas instead. For reference, see the NumPy type hierarchy:
24 changes: 22 additions & 2 deletions xarray/tests/test_backends.py
@@ -1140,14 +1140,15 @@ def test_mask_and_scale(self):
v = nc.variables['x']
v.set_auto_maskandscale(False)
v.add_offset = 10
v.scale_factor = 0.1
v.scale_factor = np.float32(0.1)
v[:] = np.array([-1, -1, 0, 1, 2])

# first make sure netCDF4 reads the masked and scaled data
# correctly
with nc4.Dataset(tmp_file, mode='r') as nc:
expected = np.ma.array([-1, -1, 10, 10.1, 10.2],
mask=[True, True, False, False, False])
mask=[True, True, False, False, False],
dtype=np.float32)
actual = nc.variables['x'][:]
assert_array_equal(expected, actual)

@@ -1156,6 +1157,25 @@ def test_mask_and_scale(self):
expected = create_masked_and_scaled_data()
assert_identical(expected, ds)

def test_mask_and_scale_with_float64_scale_factor(self):
Member comment:

It would be nice to test all of the edge cases just using CFScaleOffsetCoder, e.g.,

  • what happens if add_offset and scale_factor have different types?
  • what if they're integers instead of floats?

See the tests in tests/test_coding.py for examples; a rough sketch follows below.
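
A sketch of what such coder-level tests might look like, for the cases where the attribute dtypes agree (values and expectations are illustrative; assumes the imports shown):

import numpy as np
import pytest
from xarray import Variable
from xarray.coding import variables

@pytest.mark.parametrize('attr_dtype', [np.float32, np.float64])
def test_scale_offset_decoded_dtype_matches_attrs(attr_dtype):
    coder = variables.CFScaleOffsetCoder()
    packed = Variable(('t',), np.array([0, 1, 2], dtype='int16'),
                      {'scale_factor': attr_dtype(0.01),
                       'add_offset': attr_dtype(10)})
    decoded = coder.decode(packed, name='x')
    # per CF, float/double attributes determine the unpacked dtype
    assert decoded.dtype == np.dtype(attr_dtype)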

with create_tmp_file() as tmp_file:
with nc4.Dataset(tmp_file, mode='w') as nc:
nc.createDimension('t', 5)
nc.createVariable('x', 'int16', ('t',), fill_value=-1)
v = nc.variables['x']
v.scale_factor = 0.01
v.add_offset = 10
v[:] = np.array([-1123, -1123, 123, 1123, 2123])
# We read the newly created netcdf file
with nc4.Dataset(tmp_file, mode='r') as nc:
# we open the dataset with forced promotion to 64 bit
with open_dataset(tmp_file) as ds:
# Both sets of values should be equal
# and both should be float64 arrays
dsv = ds['x'].values
ncv = nc.variables['x'][:]
np.testing.assert_array_almost_equal(dsv, ncv, 15)

def test_0dimensional_variable(self):
# This fix verifies our work-around to this netCDF4-python bug:
# https://github.com/Unidata/netcdf4-python/pull/220