|
48 | 48 | T_DatasetOrAbstractstore = Union[Dataset, AbstractDataStore]
|
49 | 49 |
|
50 | 50 |
|
51 |
| -class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): |
52 |
| - """Decode arrays on the fly from non-native to native endianness |
53 |
| -
|
54 |
| - This is useful for decoding arrays from netCDF3 files (which are all |
55 |
| - big endian) into native endianness, so they can be used with Cython |
56 |
| - functions, such as those found in bottleneck and pandas. |
57 |
| -
|
58 |
| - >>> x = np.arange(5, dtype=">i2") |
59 |
| -
|
60 |
| - >>> x.dtype |
61 |
| - dtype('>i2') |
62 |
| -
|
63 |
| - >>> NativeEndiannessArray(x).dtype |
64 |
| - dtype('int16') |
65 |
| -
|
66 |
| - >>> indexer = indexing.BasicIndexer((slice(None),)) |
67 |
| - >>> NativeEndiannessArray(x)[indexer].dtype |
68 |
| - dtype('int16') |
69 |
| - """ |
70 |
| - |
71 |
| - __slots__ = ("array",) |
72 |
| - |
73 |
| - def __init__(self, array): |
74 |
| - self.array = indexing.as_indexable(array) |
75 |
| - |
76 |
| - @property |
77 |
| - def dtype(self): |
78 |
| - return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) |
79 |
| - |
80 |
| - def __getitem__(self, key): |
81 |
| - return np.asarray(self.array[key], dtype=self.dtype) |
82 |
| - |
83 |
| - |
84 |
| -class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): |
85 |
| - """Decode arrays on the fly from integer to boolean datatype |
86 |
| -
|
87 |
| - This is useful for decoding boolean arrays from integer typed netCDF |
88 |
| - variables. |
89 |
| -
|
90 |
| - >>> x = np.array([1, 0, 1, 1, 0], dtype="i1") |
91 |
| -
|
92 |
| - >>> x.dtype |
93 |
| - dtype('int8') |
94 |
| -
|
95 |
| - >>> BoolTypeArray(x).dtype |
96 |
| - dtype('bool') |
97 |
| -
|
98 |
| - >>> indexer = indexing.BasicIndexer((slice(None),)) |
99 |
| - >>> BoolTypeArray(x)[indexer].dtype |
100 |
| - dtype('bool') |
101 |
| - """ |
102 |
| - |
103 |
| - __slots__ = ("array",) |
104 |
| - |
105 |
| - def __init__(self, array): |
106 |
| - self.array = indexing.as_indexable(array) |
107 |
| - |
108 |
| - @property |
109 |
| - def dtype(self): |
110 |
| - return np.dtype("bool") |
111 |
| - |
112 |
| - def __getitem__(self, key): |
113 |
| - return np.asarray(self.array[key], dtype=self.dtype) |
114 |
| - |
115 |
| - |
116 | 51 | def _var_as_tuple(var: Variable) -> T_VarTuple:
|
117 | 52 | return var.dims, var.data, var.attrs.copy(), var.encoding.copy()
|
118 | 53 |
|
119 | 54 |
|
120 |
| -def maybe_encode_nonstring_dtype(var: Variable, name: T_Name = None) -> Variable: |
121 |
| - if "dtype" in var.encoding and var.encoding["dtype"] not in ("S1", str): |
122 |
| - dims, data, attrs, encoding = _var_as_tuple(var) |
123 |
| - dtype = np.dtype(encoding.pop("dtype")) |
124 |
| - if dtype != var.dtype: |
125 |
| - if np.issubdtype(dtype, np.integer): |
126 |
| - if ( |
127 |
| - np.issubdtype(var.dtype, np.floating) |
128 |
| - and "_FillValue" not in var.attrs |
129 |
| - and "missing_value" not in var.attrs |
130 |
| - ): |
131 |
| - warnings.warn( |
132 |
| - f"saving variable {name} with floating " |
133 |
| - "point data as an integer dtype without " |
134 |
| - "any _FillValue to use for NaNs", |
135 |
| - SerializationWarning, |
136 |
| - stacklevel=10, |
137 |
| - ) |
138 |
| - data = np.around(data) |
139 |
| - data = data.astype(dtype=dtype) |
140 |
| - var = Variable(dims, data, attrs, encoding, fastpath=True) |
141 |
| - return var |
142 |
| - |
143 |
| - |
144 |
| -def maybe_default_fill_value(var: Variable) -> Variable: |
145 |
| - # make NaN the fill value for float types: |
146 |
| - if ( |
147 |
| - "_FillValue" not in var.attrs |
148 |
| - and "_FillValue" not in var.encoding |
149 |
| - and np.issubdtype(var.dtype, np.floating) |
150 |
| - ): |
151 |
| - var.attrs["_FillValue"] = var.dtype.type(np.nan) |
152 |
| - return var |
153 |
| - |
154 |
| - |
155 |
| -def maybe_encode_bools(var: Variable) -> Variable: |
156 |
| - if ( |
157 |
| - (var.dtype == bool) |
158 |
| - and ("dtype" not in var.encoding) |
159 |
| - and ("dtype" not in var.attrs) |
160 |
| - ): |
161 |
| - dims, data, attrs, encoding = _var_as_tuple(var) |
162 |
| - attrs["dtype"] = "bool" |
163 |
| - data = duck_array_ops.astype(data, dtype="i1", copy=True) |
164 |
| - var = Variable(dims, data, attrs, encoding, fastpath=True) |
165 |
| - return var |
166 |
| - |
167 |
| - |
168 | 55 | def _infer_dtype(array, name: T_Name = None) -> np.dtype:
|
169 | 56 | """Given an object array with no missing values, infer its dtype from its
|
170 | 57 | first element
|
@@ -292,13 +179,13 @@ def encode_cf_variable(
|
292 | 179 | variables.CFScaleOffsetCoder(),
|
293 | 180 | variables.CFMaskCoder(),
|
294 | 181 | variables.UnsignedIntegerCoder(),
|
| 182 | + variables.NonStringCoder(), |
| 183 | + variables.DefaultFillvalueCoder(), |
| 184 | + variables.BooleanCoder(), |
295 | 185 | ]:
|
296 | 186 | var = coder.encode(var, name=name)
|
297 | 187 |
|
298 |
| - # TODO(shoyer): convert all of these to use coders, too: |
299 |
| - var = maybe_encode_nonstring_dtype(var, name=name) |
300 |
| - var = maybe_default_fill_value(var) |
301 |
| - var = maybe_encode_bools(var) |
| 188 | + # TODO(kmuehlbauer): check if ensure_dtype_not_object can be moved to backends: |
302 | 189 | var = ensure_dtype_not_object(var, name=name)
|
303 | 190 |
|
304 | 191 | for attr_name in CF_RELATED_DATA:
|
@@ -389,19 +276,15 @@ def decode_cf_variable(
|
389 | 276 | if decode_times:
|
390 | 277 | var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name)
|
391 | 278 |
|
392 |
| - dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) |
393 |
| - # TODO(shoyer): convert everything below to use coders |
| 279 | + if decode_endianness and not var.dtype.isnative: |
| 280 | + var = variables.EndianCoder().decode(var) |
| 281 | + original_dtype = var.dtype |
394 | 282 |
|
395 |
| - if decode_endianness and not data.dtype.isnative: |
396 |
| - # do this last, so it's only done if we didn't already unmask/scale |
397 |
| - data = NativeEndiannessArray(data) |
398 |
| - original_dtype = data.dtype |
| 283 | + var = variables.BooleanCoder().decode(var) |
399 | 284 |
|
400 |
| - encoding.setdefault("dtype", original_dtype) |
| 285 | + dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) |
401 | 286 |
|
402 |
| - if "dtype" in attributes and attributes["dtype"] == "bool": |
403 |
| - del attributes["dtype"] |
404 |
| - data = BoolTypeArray(data) |
| 287 | + encoding.setdefault("dtype", original_dtype) |
405 | 288 |
|
406 | 289 | if not is_duck_dask_array(data):
|
407 | 290 | data = indexing.LazilyIndexedArray(data)
|
|
0 commit comments