@@ -220,34 +220,38 @@ class CFMaskCoder(VariableCoder):
220
220
def encode (self , variable : Variable , name : T_Name = None ):
221
221
dims , data , attrs , encoding = unpack_for_encoding (variable )
222
222
223
+ # get dtype from encoding if available, otherwise use data.dtype
223
224
dtype = np .dtype (encoding .get ("dtype" , data .dtype ))
224
225
fv = encoding .get ("_FillValue" )
225
226
mv = encoding .get ("missing_value" )
226
227
227
- if (
228
- fv is not None
229
- and mv is not None
230
- and not duck_array_ops .allclose_or_equiv (fv , mv )
231
- ):
232
- raise ValueError (
233
- f"Variable { name !r} has conflicting _FillValue ({ fv } ) and missing_value ({ mv } ). Cannot encode data."
234
- )
235
-
236
- if fv is not None :
237
- # Ensure _FillValue is cast to same dtype as data's
238
- encoding ["_FillValue" ] = dtype .type (fv )
239
- fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
240
- if not pd .isnull (fill_value ):
241
- data = duck_array_ops .fillna (data , fill_value )
242
-
243
- if mv is not None :
244
- # Ensure missing_value is cast to same dtype as data's
245
- encoding ["missing_value" ] = dtype .type (mv )
246
- fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
247
- if not pd .isnull (fill_value ) and fv is None :
248
- data = duck_array_ops .fillna (data , fill_value )
228
+ if fv is not None or mv is not None :
229
+ if (
230
+ fv is not None
231
+ and mv is not None
232
+ and not duck_array_ops .allclose_or_equiv (fv , mv )
233
+ ):
234
+ raise ValueError (
235
+ f"Variable { name !r} has conflicting _FillValue ({ fv } ) and missing_value ({ mv } ). Cannot encode data."
236
+ )
249
237
250
- return Variable (dims , data , attrs , encoding , fastpath = True )
238
+ if fv is not None :
239
+ # Ensure _FillValue is cast to the target dtype (encoding's dtype if given, else data's)
240
+ encoding ["_FillValue" ] = dtype .type (fv )
241
+ fill_value = pop_to (encoding , attrs , "_FillValue" , name = name )
242
+ if not pd .isnull (fill_value ):
243
+ data = duck_array_ops .fillna (data , fill_value )
244
+
245
+ if mv is not None :
246
+ # Only use mv if _FillValue isn't available
247
+ # Ensure missing_value is cast to the target dtype (encoding's dtype if given, else data's)
248
+ encoding ["missing_value" ] = attrs .get ("_FillValue" , dtype .type (mv ))
249
+ fill_value = pop_to (encoding , attrs , "missing_value" , name = name )
250
+ if not pd .isnull (fill_value ) and fv is None :
251
+ data = duck_array_ops .fillna (data , fill_value )
252
+ return Variable (dims , data , attrs , encoding , fastpath = True )
253
+ else :
254
+ return variable
251
255
252
256
def decode (self , variable : Variable , name : T_Name = None ):
253
257
dims , data , attrs , encoding = unpack_for_decoding (variable )
@@ -272,7 +276,13 @@ def decode(self, variable: Variable, name: T_Name = None):
272
276
stacklevel = 3 ,
273
277
)
274
278
275
- dtype , decoded_fill_value = dtypes .maybe_promote (data .dtype )
279
+ if "scale_factor" not in attrs and "add_offset" not in attrs :
280
+ dtype , decoded_fill_value = dtypes .maybe_promote (data .dtype )
281
+ else :
282
+ dtype , decoded_fill_value = (
283
+ _choose_float_dtype (data .dtype , attrs ),
284
+ np .nan ,
285
+ )
276
286
277
287
if encoded_fill_values :
278
288
transform = partial (
@@ -319,6 +329,10 @@ def _choose_float_dtype(
319
329
and offset_type == scale_type
320
330
and scale_type in [np .float32 , np .float64 ]
321
331
):
332
+ # in case of a 4-byte integer (e.g. int32/uint32) -> we need to upcast to float64
333
+ # due to precision issues
334
+ if dtype .itemsize == 4 and np .issubdtype (dtype , np .integer ):
335
+ return np .float64
322
336
return np .dtype (scale_type ).type
323
337
# Not CF conforming and add_offset given:
324
338
# A scale factor is entirely safe (vanishing into the mantissa),
@@ -354,7 +368,12 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
354
368
scale_factor = pop_to (encoding , attrs , "scale_factor" , name = name )
355
369
add_offset = pop_to (encoding , attrs , "add_offset" , name = name )
356
370
if scale_factor or add_offset :
357
- dtype = _choose_float_dtype (data .dtype , attrs )
371
+ # if we have a _FillValue/missing_value we do not want to cast now
372
+ # but leave that to CFMaskCoder
373
+ dtype = data .dtype
374
+ if "_FillValue" not in encoding and "missing_value" not in encoding :
375
+ dtype = _choose_float_dtype (data .dtype , attrs )
376
+ # but we still need a copy to prevent changing the original data
358
377
data = data .astype (dtype = dtype , copy = True )
359
378
if add_offset :
360
379
data -= add_offset
@@ -373,7 +392,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
373
392
scale_factor = np .asarray (scale_factor ).item ()
374
393
if np .ndim (add_offset ) > 0 :
375
394
add_offset = np .asarray (add_offset ).item ()
376
- dtype = _choose_float_dtype (data .dtype , encoding )
395
+ # if we have a _FillValue/missing_value we already have the wanted
396
+ # floating point dtype here (via CFMaskCoder), so no check is necessary
397
+ # only check in other cases
398
+ dtype = data .dtype
399
+ if "_FillValue" not in encoding and "missing_value" not in encoding :
400
+ dtype = _choose_float_dtype (dtype , encoding )
401
+
377
402
transform = partial (
378
403
_scale_offset_decoding ,
379
404
scale_factor = scale_factor ,
0 commit comments