Skip to content

Commit 00facc9

Browse files
committed
encode infinity fill value; spec tweaks
1 parent 8c8dbab commit 00facc9

File tree

3 files changed

+84
-45
lines changed

3 files changed

+84
-45
lines changed

docs/spec/v2.rst

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,23 @@ library::
103103
Data type encoding
104104
~~~~~~~~~~~~~~~~~~
105105

106-
Simple data types are encoded within the array metadata resource as a string,
106+
Simple data types are encoded within the array metadata as a string,
107107
following the `NumPy array protocol type string (typestr) format
108108
<http://docs.scipy.org/doc/numpy/reference/arrays.interface.html>`_. The format
109-
consists of 3 parts: a character describing the byteorder of the data (``<``:
110-
little-endian, ``>``: big-endian, ``|``: not-relevant), a character code giving
111-
the basic type of the array, and an integer providing the number of bytes the
112-
type uses. The byte order MUST be specified. E.g., ``"<f8"``, ``">i4"``,
113-
``"|b1"`` and ``"|S12"`` are valid data types.
109+
consists of 3 parts:
110+
111+
* One character describing the byte order of the data (``"<"``: little-endian;
112+
``">"``: big-endian; ``"|"``: not-relevant)
113+
* One character code giving the basic type of the array (``"b"``: Boolean (integer
114+
type where all values are only True or False); ``"i"``: integer; ``"u"``: unsigned
115+
integer; ``"f"``: floating point; ``"c"``: complex floating point; ``"m"``: timedelta;
116+
``"M"``: datetime; ``"S"``: string (fixed-length sequence of char); ``"U"``: unicode
117+
(fixed-length sequence of Py_UNICODE); ``"V"``: other (void * – each item is a
118+
fixed-size chunk of memory))
119+
* An integer specifying the number of bytes the type uses.
120+
121+
The byte order MUST be specified. E.g., ``"<f8"``, ``">i4"``, ``"|b1"`` and
122+
``"|S12"`` are valid data type encodings.
114123

115124
Structured data types (i.e., with multiple named fields) are encoded as a list
116125
of two-element lists, following `NumPy array protocol type descriptions (descr)
@@ -122,11 +131,17 @@ data type composed of three single-byte unsigned integers labelled "r", "g" and
122131
Fill value encoding
123132
~~~~~~~~~~~~~~~~~~~
124133

125-
Not a Number (NaN) MUST be encoded as the JSON string "NaN" if used as the
126-
value of the "fill_value" field.
134+
For simple floating point data types, the following special values of the
135+
"fill_value" field MUST be encoded as JSON strings according to this table:
136+
137+
================= ===============
138+
Value             JSON encoding
139+
================= ===============
140+
Not a Number      ``"NaN"``
141+
Positive Infinity ``"Infinity"``
142+
Negative Infinity ``"-Infinity"``
143+
================= ===============
127144

128-
When decoding the "fill_value" field, the JSON string "NaN" MUST be decoded
129-
as Not a Number (NaN) if the dtype basic type is floating point ("f").
130145

131146
Chunks
132147
~~~~~~

zarr/meta.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,20 +102,36 @@ def encode_group_metadata(meta=None):
102102
return b
103103

104104

105+
FLOAT_FILLS = {
106+
'NaN': np.nan,
107+
'Infinity': np.PINF,
108+
'-Infinity': np.NINF
109+
}
110+
111+
105112
def decode_fill_value(v, dtype):
106-
if v == 'NaN' and dtype.kind == 'f':
107-
return np.nan
113+
if dtype.kind == 'f':
114+
if v == 'NaN':
115+
return np.nan
116+
elif v == 'Infinity':
117+
return np.PINF
118+
elif v == '-Infinity':
119+
return np.NINF
120+
else:
121+
return v
108122
else:
109123
return v
110124

111125

112126
def encode_fill_value(v):
113127
try:
114-
isnan = np.isnan(v)
115-
except TypeError:
116-
return v
117-
else:
118-
if isnan:
128+
if np.isnan(v):
119129
return 'NaN'
130+
elif np.isposinf(v):
131+
return 'Infinity'
132+
elif np.isneginf(v):
133+
return '-Infinity'
120134
else:
121135
return v
136+
except TypeError:
137+
return v

zarr/tests/test_meta.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -106,37 +106,45 @@ def test_encode_decode_array_2():
106106
eq(meta['fill_value'], meta_dec['fill_value'])
107107

108108

109-
def test_encode_decode_array_nan_fill_value():
109+
def test_encode_decode_array_fill_values():
110110

111-
meta = dict(
112-
shape=(100,),
113-
chunks=(10,),
114-
dtype=np.dtype('f8'),
115-
compression='zlib',
116-
compression_opts=1,
117-
fill_value=np.nan,
118-
order='C'
111+
fills = (
112+
(np.nan, "NaN", np.isnan),
113+
(np.NINF, "-Infinity", np.isneginf),
114+
(np.PINF, "Infinity", np.isposinf),
119115
)
120116

121-
meta_json = '''{
122-
"chunks": [10],
123-
"compression": "zlib",
124-
"compression_opts": 1,
125-
"dtype": "<f8",
126-
"fill_value": "NaN",
127-
"order": "C",
128-
"shape": [100],
129-
"zarr_format": %s
130-
}''' % ZARR_FORMAT
131-
132-
# test encoding
133-
meta_enc = encode_array_metadata(meta)
134-
assert_json_eq(meta_json, meta_enc)
135-
136-
# test decoding
137-
meta_dec = decode_array_metadata(meta_enc)
138-
actual = meta_dec['fill_value']
139-
assert np.isnan(actual)
117+
for v, s, f in fills:
118+
119+
meta = dict(
120+
shape=(100,),
121+
chunks=(10,),
122+
dtype=np.dtype('f8'),
123+
compression='zlib',
124+
compression_opts=1,
125+
fill_value=v,
126+
order='C'
127+
)
128+
129+
meta_json = '''{
130+
"chunks": [10],
131+
"compression": "zlib",
132+
"compression_opts": 1,
133+
"dtype": "<f8",
134+
"fill_value": "%s",
135+
"order": "C",
136+
"shape": [100],
137+
"zarr_format": %s
138+
}''' % (s, ZARR_FORMAT)
139+
140+
# test encoding
141+
meta_enc = encode_array_metadata(meta)
142+
assert_json_eq(meta_json, meta_enc)
143+
144+
# test decoding
145+
meta_dec = decode_array_metadata(meta_enc)
146+
actual = meta_dec['fill_value']
147+
assert f(actual)
140148

141149

142150
def test_decode_array_unsupported_format():

0 commit comments

Comments
 (0)