Skip to content

Commit 45417bb

Browse files
committed
BUG: (GH4708) A zero length series written to HDF cannot be read back.
1 parent 4db583d commit 45417bb

File tree

3 files changed

+25
-13
lines changed

3 files changed

+25
-13
lines changed

doc/source/release.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ API Changes
209209

210210
- ``HDFStore``
211211

212+
- Fix bug where a zero length series written to HDF could not be read back. (:issue:`4708`)
212213
- ``append_to_multiple`` automatically synchronizes writing rows to multiple
213214
tables and adds a ``dropna`` kwarg (:issue:`4698`)
214215
- handle a passed ``Series`` in table format (:issue:`4330`)

pandas/io/pytables.py

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,6 +2213,11 @@ def read_multi_index(self, key):
22132213

22142214
def read_index_node(self, node):
22152215
data = node[:]
2216+
# If the index was an empty array write_array_empty() will
2217+
# have written a sentinel. Here we replace it with the original.
2218+
if 'shape' in node._v_attrs \
2219+
and self._is_empty_array(getattr(node._v_attrs, 'shape')):
2220+
data = np.empty(getattr(node._v_attrs, 'shape'), dtype=getattr(node._v_attrs, 'value_type'))
22162221
kind = _ensure_decoded(node._v_attrs.kind)
22172222
name = None
22182223

@@ -2251,12 +2256,16 @@ def write_array_empty(self, key, value):
22512256
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
22522257
getattr(self.group, key)._v_attrs.shape = value.shape
22532258

2259+
def _is_empty_array(self, shape):
2260+
"""Returns true if any axis is zero length."""
2261+
return any(x == 0 for x in shape)
2262+
22542263
def write_array(self, key, value, items=None):
22552264
if key in self.group:
22562265
self._handle.removeNode(self.group, key)
22572266

22582267
# Transform needed to interface with pytables row/col notation
2259-
empty_array = any(x == 0 for x in value.shape)
2268+
empty_array = self._is_empty_array(value.shape)
22602269
transposed = False
22612270

22622271
if not empty_array:
@@ -2305,17 +2314,18 @@ def write_array(self, key, value, items=None):
23052314
vlarr = self._handle.createVLArray(self.group, key,
23062315
_tables().ObjectAtom())
23072316
vlarr.append(value)
2308-
elif value.dtype.type == np.datetime64:
2309-
self._handle.createArray(self.group, key, value.view('i8'))
2310-
getattr(self.group, key)._v_attrs.value_type = 'datetime64'
2311-
elif value.dtype.type == np.timedelta64:
2312-
self._handle.createArray(self.group, key, value.view('i8'))
2313-
getattr(self.group, key)._v_attrs.value_type = 'timedelta64'
23142317
else:
23152318
if empty_array:
23162319
self.write_array_empty(key, value)
23172320
else:
2318-
self._handle.createArray(self.group, key, value)
2321+
if value.dtype.type == np.datetime64:
2322+
self._handle.createArray(self.group, key, value.view('i8'))
2323+
getattr(self.group, key)._v_attrs.value_type = 'datetime64'
2324+
elif value.dtype.type == np.timedelta64:
2325+
self._handle.createArray(self.group, key, value.view('i8'))
2326+
getattr(self.group, key)._v_attrs.value_type = 'timedelta64'
2327+
else:
2328+
self._handle.createArray(self.group, key, value)
23192329

23202330
getattr(self.group, key)._v_attrs.transposed = transposed
23212331

@@ -2362,11 +2372,7 @@ def shape(self):
23622372
def read(self, **kwargs):
23632373
self.validate_read(kwargs)
23642374
index = self.read_index('index')
2365-
if len(index) > 0:
2366-
values = self.read_array('values')
2367-
else:
2368-
values = []
2369-
2375+
values = self.read_array('values')
23702376
return Series(values, index=index, name=self.name)
23712377

23722378
def write(self, obj, **kwargs):

pandas/io/tests/test_pytables.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2358,6 +2358,11 @@ def test_empty_series_frame(self):
23582358
self._check_roundtrip(df1, tm.assert_frame_equal)
23592359
self._check_roundtrip(df2, tm.assert_frame_equal)
23602360

2361+
def test_empty_series(self):
2362+
for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]']:
2363+
s = Series(dtype=dtype)
2364+
self._check_roundtrip(s, tm.assert_series_equal)
2365+
23612366
def test_can_serialize_dates(self):
23622367

23632368
rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]

0 commit comments

Comments
 (0)