Skip to content

Commit 58ed629

Browse files
committed
BUG: fix apply on a Series when the applied function returns a timedelta (GH5458)
ENH: improved timedelta inference for non-ns dtypes
1 parent 724c688 commit 58ed629

File tree

6 files changed

+80
-59
lines changed

6 files changed

+80
-59
lines changed

pandas/core/common.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1546,7 +1546,7 @@ def _possibly_convert_objects(values, convert_dates=True,
15461546
if convert_timedeltas == 'coerce':
15471547
from pandas.tseries.timedeltas import \
15481548
_possibly_cast_to_timedelta
1549-
values = _possibly_cast_to_timedelta(values)
1549+
values = _possibly_cast_to_timedelta(values, coerce=True)
15501550

15511551
# if we are all nans then leave me alone
15521552
if not isnull(new_values).all():
@@ -1641,7 +1641,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
16411641
elif is_timedelta64:
16421642
from pandas.tseries.timedeltas import \
16431643
_possibly_cast_to_timedelta
1644-
value = _possibly_cast_to_timedelta(value)
1644+
value = _possibly_cast_to_timedelta(value, coerce=True)
16451645
except:
16461646
pass
16471647

pandas/src/inference.pyx

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
cimport util
22
from tslib import NaT
33

4+
iNaT = util.get_nat()
5+
46
_TYPE_MAP = {
57
np.int8: 'integer',
68
np.int16: 'integer',
@@ -61,14 +63,21 @@ def infer_dtype(object _values):
6163
return 'datetime64'
6264
elif is_timedelta_or_timedelta64_array(values):
6365
return 'timedelta'
66+
6467
elif util.is_integer_object(val):
68+
# a timedelta value also passes the integer check above, so test for it first
69+
if is_timedelta(val):
70+
if is_timedelta_or_timedelta64_array(values):
71+
return 'timedelta'
72+
6573
if is_integer_array(values):
6674
return 'integer'
6775
elif is_integer_float_array(values):
6876
return 'mixed-integer-float'
6977
elif is_timedelta_or_timedelta64_array(values):
7078
return 'timedelta'
7179
return 'mixed-integer'
80+
7281
elif is_datetime(val):
7382
if is_datetime_array(values):
7483
return 'datetime'
@@ -120,6 +129,9 @@ def infer_dtype_list(list values):
120129
pass
121130

122131

132+
cdef inline bint is_null_datetimelike(v):
133+
return util._checknull(v) or (util.is_integer_object(v) and v == iNaT) or v is NaT
134+
123135
cdef inline bint is_datetime(object o):
124136
return PyDateTime_Check(o)
125137

@@ -268,7 +280,7 @@ def is_datetime_array(ndarray[object] values):
268280
return False
269281
for i in range(n):
270282
v = values[i]
271-
if not (is_datetime(v) or util._checknull(v) or v is NaT):
283+
if not (is_datetime(v) or is_null_datetimelike(v)):
272284
return False
273285
return True
274286

@@ -280,7 +292,7 @@ def is_datetime64_array(ndarray values):
280292
return False
281293
for i in range(n):
282294
v = values[i]
283-
if not (util.is_datetime64_object(v) or util._checknull(v) or v is NaT):
295+
if not (util.is_datetime64_object(v) or is_null_datetimelike(v)):
284296
return False
285297
return True
286298

@@ -291,19 +303,23 @@ def is_timedelta(object o):
291303
def is_timedelta_array(ndarray values):
292304
import datetime
293305
cdef int i, n = len(values)
306+
cdef object v
294307
if n == 0:
295308
return False
296309
for i in range(n):
297-
if not isinstance(values[i],datetime.timedelta):
310+
v = values[i]
311+
if not (isinstance(v,datetime.timedelta) or is_null_datetimelike(v)):
298312
return False
299313
return True
300314

301315
def is_timedelta64_array(ndarray values):
302316
cdef int i, n = len(values)
317+
cdef object v
303318
if n == 0:
304319
return False
305320
for i in range(n):
306-
if not isinstance(values[i],np.timedelta64):
321+
v = values[i]
322+
if not (isinstance(v,np.timedelta64) or is_null_datetimelike(v)):
307323
return False
308324
return True
309325

@@ -316,7 +332,8 @@ def is_timedelta_or_timedelta64_array(ndarray values):
316332
return False
317333
for i in range(n):
318334
v = values[i]
319-
if not (isinstance(v,datetime.timedelta) or isinstance(v,np.timedelta64) or util._checknull(v) or v is NaT):
335+
if not (isinstance(v,datetime.timedelta) or isinstance(v,np.timedelta64) or
336+
is_null_datetimelike(v)):
320337
return False
321338
return True
322339

@@ -499,7 +516,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
499516
break
500517
elif is_timedelta(val):
501518
if convert_timedelta:
502-
itimedeltas[i] = convert_to_timedelta64(val, 'ns')
519+
itimedeltas[i] = convert_to_timedelta64(val, 'ns', False)
503520
seen_timedelta = 1
504521
else:
505522
seen_object = 1

pandas/tests/test_series.py

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2246,45 +2246,59 @@ def test_operators_empty_int_corner(self):
22462246

22472247
def test_constructor_dtype_timedelta64(self):
22482248

2249+
# basic
22492250
td = Series([timedelta(days=i) for i in range(3)])
22502251
self.assert_(td.dtype == 'timedelta64[ns]')
22512252

2252-
# mixed with NaT
2253-
from pandas import tslib
2254-
td = Series([timedelta(days=i)
2255-
for i in range(3)] + [tslib.NaT ], dtype='m8[ns]' )
2253+
td = Series([timedelta(days=1)])
2254+
self.assert_(td.dtype == 'timedelta64[ns]')
2255+
2256+
td = Series([timedelta(days=1),timedelta(days=2),np.timedelta64(1,'s')])
22562257
self.assert_(td.dtype == 'timedelta64[ns]')
22572258

2258-
td = Series([timedelta(days=i)
2259-
for i in range(3)] + [tslib.iNaT ], dtype='m8[ns]' )
2259+
# mixed with NaT
2260+
from pandas import tslib
2261+
td = Series([timedelta(days=1),tslib.NaT ], dtype='m8[ns]' )
22602262
self.assert_(td.dtype == 'timedelta64[ns]')
22612263

2262-
td = Series([timedelta(days=i)
2263-
for i in range(3)] + [np.nan ], dtype='m8[ns]' )
2264+
td = Series([timedelta(days=1),np.nan ], dtype='m8[ns]' )
22642265
self.assert_(td.dtype == 'timedelta64[ns]')
22652266

22662267
td = Series([np.timedelta64(300000000), pd.NaT],dtype='m8[ns]')
22672268
self.assert_(td.dtype == 'timedelta64[ns]')
22682269

22692270
# improved inference
2271+
# GH5689
22702272
td = Series([np.timedelta64(300000000), pd.NaT])
22712273
self.assert_(td.dtype == 'timedelta64[ns]')
22722274

2275+
td = Series([np.timedelta64(300000000), tslib.iNaT])
2276+
self.assert_(td.dtype == 'timedelta64[ns]')
2277+
2278+
td = Series([np.timedelta64(300000000), np.nan])
2279+
self.assert_(td.dtype == 'timedelta64[ns]')
2280+
22732281
td = Series([pd.NaT, np.timedelta64(300000000)])
22742282
self.assert_(td.dtype == 'timedelta64[ns]')
22752283

2284+
td = Series([np.timedelta64(1,'s')])
2285+
self.assert_(td.dtype == 'timedelta64[ns]')
2286+
22762287
# these are frequency conversion astypes
22772288
#for t in ['s', 'D', 'us', 'ms']:
22782289
# self.assertRaises(TypeError, td.astype, 'm8[%s]' % t)
22792290

22802291
# valid astype
22812292
td.astype('int64')
22822293

2283-
# this is an invalid casting
2284-
self.assertRaises(Exception, Series, [timedelta(days=i)
2285-
for i in range(3)] + ['foo' ], dtype='m8[ns]' )
2294+
# invalid casting
22862295
self.assertRaises(TypeError, td.astype, 'int32')
22872296

2297+
# this is an invalid casting
2298+
def f():
2299+
Series([timedelta(days=1), 'foo'],dtype='m8[ns]')
2300+
self.assertRaises(Exception, f)
2301+
22882302
# leave as object here
22892303
td = Series([timedelta(days=i) for i in range(3)] + ['foo'])
22902304
self.assert_(td.dtype == 'object')

pandas/tseries/tests/test_timedeltas.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,16 @@ def conv(v):
173173
expected = np.timedelta64(timedelta(seconds=1))
174174
self.assert_(result == expected)
175175

176+
def test_to_timedelta_via_apply(self):
177+
178+
# GH 5458
179+
expected = Series([np.timedelta64(1,'s')])
180+
result = Series(['00:00:01']).apply(to_timedelta)
181+
tm.assert_series_equal(result, expected)
182+
183+
result = Series([to_timedelta('00:00:01')])
184+
tm.assert_series_equal(result, expected)
185+
176186
def test_timedelta_ops(self):
177187
_skip_if_numpy_not_friendly()
178188

pandas/tslib.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from numpy cimport ndarray, int64_t
22

33
cdef convert_to_tsobject(object, object, object)
4-
cdef convert_to_timedelta64(object, object)
4+
cdef convert_to_timedelta64(object, object, object)

pandas/tslib.pyx

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,52 +1155,26 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
11551155

11561156
return oresult
11571157

1158-
def array_to_timedelta64(ndarray[object] values, coerce=True):
1158+
def array_to_timedelta64(ndarray[object] values, coerce=False):
11591159
""" convert an ndarray to an array of ints that are timedeltas
11601160
force conversion if coerce = True,
1161-
else return an object array """
1161+
else will raise if cannot convert """
11621162
cdef:
11631163
Py_ssize_t i, n
1164-
object val
1165-
ndarray[int64_t] result
1164+
ndarray[int64_t] iresult
11661165

11671166
n = values.shape[0]
1168-
result = np.empty(n, dtype='i8')
1169-
for i in range(n):
1170-
val = values[i]
1171-
1172-
# in py3 this is already an int, don't convert
1173-
if is_integer_object(val):
1174-
result[i] = val
1175-
1176-
elif isinstance(val,timedelta) or isinstance(val,np.timedelta64):
1177-
1178-
if isinstance(val, np.timedelta64):
1179-
if val.dtype != 'm8[ns]':
1180-
val = val.astype('m8[ns]')
1181-
val = val.item()
1182-
else:
1183-
val = _delta_to_nanoseconds(np.timedelta64(val).item())
1184-
1185-
result[i] = val
1186-
1187-
elif _checknull_with_nat(val):
1188-
result[i] = iNaT
1189-
1190-
else:
1191-
1192-
# just return, don't convert
1193-
if not coerce:
1194-
return values.copy()
1195-
1196-
result[i] = iNaT
1167+
result = np.empty(n, dtype='m8[ns]')
1168+
iresult = result.view('i8')
11971169

1198-
return result
1170+
for i in range(n):
1171+
result[i] = convert_to_timedelta64(values[i], 'ns', coerce)
1172+
return iresult
11991173

1200-
def convert_to_timedelta(object ts, object unit='ns'):
1201-
return convert_to_timedelta64(ts, unit)
1174+
def convert_to_timedelta(object ts, object unit='ns', coerce=False):
1175+
return convert_to_timedelta64(ts, unit, coerce)
12021176

1203-
cdef convert_to_timedelta64(object ts, object unit):
1177+
cdef convert_to_timedelta64(object ts, object unit, object coerce):
12041178
"""
12051179
Convert an incoming object to a timedelta64 if possible
12061180
@@ -1210,6 +1184,8 @@ cdef convert_to_timedelta64(object ts, object unit):
12101184
- np.int64 (with unit providing a possible modifier)
12111185
- None/NaT
12121186
1187+
if coerce is True, an invalid value is converted to NaT instead of raising
1188+
12131189
Return a ns based int64
12141190
12151191
# kludgy here until we have a timedelta scalar
@@ -1237,7 +1213,9 @@ cdef convert_to_timedelta64(object ts, object unit):
12371213

12381214
if _np_version_under1p7:
12391215
if not isinstance(ts, timedelta):
1240-
raise AssertionError("Invalid type for timedelta scalar: %s" % type(ts))
1216+
if coerce:
1217+
return np.timedelta64(iNaT)
1218+
raise ValueError("Invalid type for timedelta scalar: %s" % type(ts))
12411219
if not PY2:
12421220
# convert to microseconds in timedelta64
12431221
ts = np.timedelta64(int(ts.total_seconds()*1e9 + ts.microseconds*1000))
@@ -1247,7 +1225,9 @@ cdef convert_to_timedelta64(object ts, object unit):
12471225
if isinstance(ts, timedelta):
12481226
ts = np.timedelta64(ts)
12491227
elif not isinstance(ts, np.timedelta64):
1250-
raise AssertionError("Invalid type for timedelta scalar: %s" % type(ts))
1228+
if coerce:
1229+
return np.timedelta64(iNaT)
1230+
raise ValueError("Invalid type for timedelta scalar: %s" % type(ts))
12511231
return ts.astype('timedelta64[ns]')
12521232

12531233
def repr_timedelta64(object value, format=None):

0 commit comments

Comments
 (0)