Skip to content
Closed
16 changes: 14 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,16 @@ cpdef array_to_datetime(
# string
seen_string = True

if len(val) == 0 or val in nat_strings:
if len(val) == 0:
if is_coerce:
iresult[i] = NPY_NAT
continue
elif is_ignore:
raise TypeError("Empty string is not a valid datetime")
else:
raise ValueError("Empty string is not a valid datetime")

if val in nat_strings:
iresult[i] = NPY_NAT
continue

Expand Down Expand Up @@ -710,7 +719,10 @@ cdef array_to_datetime_object(
# GH 25978. No need to parse NaT-like or datetime-like vals
oresult[i] = val
elif isinstance(val, str):
if len(val) == 0 or val in nat_strings:
if len(val) == 0:
oresult[i] = val
continue
if val in nat_strings:
oresult[i] = 'NaT'
continue
try:
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,9 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
int out_local = 0, out_tzoffset = 0
bint do_parse_datetime_string = False

if len(ts) == 0 or ts in nat_strings:
if len(ts) == 0:
raise ValueError("Empty string is not a valid timestamp")
elif ts in nat_strings:
ts = NaT
elif ts == 'now':
# Issue 9000, we short-circuit rather than going
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/timedeltas.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,9 @@ cdef inline int64_t parse_timedelta_string(str ts) except? -1:
# have_value : track if we have at least 1 leading unit
# have_hhmmss : tracks if we have a regular format hh:mm:ss

if len(ts) == 0 or ts in nat_strings:
if len(ts) == 0:
raise ValueError("Empty string is not a valid timedelta")
if ts in nat_strings:
return NPY_NAT

for c in ts:
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2182,7 +2182,7 @@ def isna(self):
For datetimes, `NaT` (Not a Time) is considered as an NA value.

>>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
... pd.Timestamp(''), None, pd.NaT])
... pd.Timestamp('NaT'), None, pd.NaT])
>>> idx
DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
dtype='datetime64[ns]', freq=None)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def predictions(tool):
{
"Key": ["B", "B", "A", "A"],
"State": ["step1", "step2", "step1", "step2"],
"oTime": ["", "2016-09-19 05:24:33", "", "2016-09-19 23:59:04"],
"oTime": ["NaT", "2016-09-19 05:24:33", "NaT", "2016-09-19 23:59:04"],
"Machine": ["23", "36L", "36R", "36R"],
}
)
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,9 +1129,7 @@ def test_parse_dates_empty_string(all_parsers):
data = "Date,test\n2012-01-01,1\n,2"
result = parser.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False)

expected = DataFrame(
[[datetime(2012, 1, 1), 1], [pd.NaT, 2]], columns=["Date", "test"]
)
expected = DataFrame([[datetime(2012, 1, 1), 1], ["", 2]], columns=["Date", "test"])
tm.assert_frame_equal(result, expected)


Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/scalar/test_nat.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,13 @@ def test_identity(klass, value):
@pytest.mark.parametrize("klass", [Timestamp, Timedelta, Period])
@pytest.mark.parametrize("value", ["", "nat", "NAT", None, np.nan])
def test_equality(klass, value):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would instead remove the empty-string value from this parametrization and put it in a new test for that case (asserting the error).

These duplicate the constructor tests, so we should keep one or the other (I think they are fine here).

if klass is Period and value == "":
pytest.skip("Period cannot parse empty string")
if value == "":
if klass is Period and value == "":
pytest.skip("Period cannot parse empty string")
elif klass is Timedelta:
pytest.skip("Timedelta cannot parse empty string")
elif klass is Timestamp:
pytest.skip("Timestamp cannot parse empty string")

assert klass(value).value == iNaT

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/scalar/timedelta/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ def test_construction():
Timedelta("foo bar")


def test_construction_empty_string():
    # Issue #36550: an empty string must raise rather than parse.
    # Matching on the message keeps an unrelated ValueError from
    # satisfying the test.
    msg = "Empty string is not a valid timedelta"
    with pytest.raises(ValueError, match=msg):
        Timedelta("")


@pytest.mark.parametrize(
"item",
list(
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/scalar/timestamp/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,10 @@ def test_constructor_fromisocalendar(self):
assert result == expected_stdlib
assert isinstance(result, Timestamp)

def test_constructior_empty_string(self):
    # Issue #36550: an empty string must raise rather than parse.
    # Matching on the message keeps an unrelated ValueError from
    # satisfying the test.
    msg = "Empty string is not a valid timestamp"
    with pytest.raises(ValueError, match=msg):
        Timestamp("")


def test_constructor_ambigous_dst():
# GH 24329
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/series/methods/test_isin.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,21 @@ def test_isin_empty(self, empty):

result = s.isin(empty)
tm.assert_series_equal(expected, result)

@pytest.mark.parametrize(
    "values, in_list, expected",
    [
        ([""], ["", pd.Timedelta(0)], [True]),
        (["", pd.Timedelta(0)], [""], [True, False]),
        ([""], ["", pd.to_datetime("2020-01-01")], [True]),
        (["", pd.to_datetime("2020-01-01")], [""], [True, False]),
    ],
)
def test_empty_string_category(self, values, in_list, expected):
    # Issue #36550
    # Empty strings mixed with datetimelike values: each case checks that
    # ``isin`` yields the parametrized ``expected`` mask.
    ser = pd.Series(values)
    result = ser.isin(in_list)
    # Use the shared ``tm`` helper like the other tests in this module.
    tm.assert_series_equal(result, pd.Series(expected))
18 changes: 14 additions & 4 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,16 @@ def test_to_datetime_fixed_offset(self):
result = to_datetime(dates)
assert result.tz == fixed_off

def test_to_datetime_empty_string(self):
    # Exercise each ``errors`` mode of ``to_datetime`` with an empty string.

    # errors="raise": the empty string is rejected.
    with pytest.raises(ValueError):
        pd.to_datetime("", errors="raise")

    # errors="ignore": the input comes back unchanged.
    passed_through = pd.to_datetime("", errors="ignore")
    assert passed_through == ""

    # errors="coerce": the input becomes NaT.
    coerced = pd.to_datetime("", errors="coerce")
    assert coerced is pd.NaT


class TestToDatetimeUnit:
@pytest.mark.parametrize("cache", [True, False])
Expand Down Expand Up @@ -1574,11 +1584,11 @@ def test_to_datetime_with_apply(self, cache):
def test_to_datetime_types(self, cache):

# empty string
result = to_datetime("", cache=cache)
assert result is NaT
with pytest.raises(ValueError):
result = to_datetime("", cache=cache)

result = to_datetime(["", ""], cache=cache)
assert isna(result).all()
with pytest.raises(ValueError):
result = to_datetime(["", ""], cache=cache)

# ints
result = Timestamp(0)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
import pytest

import pandas as pd
from pandas import Series, TimedeltaIndex, isna, to_timedelta
from pandas import Series, TimedeltaIndex, to_timedelta
import pandas._testing as tm


class TestTimedeltas:
def test_to_timedelta(self):

result = to_timedelta(["", ""])
assert isna(result).all()
with pytest.raises(ValueError):
to_timedelta(["", ""])

# pass thru
result = to_timedelta(np.array([np.timedelta64(1, "s")]))
Expand Down