Skip to content

Commit 2c29c7a

Browse files
authored
Merge pull request #5681 from PrimozGodec/pandas-compat-datetime
[FIX] pandas_compat: do not parse column of numbers (object dtype) to datetime
2 parents 3634e37 + f622c56 commit 2c29c7a

File tree

2 files changed

+29
-0
lines changed

2 files changed

+29
-0
lines changed

Orange/data/pandas_compat.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,16 @@ def _is_datetime(s):
157157
return True
158158
try:
159159
if is_object_dtype(s):
160+
# pd.to_datetime would successfully parse a column of numbers to datetime,
161+
# but a column of object dtype holding numbers should become either
162+
# discrete or string - the following code tries to parse the column as numeric;
163+
# if the conversion to numeric succeeds, return False
164+
try:
165+
pd.to_numeric(s)
166+
return False
167+
except (ValueError, TypeError):
168+
pass
169+
160170
# utc=True - to allow different timezones in a series object
161171
pd.to_datetime(s, infer_datetime_format=True, utc=True)
162172
return True

Orange/data/tests/test_pandas.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,25 @@ def test_table_from_frame_timezones(self):
383383
],
384384
)
385385

386+
def test_table_from_frame_no_datetim(self):
387+
"""
388+
When the dtype of a column is object and the column contains numbers only,
389+
the column could be recognized as a TimeVariable since pd.to_datetime can parse
390+
numbers as datetime. That column must result in either a StringVariable
391+
or a DiscreteVariable since its dtype is object.
392+
"""
393+
from Orange.data.pandas_compat import table_from_frame
394+
395+
df = pd.DataFrame([[1], [2], [3]], dtype="object")
396+
table = table_from_frame(df)
397+
# the object column must become a StringVariable (meta), not a TimeVariable
398+
self.assertIsInstance(table.domain.metas[0], StringVariable)
399+
400+
df = pd.DataFrame([[1], [2], [2]], dtype="object")
401+
table = table_from_frame(df)
402+
# the object column with a repeated value must become a DiscreteVariable, not a TimeVariable
403+
self.assertIsInstance(table.domain.attributes[0], DiscreteVariable)
404+
386405
def test_time_variable_compatible(self):
387406
from Orange.data.pandas_compat import table_from_frame
388407

0 commit comments

Comments
 (0)