Skip to content

Commit b77c021

Browse files
authored
Merge pull request #1424 from lanzagar/timevar
[ENH] SqlTable: Automatically recognize date/time fields
2 parents ff08bcd + baee17a commit b77c021

File tree

4 files changed

+83
-8
lines changed

4 files changed

+83
-8
lines changed

Orange/data/sql/table.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
psycopg2.pool = Orange.misc.import_late_warning("psycopg2.pool")
1717

1818
from .. import domain, variable, table, instance, filter,\
19-
DiscreteVariable, ContinuousVariable, StringVariable
19+
DiscreteVariable, ContinuousVariable, StringVariable, TimeVariable
2020
from Orange.data.sql import filter as sql_filter
2121

2222

@@ -104,8 +104,12 @@ def get_domain(self, type_hints=None, guess_values=False):
104104

105105
def add_to_sql(var, field_name):
106106
if var.is_continuous:
107-
var.to_sql = ToSql("({})::double precision".format(
108-
self.quote_identifier(field_name)))
107+
if isinstance(var, TimeVariable):
108+
var.to_sql = ToSql("extract(epoch from {})".format(
109+
self.quote_identifier(field_name)))
110+
else:
111+
var.to_sql = ToSql("({})::double precision".format(
112+
self.quote_identifier(field_name)))
109113
elif var.is_discrete:
110114
var.to_sql = ToSql("({})::text".format(
111115
self.quote_identifier(field_name)))
@@ -137,10 +141,19 @@ def get_variable(self, field_name, type_code, inspect_values=False):
137141
INT_TYPES = (20, 21, 23) # bigint, smallint, int
138142
CHAR_TYPES = (25, 1042, 1043,) # text, char, varchar
139143
BOOLEAN_TYPES = (16,) # bool
144+
DATE_TYPES = (1082, 1114, 1184, ) # date, timestamp, timestamptz
145+
# time, timestamp, timestamptz, timetz
146+
TIME_TYPES = (1083, 1114, 1184, 1266,)
140147

141148
if type_code in FLOATISH_TYPES:
142149
return ContinuousVariable(field_name)
143150

151+
if type_code in TIME_TYPES + DATE_TYPES:
152+
tv = TimeVariable(field_name)
153+
tv.have_date |= type_code in DATE_TYPES
154+
tv.have_time |= type_code in TIME_TYPES
155+
return tv
156+
144157
if type_code in INT_TYPES: # bigint, smallint, int
145158
if inspect_values:
146159
values = self.get_distinct_values(field_name)

Orange/data/variable.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -986,3 +986,12 @@ def parse(self, datestr):
986986
try: return dt.timestamp()
987987
except OverflowError:
988988
return -(self.UNIX_EPOCH - dt).total_seconds()
989+
990+
def to_val(self, s):
991+
"""
992+
Convert a value, given as an instance of an arbitrary type, to a float.
993+
"""
994+
if isinstance(s, str):
995+
return self.parse(s)
996+
else:
997+
return super().to_val(s)

Orange/tests/sql/test_sql_table.py

Lines changed: 55 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,12 +2,13 @@
22
# pylint: disable=missing-docstring
33

44
import unittest
5+
import unittest.mock
56

67
import numpy as np
78
from numpy.testing import assert_almost_equal
89

910
from Orange.data import filter, ContinuousVariable, DiscreteVariable, \
10-
StringVariable, Table, Domain
11+
StringVariable, TimeVariable, Table, Domain
1112
from Orange.data.sql.table import SqlTable
1213
from Orange.preprocess.discretize import EqualWidth
1314
from Orange.statistics.basic_stats import BasicStats, DomainBasicStats
@@ -367,16 +368,66 @@ def test_meta_varchar(self):
367368
sql_table = SqlTable(conn, table_name, inspect_values=True)
368369
self.assertFirstMetaIsInstance(sql_table, StringVariable)
369370

370-
def test_date(self):
371+
def test_time_date(self):
371372
table = np.array(['2014-04-12', '2014-04-13', '2014-04-14',
372373
'2014-04-15', '2014-04-16']).reshape(-1, 1)
373374
conn, table_name = self.create_sql_table(table, ['date'])
374375

375376
sql_table = SqlTable(conn, table_name, inspect_values=False)
376-
self.assertFirstMetaIsInstance(sql_table, StringVariable)
377+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
377378

378379
sql_table = SqlTable(conn, table_name, inspect_values=True)
379-
self.assertFirstMetaIsInstance(sql_table, StringVariable)
380+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
381+
382+
def test_time_time(self):
383+
table = np.array(['17:39:51', '11:51:48.46', '05:20:21.492149',
384+
'21:47:06', '04:47:35.8']).reshape(-1, 1)
385+
conn, table_name = self.create_sql_table(table, ['time'])
386+
387+
sql_table = SqlTable(conn, table_name, inspect_values=False)
388+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
389+
390+
sql_table = SqlTable(conn, table_name, inspect_values=True)
391+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
392+
393+
def test_time_timetz(self):
394+
table = np.array(['17:39:51+0200', '11:51:48.46+01', '05:20:21.4921',
395+
'21:47:06-0600', '04:47:35.8+0330']).reshape(-1, 1)
396+
conn, table_name = self.create_sql_table(table, ['timetz'])
397+
398+
sql_table = SqlTable(conn, table_name, inspect_values=False)
399+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
400+
401+
sql_table = SqlTable(conn, table_name, inspect_values=True)
402+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
403+
404+
def test_time_timestamp(self):
405+
table = np.array(['2014-07-15 17:39:51.348149',
406+
'2008-10-05 11:51:48.468149',
407+
'2008-11-03 05:20:21.492149',
408+
'2015-01-02 21:47:06.228149',
409+
'2016-04-16 04:47:35.892149']).reshape(-1, 1)
410+
conn, table_name = self.create_sql_table(table, ['timestamp'])
411+
412+
sql_table = SqlTable(conn, table_name, inspect_values=False)
413+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
414+
415+
sql_table = SqlTable(conn, table_name, inspect_values=True)
416+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
417+
418+
def test_time_timestamptz(self):
419+
table = np.array(['2014-07-15 17:39:51.348149+0200',
420+
'2008-10-05 11:51:48.468149+02',
421+
'2008-11-03 05:20:21.492149+01',
422+
'2015-01-02 21:47:06.228149+0100',
423+
'2016-04-16 04:47:35.892149+0330']).reshape(-1, 1)
424+
conn, table_name = self.create_sql_table(table, ['timestamptz'])
425+
426+
sql_table = SqlTable(conn, table_name, inspect_values=False)
427+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
428+
429+
sql_table = SqlTable(conn, table_name, inspect_values=True)
430+
self.assertFirstAttrIsInstance(sql_table, TimeVariable)
380431

381432
def test_double_precision(self):
382433
table = np.arange(25).reshape((-1, 1))

Orange/tests/test_variable.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,12 +312,14 @@ class TestTimeVariable(VariableTest):
312312
('1969-12-31 23:59:59', -1, '1969-12-31 23:59:59'),
313313
('1900-01-01', -2208988800, '1900-01-01'),
314314
('nan', np.nan, '?'),
315+
('1444651991.81', 1444651991.81, '2015-10-12 12:13:11.810000'),
316+
(1444651991.81, 1444651991.81, '2015-10-12 12:13:11.810000'),
315317
]
316318

317319
def test_parse_repr(self):
318320
for datestr, timestamp, outstr in self.TESTS:
319321
var = TimeVariable('time')
320-
ts = var.parse(datestr)
322+
ts = var.to_val(datestr) # calls parse for strings
321323
if not np.isnan(ts):
322324
self.assertEqual(ts, timestamp, msg=datestr)
323325
self.assertEqual(var.repr_val(ts), outstr, msg=datestr)

0 commit comments

Comments
 (0)