Skip to content

Commit cc9ecf5

Browse files
author
Kevin D Smith
committed
Add support for date / time / datetime methods on double columns (#96)
1 parent f6fb010 commit cc9ecf5

File tree

5 files changed

+334
-17
lines changed

5 files changed

+334
-17
lines changed

swat/cas/table.py

Lines changed: 25 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
from ..utils import dict2kwargs, getattr_safe_property, xdict
4444
from ..utils.compat import (int_types, binary_types, text_types, items_types,
4545
patch_pandas_sort, char_types, num_types)
46+
from ..utils.datetime import is_date_format, is_datetime_format, is_time_format
4647
from ..utils.keyword import dekeywordify
4748

4849
# pylint: disable=W0212, W0221, W0613, R0904, C0330
@@ -8854,22 +8855,37 @@ class DatetimeColumnMethods(object):
88548855

88558856
def __init__(self, column):
88568857
self._column = column
8857-
self._dtype = column.dtype
8858-
if self._dtype not in ['date', 'datetime', 'time']:
8858+
8859+
columninfo = column._columninfo
8860+
8861+
self._dtype = columninfo['Type'][0]
8862+
if self._dtype not in ['date', 'datetime', 'time', 'double']:
88598863
raise TypeError('datetime methods are only usable on CAS dates, '
8860-
'times, and datetimes')
8864+
'times, datetimes, and doubles')
8865+
8866+
fmt = columninfo['Format'][0]
8867+
if self._dtype == 'double':
8868+
if is_date_format(fmt):
8869+
self._dtype = 'sas-date'
8870+
elif is_datetime_format(fmt):
8871+
self._dtype = 'sas-datetime'
8872+
elif is_time_format(fmt):
8873+
self._dtype = 'sas-time'
8874+
else:
8875+
raise TypeError('double columns must have a date, time, or '
8876+
'datetime format')
88618877

88628878
def _compute(self, *args, **kwargs):
88638879
''' Call the _compute method on the table column '''
88648880
return self._column._compute(*args, **kwargs)
88658881

88668882
def _get_part(self, func):
88678883
''' Get the specified part of the datetime '''
8868-
if self._dtype == 'date':
8884+
if self._dtype in ['date', 'sas-date']:
88698885
if func in ['hour', 'minute']:
88708886
return self._compute(func, '0')
88718887
return self._compute(func, '%s({value})' % func)
8872-
if self._dtype == 'time':
8888+
if self._dtype in ['time', 'sas-time']:
88738889
if func in ['hour', 'minute']:
88748890
return self._compute(func, '%s({value})' % func)
88758891
return self._compute(func, '%s(today())' % func)
@@ -8905,14 +8921,14 @@ def minute(self):
89058921
@property
89068922
def second(self):
89078923
''' The second of the datetime '''
8908-
if self._dtype == 'date':
8924+
if self._dtype in ['date', 'sas-date']:
89098925
return self._compute('second', '0')
89108926
return self._compute('second', 'int(second({value}))')
89118927

89128928
@property
89138929
def microsecond(self):
89148930
''' The microsecond of the datetime '''
8915-
if self._dtype == 'date':
8931+
if self._dtype in ['date', 'sas-date']:
89168932
return self._compute('microsecond', '0')
89178933
return self._compute('microsecond', 'int(mod(second({value}), 1) * 1000000)')
89188934

@@ -8923,9 +8939,9 @@ def nanosecond(self):
89238939

89248940
def _get_date(self):
89258941
''' Return an expression that will return the date only '''
8926-
if self._dtype == 'date':
8942+
if self._dtype in ['date', 'sas-date']:
89278943
return '{value}'
8928-
if self._dtype == 'time':
8944+
if self._dtype in ['time', 'sas-time']:
89298945
return 'today()'
89308946
return 'datepart({value})'
89318947

swat/cas/transformers.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -235,12 +235,17 @@ def ctb2tabular(_sw_table, soptions='', connection=None):
235235
dt_formats = get_option('cas.dataset.datetime_formats')
236236
if isinstance(dt_formats, six.string_types):
237237
dt_formats = [dt_formats]
238-
datetime_regex = re.compile(r'^(%s)\d*\.\d*$' % '|'.join(dt_formats), flags=re.I)
238+
datetime_regex = re.compile(r'^(%s)(\d*\.\d*)?$' % '|'.join(dt_formats), flags=re.I)
239239

240240
d_formats = get_option('cas.dataset.date_formats')
241-
if isinstance(dt_formats, six.string_types):
241+
if isinstance(d_formats, six.string_types):
242242
d_formats = [d_formats]
243-
date_regex = re.compile(r'^(%s)\d*\.\d*$' % '|'.join(d_formats), flags=re.I)
243+
date_regex = re.compile(r'^(%s)(\d*\.\d*)?$' % '|'.join(d_formats), flags=re.I)
244+
245+
t_formats = get_option('cas.dataset.time_formats')
246+
if isinstance(t_formats, six.string_types):
247+
t_formats = [t_formats]
248+
time_regex = re.compile(r'^(%s)(\d*\.\d*)?$' % '|'.join(t_formats), flags=re.I)
244249

245250
# Construct columns
246251
ncolumns = check(_sw_table.getNColumns(), _sw_table)
@@ -278,7 +283,9 @@ def ctb2tabular(_sw_table, soptions='', connection=None):
278283
dtypes.append((col.name, 'f8'))
279284
colinfo[col.name] = col
280285
if col.format:
281-
if datetime_regex.match(col.format):
286+
# Times are converted to datetime because datetimes are native
287+
# DataFrame types whereas times are simply Python time objects.
288+
if datetime_regex.match(col.format) or time_regex.match(col.format):
282289
datetimes.append(col.name)
283290
elif date_regex.match(col.format):
284291
dates.append(col.name)

swat/config.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,11 +242,17 @@ def check_string_list(val):
242242
'B8601TM', 'B8601TX', 'B8601TZ', 'DATEAMPM', 'DATETIME', 'DTDATE',
243243
'DTMONYY', 'DTWEEKV', 'DTWKDATX', 'DTYEAR', 'DTYYQC', 'E8601DN',
244244
'E8601DT', 'E8601DX', 'E8601DZ', 'E8601LX', 'E8601LZ', 'E8601TM',
245-
'E8601TX', 'E8601TZ', 'EURDFDT', 'HHMM', 'HOUR', 'MDYAMPM', 'MMSS',
246-
'NLDATM[A-Z]*', 'NLTIMAP', 'NLTIME', 'TIMEAMPM', 'TIME', 'TOD'],
245+
'E8601TX', 'E8601TZ', 'EURDFDT', 'MDYAMPM', 'NLDATM[A-Z]*'],
247246
'Format names used to indicate the column should be converted\n'
248247
'to a Python datetime object.')
249248

249+
register_option('cas.dataset.time_formats', 'string or list of strings',
250+
check_string_list,
251+
['HHMM', 'HOUR', 'MMSS', 'NLTIMAP', 'NLTIME', 'TIMEAMPM',
252+
'TIME', 'TOD'],
253+
'Format names used to indicate the column should be converted\n'
254+
'to a Python time object.')
255+
250256
register_option('cas.dataset.index_name', 'string or list of strings',
251257
check_string_list, '_Index_',
252258
'The name or names of the columns to be automatically converted\n'

swat/tests/cas/test_table.py

Lines changed: 193 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3760,8 +3760,199 @@ def test_dt_methods(self):
37603760

37613761
tbl = self.s.addtable(table='datetime', caslib=self.srcLib,
37623762
**pd_dmh.args.addtable).casTable
3763-
tbl['dbldatetime'] = tbl['datetime'] * 1.0
3764-
# tbl['sasdatetime'] = tbl['datetime'] / 1000000.
3763+
3764+
with self.assertRaises(TypeError):
3765+
self.table['Model'].dt.year
3766+
3767+
# year
3768+
self.assertColsEqual(df.date.dt.year, tbl.date.dt.year, sort=True)
3769+
self.assertColsEqual(df.time.dt.year, tbl.time.dt.year, sort=True)
3770+
self.assertColsEqual(df.datetime.dt.year, tbl.datetime.dt.year, sort=True)
3771+
3772+
# month
3773+
self.assertColsEqual(df.date.dt.month, tbl.date.dt.month, sort=True)
3774+
self.assertColsEqual(df.time.dt.month, tbl.time.dt.month, sort=True)
3775+
self.assertColsEqual(df.datetime.dt.month, tbl.datetime.dt.month, sort=True)
3776+
3777+
# day
3778+
self.assertColsEqual(df.date.dt.day, tbl.date.dt.day, sort=True)
3779+
self.assertColsEqual(df.time.dt.day, tbl.time.dt.day, sort=True)
3780+
self.assertColsEqual(df.datetime.dt.day, tbl.datetime.dt.day, sort=True)
3781+
3782+
# hour
3783+
self.assertColsEqual(df.date.dt.hour, tbl.date.dt.hour, sort=True)
3784+
self.assertColsEqual(df.time.dt.hour, tbl.time.dt.hour, sort=True)
3785+
self.assertColsEqual(df.datetime.dt.hour, tbl.datetime.dt.hour, sort=True)
3786+
3787+
# minute
3788+
self.assertColsEqual(df.date.dt.minute, tbl.date.dt.minute, sort=True)
3789+
self.assertColsEqual(df.time.dt.minute, tbl.time.dt.minute, sort=True)
3790+
self.assertColsEqual(df.datetime.dt.minute, tbl.datetime.dt.minute, sort=True)
3791+
3792+
# second
3793+
self.assertColsEqual(df.date.dt.second, tbl.date.dt.second, sort=True)
3794+
self.assertColsEqual(df.time.dt.second, tbl.time.dt.second, sort=True)
3795+
self.assertColsEqual(df.datetime.dt.second, tbl.datetime.dt.second, sort=True)
3796+
3797+
# microsecond
3798+
# TODO: Needs to be implemented yet
3799+
self.assertColsEqual(df.date.dt.microsecond,
3800+
tbl.date.dt.microsecond, sort=True)
3801+
self.assertColsEqual(df.time.dt.microsecond,
3802+
tbl.time.dt.microsecond, sort=True)
3803+
self.assertColsEqual(df.datetime.dt.microsecond,
3804+
tbl.datetime.dt.microsecond, sort=True)
3805+
3806+
# nanosecond
3807+
# NOTE: nanosecond precision is not supported
3808+
self.assertColsEqual(df.date.dt.nanosecond,
3809+
tbl.date.dt.nanosecond, sort=True)
3810+
self.assertColsEqual(df.time.dt.nanosecond,
3811+
tbl.time.dt.nanosecond, sort=True)
3812+
self.assertColsEqual(df.datetime.dt.nanosecond,
3813+
tbl.datetime.dt.nanosecond, sort=True)
3814+
3815+
# week
3816+
self.assertColsEqual(df.date.dt.week,
3817+
tbl.date.dt.week, sort=True)
3818+
self.assertColsEqual(df.time.dt.week,
3819+
tbl.time.dt.week, sort=True)
3820+
self.assertColsEqual(df.datetime.dt.week,
3821+
tbl.datetime.dt.week, sort=True)
3822+
3823+
# weekofyear
3824+
self.assertColsEqual(df.date.dt.weekofyear,
3825+
tbl.date.dt.weekofyear, sort=True)
3826+
self.assertColsEqual(df.time.dt.weekofyear,
3827+
tbl.time.dt.weekofyear, sort=True)
3828+
self.assertColsEqual(df.datetime.dt.weekofyear,
3829+
tbl.datetime.dt.weekofyear, sort=True)
3830+
3831+
# dayofweek
3832+
self.assertColsEqual(df.date.dt.dayofweek,
3833+
tbl.date.dt.dayofweek, sort=True)
3834+
self.assertColsEqual(df.time.dt.dayofweek,
3835+
tbl.time.dt.dayofweek, sort=True)
3836+
self.assertColsEqual(df.datetime.dt.dayofweek,
3837+
tbl.datetime.dt.dayofweek, sort=True)
3838+
3839+
# weekday
3840+
self.assertColsEqual(df.date.dt.weekday,
3841+
tbl.date.dt.weekday, sort=True)
3842+
self.assertColsEqual(df.time.dt.weekday,
3843+
tbl.time.dt.weekday, sort=True)
3844+
self.assertColsEqual(df.datetime.dt.weekday,
3845+
tbl.datetime.dt.weekday, sort=True)
3846+
3847+
# dayofyear
3848+
self.assertColsEqual(df.date.dt.dayofyear,
3849+
tbl.date.dt.dayofyear, sort=True)
3850+
self.assertColsEqual(df.time.dt.dayofyear,
3851+
tbl.time.dt.dayofyear, sort=True)
3852+
self.assertColsEqual(df.datetime.dt.dayofyear,
3853+
tbl.datetime.dt.dayofyear, sort=True)
3854+
3855+
# quarter
3856+
self.assertColsEqual(df.date.dt.quarter,
3857+
tbl.date.dt.quarter, sort=True)
3858+
self.assertColsEqual(df.time.dt.quarter,
3859+
tbl.time.dt.quarter, sort=True)
3860+
self.assertColsEqual(df.datetime.dt.quarter,
3861+
tbl.datetime.dt.quarter, sort=True)
3862+
3863+
# is_month_start
3864+
self.assertColsEqual(df.date.dt.is_month_start,
3865+
tbl.date.dt.is_month_start, sort=True)
3866+
self.assertColsEqual(df.time.dt.is_month_start,
3867+
tbl.time.dt.is_month_start, sort=True)
3868+
self.assertColsEqual(df.datetime.dt.is_month_start,
3869+
tbl.datetime.dt.is_month_start, sort=True)
3870+
3871+
# is_month_end
3872+
self.assertColsEqual(df.date.dt.is_month_end,
3873+
tbl.date.dt.is_month_end, sort=True)
3874+
self.assertColsEqual(df.time.dt.is_month_end,
3875+
tbl.time.dt.is_month_end, sort=True)
3876+
self.assertColsEqual(df.datetime.dt.is_month_end,
3877+
tbl.datetime.dt.is_month_end, sort=True)
3878+
3879+
# is_quarter_start
3880+
self.assertColsEqual(df.date.dt.is_quarter_start,
3881+
tbl.date.dt.is_quarter_start, sort=True)
3882+
self.assertColsEqual(df.time.dt.is_quarter_start,
3883+
tbl.time.dt.is_quarter_start, sort=True)
3884+
self.assertColsEqual(df.datetime.dt.is_quarter_start,
3885+
tbl.datetime.dt.is_quarter_start, sort=True)
3886+
3887+
# is_quarter_end
3888+
self.assertColsEqual(df.date.dt.is_quarter_end,
3889+
tbl.date.dt.is_quarter_end, sort=True)
3890+
self.assertColsEqual(df.time.dt.is_quarter_end,
3891+
tbl.time.dt.is_quarter_end, sort=True)
3892+
self.assertColsEqual(df.datetime.dt.is_quarter_end,
3893+
tbl.datetime.dt.is_quarter_end, sort=True)
3894+
3895+
# is_year_start
3896+
self.assertColsEqual(df.date.dt.is_year_start,
3897+
tbl.date.dt.is_year_start, sort=True)
3898+
self.assertColsEqual(df.time.dt.is_year_start,
3899+
tbl.time.dt.is_year_start, sort=True)
3900+
self.assertColsEqual(df.datetime.dt.is_year_start,
3901+
tbl.datetime.dt.is_year_start, sort=True)
3902+
3903+
# is_year_end
3904+
self.assertColsEqual(df.date.dt.is_year_end,
3905+
tbl.date.dt.is_year_end, sort=True)
3906+
self.assertColsEqual(df.time.dt.is_year_end,
3907+
tbl.time.dt.is_year_end, sort=True)
3908+
self.assertColsEqual(df.datetime.dt.is_year_end,
3909+
tbl.datetime.dt.is_year_end, sort=True)
3910+
3911+
# daysinmonth
3912+
self.assertColsEqual(df.date.dt.daysinmonth,
3913+
tbl.date.dt.daysinmonth, sort=True)
3914+
self.assertColsEqual(df.time.dt.daysinmonth,
3915+
tbl.time.dt.daysinmonth, sort=True)
3916+
self.assertColsEqual(df.datetime.dt.daysinmonth,
3917+
tbl.datetime.dt.daysinmonth, sort=True)
3918+
3919+
# days_in_month
3920+
self.assertColsEqual(df.date.dt.days_in_month,
3921+
tbl.date.dt.days_in_month, sort=True)
3922+
self.assertColsEqual(df.time.dt.days_in_month,
3923+
tbl.time.dt.days_in_month, sort=True)
3924+
self.assertColsEqual(df.datetime.dt.days_in_month,
3925+
tbl.datetime.dt.days_in_month, sort=True)
3926+
3927+
@unittest.skipIf(pd_version <= (0, 14, 0), 'Need newer version of Pandas')
3928+
def test_sas_dt_methods(self):
3929+
if self.s._protocol in ['http', 'https']:
3930+
tm.TestCase.skipTest(self, 'REST does not support data messages')
3931+
3932+
import swat.tests as st
3933+
3934+
myFile = os.path.join(os.path.dirname(st.__file__), 'datasources', 'datetime.csv')
3935+
3936+
df = pd.read_csv(myFile, parse_dates=[0, 1, 2])
3937+
df.sort_values(['datetime'], inplace=True)
3938+
df2 = df[:]
3939+
3940+
from swat.cas.utils.datetime import (python2sas_date,
3941+
python2sas_time,
3942+
python2sas_datetime)
3943+
df2['date'] = df2['date'].apply(python2sas_date)
3944+
df2['datetime'] = df2['datetime'].apply(python2sas_datetime)
3945+
df2['time'] = df2['time'].apply(python2sas_time)
3946+
3947+
from swat.cas import datamsghandlers as dmh
3948+
3949+
pd_dmh = dmh.PandasDataFrame(
3950+
df2,
3951+
formats={'date': 'nldate', 'time': 'nltime', 'datetime': 'nldatm'},
3952+
labels={'date': 'Date', 'time': 'Time', 'datetime': 'Datetime'})
3953+
3954+
tbl = self.s.addtable(table='datetime', caslib=self.srcLib,
3955+
**pd_dmh.args.addtable).casTable
37653956

37663957
with self.assertRaises(TypeError):
37673958
self.table['Model'].dt.year

0 commit comments

Comments
 (0)