Skip to content

Commit 5d9d7fa

Browse files
authored
feat: implement ZonedDateSecondParameter (#443)
* implement timestamp parameter minimally * apply formatter * change normalize only to remove microsecond * add tests * implement with zoned date second * remove column separator case --------- Co-authored-by: Shuntaro Takahashi <shuntaro-takahashi@m3.com>
1 parent ca41084 commit 5d9d7fa

3 files changed

Lines changed: 100 additions & 1 deletion

File tree

gokart/__init__.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
from gokart.build import WorkerSchedulerFactory, build # noqa:F401
22
from gokart.info import make_tree_info, tree_info # noqa:F401
33
from gokart.pandas_type_config import PandasTypeConfig # noqa:F401
4-
from gokart.parameter import ExplicitBoolParameter, ListTaskInstanceParameter, SerializableParameter, TaskInstanceParameter # noqa:F401
4+
from gokart.parameter import ( # noqa:F401
5+
ExplicitBoolParameter,
6+
ListTaskInstanceParameter,
7+
SerializableParameter,
8+
TaskInstanceParameter,
9+
ZonedDateSecondParameter,
10+
)
511
from gokart.run import run # noqa:F401
612
from gokart.task import TaskOnKart # noqa:F401
713
from gokart.testing import test_run # noqa:F401

gokart/parameter.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import bz2
2+
import datetime
23
import json
34
from logging import getLogger
45
from typing import Generic, Protocol, TypeVar
6+
from warnings import warn
57

68
import luigi
79
from luigi import task_register
@@ -119,3 +121,35 @@ def parse(self, s: str) -> S:
119121

120122
def serialize(self, x: S) -> str:
121123
return x.gokart_serialize()
124+
125+
126+
class ZonedDateSecondParameter(luigi.Parameter):
    """
    ZonedDateSecondParameter supports a datetime.datetime object with timezone information.

    A ZonedDateSecondParameter is a `ISO 8601 <http://en.wikipedia.org/wiki/ISO_8601>`_ formatted
    date, time specified to the second and timezone. For example, ``2013-07-10T19:07:38+09:00`` specifies July 10, 2013 at
    19:07:38 +09:00. The separator `:` can be omitted for Python3.11 and later.

    A trailing ``Z`` is accepted as a synonym for ``+00:00``. Input without any offset is still
    parsed (into a naive datetime), but a warning is emitted.
    """

    def __init__(self, **kwargs):
        """Forward all keyword arguments (``default``, ``description``, ...) to ``luigi.Parameter``."""
        super().__init__(**kwargs)

    def parse(self, s):
        """
        Convert an ISO 8601 string into a ``datetime.datetime``.

        :param s: the string to parse, e.g. ``2013-07-10T19:07:38+09:00`` or ``2013-07-10T19:07:38Z``.
        :return: the parsed datetime; it is naive when ``s`` carries no offset.
        :raises ValueError: if ``datetime.datetime.fromisoformat`` rejects the string.
        """
        # datetime.fromisoformat only understands a trailing 'Z' on Python 3.11 and later,
        # so rewrite it to the equivalent '+00:00' offset, which older versions accept as well.
        if s.endswith('Z'):
            s = s[:-1] + '+00:00'
        dt = datetime.datetime.fromisoformat(s)
        if dt.tzinfo is None:
            warn('The input does not have timezone information. Please consider using luigi.DateSecondParameter instead.', stacklevel=1)
        return dt

    def serialize(self, dt):
        """
        Convert a datetime into its ISO 8601 string form.

        :param dt: the datetime to serialize, or ``None``.
        :return: ``dt.isoformat()``, or ``'None'`` when ``dt`` is ``None``.
        """
        # A parameter may legitimately hold None (e.g. ``default=None``); stringify it the way
        # luigi's own datetime parameters do instead of failing with AttributeError.
        if dt is None:
            return str(dt)
        return dt.isoformat()

    def normalize(self, dt):
        """
        Drop the sub-second part of ``dt``; nothing else is changed.

        :param dt: the datetime to normalize, or ``None``.
        :return: ``dt`` with ``microsecond`` set to 0, or ``None`` when ``dt`` is ``None``.
        """
        # Unlike luigi's _DatetimeParameterBase.normalize, no clamping to an interval is done here.
        # Microseconds are removed because the number of digits of microsecond is not fixed.
        # See also luigi's implementation https://github.com/spotify/luigi/blob/v3.6.0/luigi/parameter.py#L612
        if dt is None:
            return None
        return dt.replace(microsecond=0)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import datetime
2+
import unittest
3+
4+
from luigi.cmdline_parser import CmdlineParser
5+
6+
from gokart import TaskOnKart, ZonedDateSecondParameter
7+
8+
9+
class ZonedDateSecondParameterTaskWithoutDefault(TaskOnKart):
    """Fixture task whose ``dt`` parameter has no default and must be supplied by the caller."""

    task_namespace = __name__
    dt: datetime.datetime = ZonedDateSecondParameter()

    def run(self):
        # The task's only job is to dump the parameter value it was given.
        self.dump(self.dt)
16+
17+
class ZonedDateSecondParameterTaskWithDefault(TaskOnKart):
    """Fixture task whose ``dt`` parameter defaults to 2025-02-21T12:00:00+09:00."""

    task_namespace = __name__
    dt: datetime.datetime = ZonedDateSecondParameter(
        default=datetime.datetime(2025, 2, 21, 12, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=9))),
    )

    def run(self):
        # The task's only job is to dump the parameter value it was given.
        self.dump(self.dt)
24+
25+
class ZonedDateSecondParameterTest(unittest.TestCase):
    """Tests for ZonedDateSecondParameter: command-line parsing, direct parsing and serialization."""

    def setUp(self):
        # The same instant, once as an aware datetime and once as its ISO 8601 string.
        self.default_datetime = datetime.datetime(2025, 2, 21, 12, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=9)))
        self.default_datetime_str = '2025-02-21T12:00:00+09:00'
        # Fully-qualified task names as the luigi command line expects them.
        self.task_with_default = f'{__name__}.ZonedDateSecondParameterTaskWithDefault'
        self.task_without_default = f'{__name__}.ZonedDateSecondParameterTaskWithoutDefault'

    def test_default(self):
        with CmdlineParser.global_instance([self.task_with_default]) as cp:
            self.assertEqual(cp.get_task_obj().dt, self.default_datetime)

    def test_parse_param_with_tz_suffix(self):
        expected = datetime.datetime(2024, 1, 20, 11, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=9)))
        with CmdlineParser.global_instance([self.task_with_default, '--dt', '2024-01-20T11:00:00+09:00']) as cp:
            self.assertEqual(cp.get_task_obj().dt, expected)

    def test_parse_param_with_Z_suffix(self):
        # A trailing 'Z' must be read as a zero UTC offset.
        expected = datetime.datetime(2024, 1, 20, 11, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(hours=0)))
        with CmdlineParser.global_instance([self.task_with_default, '--dt', '2024-01-20T11:00:00Z']) as cp:
            self.assertEqual(cp.get_task_obj().dt, expected)

    def test_parse_param_without_timezone_input(self):
        # Input without an offset is still accepted and yields a naive datetime.
        expected = datetime.datetime(2025, 2, 21, 12, 0, 0, tzinfo=None)
        with CmdlineParser.global_instance([self.task_without_default, '--dt', '2025-02-21T12:00:00']) as cp:
            self.assertEqual(cp.get_task_obj().dt, expected)

    def test_parse_method(self):
        actual = ZonedDateSecondParameter().parse(self.default_datetime_str)
        expected = self.default_datetime
        self.assertEqual(actual, expected)

    def test_serialize_task(self):
        # str(task) ends with the parenthesised, serialized parameter list.
        task = ZonedDateSecondParameterTaskWithoutDefault(dt=self.default_datetime)
        actual = str(task)
        expected = f'(dt={self.default_datetime_str})'
        self.assertTrue(actual.endswith(expected))
57+
58+
if __name__ == '__main__':
59+
unittest.main()

0 commit comments

Comments
 (0)