Skip to content

Commit eacec5c

Browse files
SNOW-1819521: Add support for Series.dt.strftime (10 directives) (#2781)
<!--- Please answer these questions before creating your pull request. Thanks! ---> 1. Which Jira issue is this PR addressing? Make sure that there is an accompanying issue to your PR. <!--- In this section, please add a Snowflake Jira issue number. Note that if a corresponding GitHub issue exists, you should still include the Snowflake Jira issue number. For example, for GitHub issue #1400, you should add "SNOW-1335071" here. ---> Fixes SNOW-1819521 2. Fill out the following pre-review checklist: - [x] I am adding a new automated test(s) to verify correctness of my new code - [ ] If this test skips Local Testing mode, I'm requesting review from @snowflakedb/local-testing - [ ] I am adding new logging messages - [ ] I am adding a new telemetry message - [ ] I am adding new credentials - [ ] I am adding a new dependency - [ ] If this is a new feature/behavior, I'm adding the Local Testing parity changes. - [ ] I acknowledge that I have ensured my changes to be thread-safe. Follow the link for more information: [Thread-safe Developer Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development) 3. Please describe how your code solves the related issue. Add support for Series.dt.strftime (10 directives).
1 parent d2cc2b8 commit eacec5c

File tree

7 files changed

+222
-45
lines changed

7 files changed

+222
-45
lines changed

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,17 @@
2727
- Added support for `DataFrame.from_dict` and `DataFrame.from_records`.
2828
- Added support for mixed case field names in struct type columns.
2929
- Added support for `SeriesGroupBy.unique`.
30+
- Added support for `Series.dt.strftime` with the following directives:
31+
- %d: Day of the month as a zero-padded decimal number.
32+
- %m: Month as a zero-padded decimal number.
33+
- %Y: Year with century as a decimal number.
34+
- %H: Hour (24-hour clock) as a zero-padded decimal number.
35+
- %M: Minute as a zero-padded decimal number.
36+
- %S: Second as a zero-padded decimal number.
37+
- %f: Microsecond as a decimal number, zero-padded to 6 digits.
38+
- %j: Day of the year as a zero-padded decimal number.
39+
- %X: Locale’s appropriate time representation.
40+
- %%: A literal '%' character.
3041

3142
#### Bug Fixes
3243

docs/source/modin/supported/series_dt_supported.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,9 @@ the method in the left column.
9898
+-----------------------------+---------------------------------+----------------------------------------------------+
9999
| ``normalize`` | Y | |
100100
+-----------------------------+---------------------------------+----------------------------------------------------+
101-
| ``strftime`` | N | |
101+
| ``strftime`` | P | ``N`` if `date_format` contains directives other |
102+
| | | than (`%d`, `%m`, `%Y`, `%H`, `%M`, `%S`, `%f`, |
103+
| | | `%j`, `%X`, `%%`). |
102104
+-----------------------------+---------------------------------+----------------------------------------------------+
103105
| ``round`` | P | ``N`` if `ambiguous` or `nonexistent` are set to a |
104106
| | | non-default value. |

src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18162,7 +18162,7 @@ def dt_total_seconds(self, include_index: bool = False) -> "SnowflakeQueryCompil
1816218162
)
1816318163
)
1816418164

18165-
def dt_strftime(self, date_format: str) -> None:
18165+
def dt_strftime(self, date_format: str) -> "SnowflakeQueryCompiler":
1816618166
"""
1816718167
Format underlying date-time data using specified format.
1816818168

@@ -18172,8 +18172,102 @@ def dt_strftime(self, date_format: str) -> None:
1817218172
Returns:
1817318173
New QueryCompiler containing formatted date-time values.
1817418174
"""
18175-
ErrorMessage.not_implemented(
18176-
"Snowpark pandas doesn't yet support the method 'Series.dt.strftime'"
18175+
18176+
def strftime_func(column: SnowparkColumn) -> SnowparkColumn:
18177+
directive_to_function_map: dict[str, Callable] = {
18178+
"d": (
18179+
# Day of the month as a zero-padded decimal number
18180+
lambda column: lpad(
18181+
dayofmonth(column), pandas_lit(2), pandas_lit("0")
18182+
)
18183+
),
18184+
"m": (
18185+
# Month as a zero-padded decimal number
18186+
lambda column: lpad(month(column), pandas_lit(2), pandas_lit("0"))
18187+
),
18188+
"Y": (
18189+
# Year with century as a decimal number
18190+
lambda column: lpad(year(column), pandas_lit(4), pandas_lit("0"))
18191+
),
18192+
"H": (
18193+
# Hour (24-hour clock) as a zero-padded decimal number
18194+
lambda column: lpad(hour(column), pandas_lit(2), pandas_lit("0"))
18195+
),
18196+
"M": (
18197+
# Minute as a zero-padded decimal number
18198+
lambda column: lpad(minute(column), pandas_lit(2), pandas_lit("0"))
18199+
),
18200+
"S": (
18201+
# Second as a zero-padded decimal number
18202+
lambda column: lpad(second(column), pandas_lit(2), pandas_lit("0"))
18203+
),
18204+
"f": (
18205+
# Microsecond as a decimal number, zero-padded to 6 digits
18206+
lambda column: lpad(
18207+
floor(date_part("ns", column) / 1000),
18208+
pandas_lit(6),
18209+
pandas_lit("0"),
18210+
)
18211+
),
18212+
"j": (
18213+
# Day of the year as a zero-padded decimal number
18214+
lambda column: lpad(
18215+
dayofyear(column), pandas_lit(3), pandas_lit("0")
18216+
)
18217+
),
18218+
"X": (
18219+
# Locale’s appropriate time representation
18220+
lambda column: trunc(to_time(column), pandas_lit("second"))
18221+
),
18222+
"%": (
18223+
# A literal '%' character
18224+
lambda column: pandas_lit("%")
18225+
),
18226+
}
18227+
18228+
parts = re.split("%.", date_format)
18229+
directive_first = False
18230+
if parts[0] == "":
18231+
parts = parts[1:]
18232+
directive_first = True
18233+
if parts[-1] == "":
18234+
parts = parts[:-1]
18235+
directives = re.findall("%.", date_format)
18236+
cols = []
18237+
for i in range(min(len(parts), len(directives))):
18238+
directive_function = directive_to_function_map.get(directives[i][1:])
18239+
if not directive_function:
18240+
raise ErrorMessage.not_implemented(
18241+
f"Snowpark pandas 'Series.dt.strftime' method does not yet support the directive '%{directives[i][1:]}'"
18242+
)
18243+
18244+
if directive_first:
18245+
cols.append(directive_function(column))
18246+
cols.append(pandas_lit(parts[i]))
18247+
else:
18248+
cols.append(pandas_lit(parts[i]))
18249+
cols.append(directive_function(column))
18250+
18251+
if len(parts) > len(directives):
18252+
cols.append(pandas_lit(parts[-1]))
18253+
if len(parts) < len(directives):
18254+
directive_function = directive_to_function_map.get(directives[-1][1:])
18255+
if not directive_function:
18256+
raise ErrorMessage.not_implemented(
18257+
f"Snowpark pandas 'Series.dt.strftime' method does not yet support the directive '%{directives[-1][1:]}'"
18258+
)
18259+
cols.append(directive_function(column))
18260+
18261+
if len(cols) == 1:
18262+
return iff(column.is_null(), pandas_lit(None), cols[0])
18263+
else:
18264+
return iff(column.is_null(), pandas_lit(None), concat(*cols))
18265+
18266+
return SnowflakeQueryCompiler(
18267+
self._modin_frame.apply_snowpark_function_to_columns(
18268+
strftime_func,
18269+
include_index=False,
18270+
)
1817718271
)
1817818272

1817918273
def topn(

src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2218,7 +2218,49 @@ def normalize():
22182218
pass
22192219

22202220
def strftime():
2221-
pass
2221+
"""
2222+
Convert to Index using specified date_format.
2223+
2224+
Return an Index of formatted strings specified by date_format, which supports the same string format as the Python standard library. Details of the string format can be found in the Python string format documentation.
2225+
2226+
Formats supported by the C strftime API but not by the Python string format documentation (such as “%R”, “%r”) are not officially supported and should preferably be replaced with their supported equivalents (such as “%H:%M”, “%I:%M:%S %p”).
2227+
2228+
Note that PeriodIndex supports additional directives, detailed in Period.strftime.
2229+
2230+
Parameters
2231+
----------
2232+
date_format : str
2233+
Date format string (e.g. “%Y-%m-%d”).
2234+
2235+
Returns
2236+
-------
2237+
ndarray[object]
2238+
NumPy ndarray of formatted strings.
2239+
2240+
See also
2241+
--------
2242+
to_datetime
2243+
Convert the given argument to datetime.
2244+
DatetimeIndex.normalize
2245+
Return DatetimeIndex with times to midnight.
2246+
DatetimeIndex.round
2247+
Round the DatetimeIndex to the specified freq.
2248+
DatetimeIndex.floor
2249+
Floor the DatetimeIndex to the specified freq.
2250+
Timestamp.strftime
2251+
Format a single Timestamp.
2252+
Period.strftime
2253+
Format a single Period.
2254+
2255+
Examples
2256+
--------
2257+
>>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
2258+
... periods=3, freq='s')
2259+
>>> rng.strftime('%B %d, %Y, %r') # doctest: +SKIP
2260+
Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
2261+
'March 10, 2018, 09:00:02 AM'],
2262+
dtype='object')
2263+
"""
22222264

22232265
def round():
22242266
"""

tests/integ/modin/series/test_dt_accessor.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,74 @@ def test_days_in_month(property):
433433
)
434434

435435

436+
@sql_count_checker(query_count=1)
437+
@pytest.mark.parametrize(
438+
"date_format",
439+
[
440+
"a%d-%m-%Y-%H-%M-%S-%f-%j-%X-%%b",
441+
"%d-%m-%Y-%H-%M-%S-%f-%j-%X-%%b",
442+
"a%d-%m-%Y-%H-%M-%S-%f-%j-%X-%%",
443+
"%d-%m-%Y-%H-%M-%S-%f-%j-%X-%%",
444+
"%%%M",
445+
"%%M",
446+
"abc%",
447+
],
448+
)
449+
def test_strftime(date_format):
450+
datetime_index = native_pd.DatetimeIndex(
451+
[
452+
"2014-04-04 23:56:01.000000001",
453+
"2014-07-18 21:24:02.000000002",
454+
"2015-11-22 22:14:03.000000003",
455+
"2015-11-23 20:12:04.1234567890",
456+
pd.NaT,
457+
],
458+
)
459+
native_ser = native_pd.Series(datetime_index)
460+
snow_ser = pd.Series(native_ser)
461+
eval_snowpark_pandas_result(
462+
snow_ser,
463+
native_ser,
464+
lambda s: s.dt.strftime(date_format=date_format),
465+
)
466+
467+
468+
@sql_count_checker(query_count=0)
469+
@pytest.mark.parametrize(
470+
"date_format",
471+
[
472+
"%a",
473+
"%A",
474+
"%w",
475+
"%b",
476+
"%B",
477+
"%y",
478+
"%I",
479+
"%p",
480+
"%z",
481+
"%Z",
482+
"%U",
483+
"%W",
484+
"%c",
485+
"%x",
486+
],
487+
)
488+
def test_strftime_neg(date_format):
489+
datetime_index = native_pd.DatetimeIndex(
490+
[
491+
"2014-04-04 23:56:01.000000001",
492+
"2014-07-18 21:24:02.000000002",
493+
"2015-11-22 22:14:03.000000003",
494+
"2015-11-23 20:12:04.1234567890",
495+
pd.NaT,
496+
],
497+
)
498+
native_ser = native_pd.Series(datetime_index)
499+
snow_ser = pd.Series(native_ser)
500+
with pytest.raises(NotImplementedError):
501+
snow_ser.dt.strftime(date_format=date_format)
502+
503+
436504
@dt_properties
437505
@sql_count_checker(query_count=1)
438506
def test_dt_property_with_tz(property_name):

tests/integ/modin/series/test_dt_accessor_unsupported.py

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -26,42 +26,3 @@ def test_dt_namespace_accessor_datetime64(self, freq):
2626
msg = "Snowpark pandas doesn't yet support the property 'Series.dt.freq'"
2727
with pytest.raises(NotImplementedError, match=msg):
2828
ser.dt.freq
29-
30-
@pytest.mark.parametrize(
31-
"date, format_string, expected",
32-
[
33-
(
34-
native_pd.date_range("20130101", periods=5),
35-
"%Y/%m/%d",
36-
native_pd.Series(
37-
[
38-
"2013/01/01",
39-
"2013/01/02",
40-
"2013/01/03",
41-
"2013/01/04",
42-
"2013/01/05",
43-
]
44-
),
45-
),
46-
(
47-
native_pd.date_range("2015-02-03 11:22:33.4567", periods=5),
48-
"%Y/%m/%d %H-%M-%S",
49-
native_pd.Series(
50-
[
51-
"2015/02/03 11-22-33",
52-
"2015/02/04 11-22-33",
53-
"2015/02/05 11-22-33",
54-
"2015/02/06 11-22-33",
55-
"2015/02/07 11-22-33",
56-
]
57-
),
58-
),
59-
],
60-
)
61-
@sql_count_checker(query_count=0)
62-
def test_strftime(self, date, format_string, expected):
63-
# GH 10086
64-
ser = pd.Series(date)
65-
msg = "Snowpark pandas doesn't yet support the method 'Series.dt.strftime'"
66-
with pytest.raises(NotImplementedError, match=msg):
67-
ser.dt.strftime(format_string)

tests/unit/modin/test_series_dt.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def mock_query_compiler_for_dt_series() -> SnowflakeQueryCompiler:
3535
[
3636
(lambda s: s.dt.timetz, "timetz"),
3737
(lambda s: s.dt.to_period(), "to_period"),
38-
(lambda s: s.dt.strftime(date_format="YY/MM/DD"), "strftime"),
3938
(lambda s: s.dt.qyear, "qyear"),
4039
(lambda s: s.dt.start_time, "start_time"),
4140
(lambda s: s.dt.end_time, "end_time"),

0 commit comments

Comments
 (0)