Skip to content

Commit d322b7b

Browse files
authored
feat: add to_time, to_local_time, to_date functions (#1387)
* feat: add to_time, to_local_time, to_date, to_char functions. Additionally fix conditional on formatters (since it is *args it cannot be None). Refactor name to avoid possible collision with f. * address comments in PR * chore: add tests for today
1 parent f914fc8 commit d322b7b

File tree

3 files changed

+146
-17
lines changed

3 files changed

+146
-17
lines changed

python/datafusion/functions.py

Lines changed: 64 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@
4242

4343
if TYPE_CHECKING:
4444
from datafusion.context import SessionContext
45-
4645
__all__ = [
4746
"abs",
4847
"acos",
@@ -268,13 +267,18 @@
268267
"sum",
269268
"tan",
270269
"tanh",
270+
"to_char",
271+
"to_date",
271272
"to_hex",
273+
"to_local_time",
274+
"to_time",
272275
"to_timestamp",
273276
"to_timestamp_micros",
274277
"to_timestamp_millis",
275278
"to_timestamp_nanos",
276279
"to_timestamp_seconds",
277280
"to_unixtime",
281+
"today",
278282
"translate",
279283
"trim",
280284
"trunc",
@@ -1010,67 +1014,111 @@ def now() -> Expr:
10101014
return Expr(f.now())
10111015

10121016

1017+
def to_char(arg: Expr, formatter: Expr) -> Expr:
1018+
"""Returns a string representation of a date, time, timestamp or duration.
1019+
1020+
For usage of ``formatter`` see the ``strftime`` module of the Rust chrono package.
1021+
1022+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1023+
"""
1024+
return Expr(f.to_char(arg.expr, formatter.expr))
1025+
1026+
1027+
def _unwrap_exprs(args: tuple[Expr, ...]) -> list:
1028+
return [arg.expr for arg in args]
1029+
1030+
1031+
def to_date(arg: Expr, *formatters: Expr) -> Expr:
1032+
"""Converts a value to a date (YYYY-MM-DD).
1033+
1034+
Supports strings, numeric and timestamp types as input.
1035+
Integers and doubles are interpreted as days since the unix epoch.
1036+
Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20')
1037+
if ``formatters`` are not provided.
1038+
1039+
For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package.
1040+
1041+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1042+
"""
1043+
return Expr(f.to_date(arg.expr, *_unwrap_exprs(formatters)))
1044+
1045+
1046+
def to_local_time(*args: Expr) -> Expr:
1047+
"""Converts a timestamp with a timezone to a timestamp without a timezone.
1048+
1049+
This function handles daylight saving time changes.
1050+
"""
1051+
return Expr(f.to_local_time(*_unwrap_exprs(args)))
1052+
1053+
1054+
def to_time(arg: Expr, *formatters: Expr) -> Expr:
1055+
"""Converts a value to a time. Supports strings and timestamps as input.
1056+
1057+
If ``formatters`` is not provided strings are parsed as HH:MM:SS, HH:MM or
1058+
HH:MM:SS.nnnnnnnnn.
1059+
1060+
For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package.
1061+
1062+
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1063+
"""
1064+
return Expr(f.to_time(arg.expr, *_unwrap_exprs(formatters)))
1065+
1066+
10131067
def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
10141068
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
10151069
10161070
For usage of ``formatters`` see the ``strftime`` module of the Rust chrono package.
10171071
10181072
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
10191073
"""
1020-
if formatters is None:
1021-
return f.to_timestamp(arg.expr)
1022-
1023-
formatters = [f.expr for f in formatters]
1024-
return Expr(f.to_timestamp(arg.expr, *formatters))
1074+
return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))
10251075

10261076

10271077
def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
10281078
"""Converts a string and optional formats to a ``Timestamp`` in milliseconds.
10291079
10301080
See :py:func:`to_timestamp` for a description on how to use formatters.
10311081
"""
1032-
formatters = [f.expr for f in formatters]
1033-
return Expr(f.to_timestamp_millis(arg.expr, *formatters))
1082+
return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))
10341083

10351084

10361085
def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
10371086
"""Converts a string and optional formats to a ``Timestamp`` in microseconds.
10381087
10391088
See :py:func:`to_timestamp` for a description on how to use formatters.
10401089
"""
1041-
formatters = [f.expr for f in formatters]
1042-
return Expr(f.to_timestamp_micros(arg.expr, *formatters))
1090+
return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))
10431091

10441092

10451093
def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
10461094
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
10471095
10481096
See :py:func:`to_timestamp` for a description on how to use formatters.
10491097
"""
1050-
formatters = [f.expr for f in formatters]
1051-
return Expr(f.to_timestamp_nanos(arg.expr, *formatters))
1098+
return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))
10521099

10531100

10541101
def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
10551102
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
10561103
10571104
See :py:func:`to_timestamp` for a description on how to use formatters.
10581105
"""
1059-
formatters = [f.expr for f in formatters]
1060-
return Expr(f.to_timestamp_seconds(arg.expr, *formatters))
1106+
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
10611107

10621108

10631109
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
10641110
"""Converts a string and optional formats to a Unixtime."""
1065-
args = [f.expr for f in format_arguments]
1066-
return Expr(f.to_unixtime(string.expr, *args))
1111+
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
10671112

10681113

10691114
def current_date() -> Expr:
10701115
"""Returns current UTC date as a Date32 value."""
10711116
return Expr(f.current_date())
10721117

10731118

1119+
today = current_date
1120+
1121+
10741122
def current_time() -> Expr:
10751123
"""Returns current UTC time as a Time64 value."""
10761124
return Expr(f.current_time())

python/tests/test_functions.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
import math
18-
from datetime import datetime, timezone
18+
from datetime import date, datetime, time, timezone
1919

2020
import numpy as np
2121
import pyarrow as pa
@@ -958,6 +958,12 @@ def test_temporal_functions(df):
958958
f.to_timestamp_nanos(
959959
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
960960
),
961+
f.to_time(literal("12:30:45")),
962+
f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
963+
f.to_date(literal("2017-05-31")),
964+
f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
965+
f.to_local_time(column("d")),
966+
f.to_char(column("d"), literal("%d-%m-%Y")),
961967
)
962968
result = df.collect()
963969
assert len(result) == 1
@@ -1032,6 +1038,73 @@ def test_temporal_functions(df):
10321038
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
10331039
type=pa.timestamp("ns"),
10341040
)
1041+
assert result.column(17) == pa.array(
1042+
[time(12, 30, 45)] * 3,
1043+
type=pa.time64("ns"),
1044+
)
1045+
assert result.column(18) == pa.array(
1046+
[time(12, 30, 45)] * 3,
1047+
type=pa.time64("ns"),
1048+
)
1049+
assert result.column(19) == pa.array(
1050+
[date(2017, 5, 31)] * 3,
1051+
type=pa.date32(),
1052+
)
1053+
assert result.column(20) == pa.array(
1054+
[date(2017, 5, 31)] * 3,
1055+
type=pa.date32(),
1056+
)
1057+
assert result.column(21) == pa.array(
1058+
[
1059+
datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
1060+
datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
1061+
datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
1062+
],
1063+
type=pa.timestamp("us"),
1064+
)
1065+
1066+
assert result.column(22) == pa.array(
1067+
[
1068+
"31-12-2022",
1069+
"26-06-2027",
1070+
"02-07-2020",
1071+
],
1072+
type=pa.string(),
1073+
)
1074+
1075+
1076+
def test_to_time_invalid_input(df):
1077+
with pytest.raises(Exception, match=r"Error parsing 'not-a-time' as time"):
1078+
df.select(f.to_time(literal("not-a-time"))).collect()
1079+
1080+
1081+
def test_to_time_mismatched_formatter(df):
1082+
with pytest.raises(Exception, match=r"Error parsing '12:30:45' as time"):
1083+
df.select(f.to_time(literal("12:30:45"), literal("%Y-%m-%d"))).collect()
1084+
1085+
1086+
def test_to_date_invalid_input(df):
1087+
with pytest.raises(Exception, match=r"Date32"):
1088+
df.select(f.to_date(literal("not-a-date"))).collect()
1089+
1090+
1091+
def test_temporal_formatter_requires_expr():
1092+
with pytest.raises(AttributeError, match="'str' object has no attribute 'expr'"):
1093+
f.to_time(literal("12:30:45"), "not-an-expr")
1094+
1095+
1096+
def test_today_returns_date32(df):
1097+
result = df.select(f.today().alias("today")).collect()[0]
1098+
assert result.column(0).type == pa.date32()
1099+
1100+
1101+
def test_today_alias_matches_current_date(df):
1102+
result = df.select(
1103+
f.current_date().alias("current_date"),
1104+
f.today().alias("today"),
1105+
).collect()[0]
1106+
1107+
assert result.column(0) == result.column(1)
10351108

10361109

10371110
def test_arrow_cast(df):

src/functions.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,9 @@ expr_fn!(
601601
"Converts the number to its equivalent hexadecimal representation."
602602
);
603603
expr_fn!(now);
604+
expr_fn_vec!(to_date);
605+
expr_fn_vec!(to_local_time);
606+
expr_fn_vec!(to_time);
604607
expr_fn_vec!(to_timestamp);
605608
expr_fn_vec!(to_timestamp_millis);
606609
expr_fn_vec!(to_timestamp_nanos);
@@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
613616
expr_fn!(date_trunc, part date);
614617
expr_fn!(date_bin, stride source origin);
615618
expr_fn!(make_date, year month day);
619+
expr_fn!(to_char, datetime format);
616620

617621
expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
618622
expr_fn_vec!(
@@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
10451049
m.add_wrapped(wrap_pyfunction!(tan))?;
10461050
m.add_wrapped(wrap_pyfunction!(tanh))?;
10471051
m.add_wrapped(wrap_pyfunction!(to_hex))?;
1052+
m.add_wrapped(wrap_pyfunction!(to_char))?;
1053+
m.add_wrapped(wrap_pyfunction!(to_date))?;
1054+
m.add_wrapped(wrap_pyfunction!(to_local_time))?;
1055+
m.add_wrapped(wrap_pyfunction!(to_time))?;
10481056
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
10491057
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
10501058
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;

0 commit comments

Comments
 (0)