diff --git a/docs/source/user-guide/common-operations/functions.rst b/docs/source/user-guide/common-operations/functions.rst
index a0b95c908..5a4bf3de4 100644
--- a/docs/source/user-guide/common-operations/functions.rst
+++ b/docs/source/user-guide/common-operations/functions.rst
@@ -78,6 +78,15 @@ Convert to timestamps using :py:func:`~datafusion.functions.to_timestamp`
 
     df.select(f.to_timestamp(col('"Total"')).alias("timestamp"))
 
+Extracting parts of a date using :py:func:`~datafusion.functions.date_part` (alias :py:func:`~datafusion.functions.extract`)
+
+.. ipython:: python
+
+    df.select(
+        f.date_part(literal("month"), f.to_timestamp(col('"Total"'))).alias("month"),
+        f.extract(literal("day"), f.to_timestamp(col('"Total"'))).alias("day")
+    )
+
 String
 ------
 
diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py
index 6ad4c50c2..15ad8822f 100644
--- a/python/datafusion/functions.py
+++ b/python/datafusion/functions.py
@@ -128,6 +128,7 @@
     "empty",
     "encode",
     "ends_with",
     "exp",
+    "extract",
     "factorial",
     "find_in_set",
@@ -994,6 +995,18 @@ def date_part(part: Expr, date: Expr) -> Expr:
     return Expr(f.date_part(part.expr, date.expr))
 
 
+def extract(part: Expr, date: Expr) -> Expr:
+    """Extracts a subfield from the date.
+
+    This is an alias for :py:func:`date_part`.
+
+    Args:
+        part: Expression naming the subfield to extract, e.g. ``literal("day")``.
+        date: Date or timestamp expression to extract the subfield from.
+    """
+    return date_part(part, date)
+
+
 def date_trunc(part: Expr, date: Expr) -> Expr:
     """Truncates the date to a specified level of precision."""
     return Expr(f.date_trunc(part.expr, date.expr))
diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py
index 0d40032bb..0d2fa8f94 100644
--- a/python/tests/test_functions.py
+++ b/python/tests/test_functions.py
@@ -866,6 +866,7 @@ def test_temporal_functions(df):
         f.to_timestamp_seconds(literal("2023-09-07 05:06:14.523952")),
         f.to_timestamp_millis(literal("2023-09-07 05:06:14.523952")),
         f.to_timestamp_micros(literal("2023-09-07 05:06:14.523952")),
+        f.extract(literal("day"), column("d")),
     )
     result = df.collect()
     assert len(result) == 1
@@ -903,6 +904,7 @@ def test_temporal_functions(df):
     assert result.column(9) == pa.array(
         [datetime(2023, 9, 7, 5, 6, 14, 523952)] * 3, type=pa.timestamp("us")
     )
+    assert result.column(10) == pa.array([31, 26, 2], type=pa.float64())
 
 
 def test_case(df):