Skip to content

Commit f01f30c

Browse files
ntjohnson1 and claude authored
Add docstring examples for Scalar temporal functions (#1424)
* Add docstring examples for Scalar temporal functions

  Add example usage to docstrings for Scalar temporal functions to improve documentation.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Remove examples for aliases
* Fix claude's attempt to cheat with sql
* Make examples follow google docstyle

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 3dfd6ee commit f01f30c

File tree

1 file changed

+170
-8
lines changed

1 file changed

+170
-8
lines changed

python/datafusion/functions.py

Lines changed: 170 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1562,6 +1562,19 @@ def now() -> Expr:
15621562
"""Returns the current timestamp in nanoseconds.
15631563
15641564
This will use the same value for all instances of now() in the same statement.
1565+
1566+
Examples:
1567+
>>> ctx = dfn.SessionContext()
1568+
>>> df = ctx.from_pydict({"a": [1]})
1569+
>>> result = df.select(
1570+
... dfn.functions.now().alias("now")
1571+
... )
1572+
1573+
Use .value instead of .as_py() because nanosecond timestamps
1574+
require pandas to convert to Python datetime objects.
1575+
1576+
>>> result.collect_column("now")[0].value > 0
1577+
True
15651578
"""
15661579
return Expr(f.now())
15671580

@@ -1622,6 +1635,17 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
16221635
For usage of ``formatters`` see the ``strftime`` module of the Rust ``chrono`` package.
16231636
16241637
[Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
1638+
1639+
Examples:
1640+
>>> ctx = dfn.SessionContext()
1641+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1642+
>>> result = df.select(
1643+
... dfn.functions.to_timestamp(
1644+
... dfn.col("a")
1645+
... ).alias("ts")
1646+
... )
1647+
>>> str(result.collect_column("ts")[0].as_py())
1648+
'2021-01-01 00:00:00'
16251649
"""
16261650
return Expr(f.to_timestamp(arg.expr, *_unwrap_exprs(formatters)))
16271651

@@ -1630,6 +1654,17 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
16301654
"""Converts a string and optional formats to a ``Timestamp`` in milliseconds.
16311655
16321656
See :py:func:`to_timestamp` for a description on how to use formatters.
1657+
1658+
Examples:
1659+
>>> ctx = dfn.SessionContext()
1660+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1661+
>>> result = df.select(
1662+
... dfn.functions.to_timestamp_millis(
1663+
... dfn.col("a")
1664+
... ).alias("ts")
1665+
... )
1666+
>>> str(result.collect_column("ts")[0].as_py())
1667+
'2021-01-01 00:00:00'
16331668
"""
16341669
return Expr(f.to_timestamp_millis(arg.expr, *_unwrap_exprs(formatters)))
16351670

@@ -1638,6 +1673,17 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
16381673
"""Converts a string and optional formats to a ``Timestamp`` in microseconds.
16391674
16401675
See :py:func:`to_timestamp` for a description on how to use formatters.
1676+
1677+
Examples:
1678+
>>> ctx = dfn.SessionContext()
1679+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1680+
>>> result = df.select(
1681+
... dfn.functions.to_timestamp_micros(
1682+
... dfn.col("a")
1683+
... ).alias("ts")
1684+
... )
1685+
>>> str(result.collect_column("ts")[0].as_py())
1686+
'2021-01-01 00:00:00'
16411687
"""
16421688
return Expr(f.to_timestamp_micros(arg.expr, *_unwrap_exprs(formatters)))
16431689

@@ -1646,6 +1692,17 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
16461692
"""Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
16471693
16481694
See :py:func:`to_timestamp` for a description on how to use formatters.
1695+
1696+
Examples:
1697+
>>> ctx = dfn.SessionContext()
1698+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1699+
>>> result = df.select(
1700+
... dfn.functions.to_timestamp_nanos(
1701+
... dfn.col("a")
1702+
... ).alias("ts")
1703+
... )
1704+
>>> str(result.collect_column("ts")[0].as_py())
1705+
'2021-01-01 00:00:00'
16491706
"""
16501707
return Expr(f.to_timestamp_nanos(arg.expr, *_unwrap_exprs(formatters)))
16511708

@@ -1654,25 +1711,68 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
16541711
"""Converts a string and optional formats to a ``Timestamp`` in seconds.
16551712
16561713
See :py:func:`to_timestamp` for a description on how to use formatters.
1714+
1715+
Examples:
1716+
>>> ctx = dfn.SessionContext()
1717+
>>> df = ctx.from_pydict({"a": ["2021-01-01T00:00:00"]})
1718+
>>> result = df.select(
1719+
... dfn.functions.to_timestamp_seconds(
1720+
... dfn.col("a")
1721+
... ).alias("ts")
1722+
... )
1723+
>>> str(result.collect_column("ts")[0].as_py())
1724+
'2021-01-01 00:00:00'
16571725
"""
16581726
return Expr(f.to_timestamp_seconds(arg.expr, *_unwrap_exprs(formatters)))
16591727

16601728

16611729
def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
1662-
"""Converts a string and optional formats to a Unixtime."""
1730+
"""Converts a string and optional formats to a Unixtime.
1731+
1732+
Examples:
1733+
>>> ctx = dfn.SessionContext()
1734+
>>> df = ctx.from_pydict({"a": ["1970-01-01T00:00:00"]})
1735+
>>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
1736+
>>> result.collect_column("u")[0].as_py()
1737+
0
1738+
"""
16631739
return Expr(f.to_unixtime(string.expr, *_unwrap_exprs(format_arguments)))
16641740

16651741

16661742
def current_date() -> Expr:
1667-
"""Returns current UTC date as a Date32 value."""
1743+
"""Returns current UTC date as a Date32 value.
1744+
1745+
Examples:
1746+
>>> ctx = dfn.SessionContext()
1747+
>>> df = ctx.from_pydict({"a": [1]})
1748+
>>> result = df.select(
1749+
... dfn.functions.current_date().alias("d")
1750+
... )
1751+
>>> result.collect_column("d")[0].as_py() is not None
1752+
True
1753+
"""
16681754
return Expr(f.current_date())
16691755

16701756

16711757
today = current_date
16721758

16731759

16741760
def current_time() -> Expr:
1675-
"""Returns current UTC time as a Time64 value."""
1761+
"""Returns current UTC time as a Time64 value.
1762+
1763+
Examples:
1764+
>>> ctx = dfn.SessionContext()
1765+
>>> df = ctx.from_pydict({"a": [1]})
1766+
>>> result = df.select(
1767+
... dfn.functions.current_time().alias("t")
1768+
... )
1769+
1770+
Use .value instead of .as_py() because nanosecond Time64 values
1771+
require pandas to convert to Python datetime.time objects.
1772+
1773+
>>> result.collect_column("t")[0].value > 0
1774+
True
1775+
"""
16761776
return Expr(f.current_time())
16771777

16781778

@@ -1685,7 +1785,17 @@ def datepart(part: Expr, date: Expr) -> Expr:
16851785

16861786

16871787
def date_part(part: Expr, date: Expr) -> Expr:
1688-
"""Extracts a subfield from the date."""
1788+
"""Extracts a subfield from the date.
1789+
1790+
Examples:
1791+
>>> ctx = dfn.SessionContext()
1792+
>>> df = ctx.from_pydict({"a": ["2021-07-15T00:00:00"]})
1793+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1794+
>>> result = df.select(
1795+
... dfn.functions.date_part(dfn.lit("year"), dfn.col("a")).alias("y"))
1796+
>>> result.collect_column("y")[0].as_py()
1797+
2021
1798+
"""
16891799
return Expr(f.date_part(part.expr, date.expr))
16901800

16911801

@@ -1698,7 +1808,20 @@ def extract(part: Expr, date: Expr) -> Expr:
16981808

16991809

17001810
def date_trunc(part: Expr, date: Expr) -> Expr:
1701-
"""Truncates the date to a specified level of precision."""
1811+
"""Truncates the date to a specified level of precision.
1812+
1813+
Examples:
1814+
>>> ctx = dfn.SessionContext()
1815+
>>> df = ctx.from_pydict({"a": ["2021-07-15T12:34:56"]})
1816+
>>> df = df.select(dfn.functions.to_timestamp(dfn.col("a")).alias("a"))
1817+
>>> result = df.select(
1818+
... dfn.functions.date_trunc(
1819+
... dfn.lit("month"), dfn.col("a")
1820+
... ).alias("t")
1821+
... )
1822+
>>> str(result.collect_column("t")[0].as_py())
1823+
'2021-07-01 00:00:00'
1824+
"""
17021825
return Expr(f.date_trunc(part.expr, date.expr))
17031826

17041827

@@ -1711,12 +1834,39 @@ def datetrunc(part: Expr, date: Expr) -> Expr:
17111834

17121835

17131836
def date_bin(stride: Expr, source: Expr, origin: Expr) -> Expr:
1714-
"""Coerces an arbitrary timestamp to the start of the nearest specified interval."""
1837+
"""Coerces an arbitrary timestamp to the start of the nearest specified interval.
1838+
1839+
Examples:
1840+
>>> ctx = dfn.SessionContext()
1841+
>>> df = ctx.from_pydict({"timestamp": ['2021-07-15 12:34:56', '2021-01-01']})
1842+
>>> result = df.select(
1843+
... dfn.functions.date_bin(
1844+
... dfn.string_literal("15 minutes"),
1845+
... dfn.col("timestamp"),
1846+
... dfn.string_literal("2001-01-01 00:00:00")
1847+
... ).alias("b")
1848+
... )
1849+
>>> str(result.collect_column("b")[0].as_py())
1850+
'2021-07-15 12:30:00'
1851+
>>> str(result.collect_column("b")[1].as_py())
1852+
'2021-01-01 00:00:00'
1853+
"""
17151854
return Expr(f.date_bin(stride.expr, source.expr, origin.expr))
17161855

17171856

17181857
def make_date(year: Expr, month: Expr, day: Expr) -> Expr:
1719-
"""Make a date from year, month and day component parts."""
1858+
"""Make a date from year, month and day component parts.
1859+
1860+
Examples:
1861+
>>> from datetime import date
1862+
>>> ctx = dfn.SessionContext()
1863+
>>> df = ctx.from_pydict({"y": [2024], "m": [1], "d": [15]})
1864+
>>> result = df.select(
1865+
... dfn.functions.make_date(dfn.col("y"), dfn.col("m"),
1866+
... dfn.col("d")).alias("dt"))
1867+
>>> result.collect_column("dt")[0].as_py()
1868+
datetime.date(2024, 1, 15)
1869+
"""
17201870
return Expr(f.make_date(year.expr, month.expr, day.expr))
17211871

17221872

@@ -1839,7 +1989,19 @@ def named_struct(name_pairs: list[tuple[str, Expr]]) -> Expr:
18391989

18401990

18411991
def from_unixtime(arg: Expr) -> Expr:
1842-
"""Converts an integer to RFC3339 timestamp format string."""
1992+
"""Converts an integer Unix time (seconds since the epoch) to a ``Timestamp``.
1993+
1994+
Examples:
1995+
>>> ctx = dfn.SessionContext()
1996+
>>> df = ctx.from_pydict({"a": [0]})
1997+
>>> result = df.select(
1998+
... dfn.functions.from_unixtime(
1999+
... dfn.col("a")
2000+
... ).alias("ts")
2001+
... )
2002+
>>> str(result.collect_column("ts")[0].as_py())
2003+
'1970-01-01 00:00:00'
2004+
"""
18432005
return Expr(f.from_unixtime(arg.expr))
18442006

18452007

0 commit comments

Comments
 (0)