Skip to content

Commit b9a958e

Browse files
ntjohnson1 and claude authored
Add docstring examples for Scalar math functions (#1421)
* Add docstring examples for Scalar math functions Add example usage to docstrings for Scalar math functions to improve documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Fix copy paste error on name * Remove example from alias * Examples google docstyle --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 89751b5 commit b9a958e

File tree

1 file changed

+205
-23
lines changed

1 file changed

+205
-23
lines changed

python/datafusion/functions.py

Lines changed: 205 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -484,10 +484,12 @@ def window(
484484
def abs(arg: Expr) -> Expr:
485485
"""Return the absolute value of a given number.
486486
487-
Returns:
488-
--------
489-
Expr
490-
A new expression representing the absolute value of the input expression.
487+
Examples:
488+
>>> ctx = dfn.SessionContext()
489+
>>> df = ctx.from_pydict({"a": [-1, 0, 1]})
490+
>>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs"))
491+
>>> result.collect_column("abs")[0].as_py()
492+
1
491493
"""
492494
return Expr(f.abs(arg.expr))
493495

@@ -600,12 +602,28 @@ def btrim(arg: Expr) -> Expr:
600602

601603

602604
def cbrt(arg: Expr) -> Expr:
603-
"""Returns the cube root of a number."""
605+
"""Returns the cube root of a number.
606+
607+
Examples:
608+
>>> ctx = dfn.SessionContext()
609+
>>> df = ctx.from_pydict({"a": [27]})
610+
>>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt"))
611+
>>> cbrt_df.collect_column("cbrt")[0].as_py()
612+
3.0
613+
"""
604614
return Expr(f.cbrt(arg.expr))
605615

606616

607617
def ceil(arg: Expr) -> Expr:
608-
"""Returns the nearest integer greater than or equal to argument."""
618+
"""Returns the nearest integer greater than or equal to argument.
619+
620+
Examples:
621+
>>> ctx = dfn.SessionContext()
622+
>>> df = ctx.from_pydict({"a": [1.9]})
623+
>>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil"))
624+
>>> ceil_df.collect_column("ceil")[0].as_py()
625+
2.0
626+
"""
609627
return Expr(f.ceil(arg.expr))
610628

611629

@@ -709,12 +727,30 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr:
709727

710728

711729
def exp(arg: Expr) -> Expr:
712-
"""Returns the exponential of the argument."""
730+
"""Returns the exponential of the argument.
731+
732+
Examples:
733+
>>> ctx = dfn.SessionContext()
734+
>>> df = ctx.from_pydict({"a": [0.0]})
735+
>>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp"))
736+
>>> result.collect_column("exp")[0].as_py()
737+
1.0
738+
"""
713739
return Expr(f.exp(arg.expr))
714740

715741

716742
def factorial(arg: Expr) -> Expr:
717-
"""Returns the factorial of the argument."""
743+
"""Returns the factorial of the argument.
744+
745+
Examples:
746+
>>> ctx = dfn.SessionContext()
747+
>>> df = ctx.from_pydict({"a": [3]})
748+
>>> result = df.select(
749+
... dfn.functions.factorial(dfn.col("a")).alias("factorial")
750+
... )
751+
>>> result.collect_column("factorial")[0].as_py()
752+
6
753+
"""
718754
return Expr(f.factorial(arg.expr))
719755

720756

@@ -730,12 +766,30 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:
730766

731767

732768
def floor(arg: Expr) -> Expr:
733-
"""Returns the nearest integer less than or equal to the argument."""
769+
"""Returns the nearest integer less than or equal to the argument.
770+
771+
Examples:
772+
>>> ctx = dfn.SessionContext()
773+
>>> df = ctx.from_pydict({"a": [1.9]})
774+
>>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor"))
775+
>>> floor_df.collect_column("floor")[0].as_py()
776+
1.0
777+
"""
734778
return Expr(f.floor(arg.expr))
735779

736780

737781
def gcd(x: Expr, y: Expr) -> Expr:
738-
"""Returns the greatest common divisor."""
782+
"""Returns the greatest common divisor.
783+
784+
Examples:
785+
>>> ctx = dfn.SessionContext()
786+
>>> df = ctx.from_pydict({"a": [12], "b": [8]})
787+
>>> result = df.select(
788+
... dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd")
789+
... )
790+
>>> result.collect_column("gcd")[0].as_py()
791+
4
792+
"""
739793
return Expr(f.gcd(x.expr, y.expr))
740794

741795

@@ -757,12 +811,30 @@ def instr(string: Expr, substring: Expr) -> Expr:
757811

758812

759813
def iszero(arg: Expr) -> Expr:
760-
"""Returns true if a given number is +0.0 or -0.0 otherwise returns false."""
814+
"""Returns true if a given number is +0.0 or -0.0 otherwise returns false.
815+
816+
Examples:
817+
>>> ctx = dfn.SessionContext()
818+
>>> df = ctx.from_pydict({"a": [0.0, 1.0]})
819+
>>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz"))
820+
>>> result.collect_column("iz")[0].as_py()
821+
True
822+
"""
761823
return Expr(f.iszero(arg.expr))
762824

763825

764826
def lcm(x: Expr, y: Expr) -> Expr:
765-
"""Returns the least common multiple."""
827+
"""Returns the least common multiple.
828+
829+
Examples:
830+
>>> ctx = dfn.SessionContext()
831+
>>> df = ctx.from_pydict({"a": [4], "b": [6]})
832+
>>> result = df.select(
833+
... dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm")
834+
... )
835+
>>> result.collect_column("lcm")[0].as_py()
836+
12
837+
"""
766838
return Expr(f.lcm(x.expr, y.expr))
767839

768840

@@ -777,22 +849,56 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr:
777849

778850

779851
def ln(arg: Expr) -> Expr:
780-
"""Returns the natural logarithm (base e) of the argument."""
852+
"""Returns the natural logarithm (base e) of the argument.
853+
854+
Examples:
855+
>>> ctx = dfn.SessionContext()
856+
>>> df = ctx.from_pydict({"a": [1.0]})
857+
>>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln"))
858+
>>> result.collect_column("ln")[0].as_py()
859+
0.0
860+
"""
781861
return Expr(f.ln(arg.expr))
782862

783863

784864
def log(base: Expr, num: Expr) -> Expr:
785-
"""Returns the logarithm of a number for a particular ``base``."""
865+
"""Returns the logarithm of a number for a particular ``base``.
866+
867+
Examples:
868+
>>> ctx = dfn.SessionContext()
869+
>>> df = ctx.from_pydict({"a": [100.0]})
870+
>>> result = df.select(
871+
... dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log")
872+
... )
873+
>>> result.collect_column("log")[0].as_py()
874+
2.0
875+
"""
786876
return Expr(f.log(base.expr, num.expr))
787877

788878

789879
def log10(arg: Expr) -> Expr:
790-
"""Base 10 logarithm of the argument."""
880+
"""Base 10 logarithm of the argument.
881+
882+
Examples:
883+
>>> ctx = dfn.SessionContext()
884+
>>> df = ctx.from_pydict({"a": [100.0]})
885+
>>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10"))
886+
>>> result.collect_column("log10")[0].as_py()
887+
2.0
888+
"""
791889
return Expr(f.log10(arg.expr))
792890

793891

794892
def log2(arg: Expr) -> Expr:
795-
"""Base 2 logarithm of the argument."""
893+
"""Base 2 logarithm of the argument.
894+
895+
Examples:
896+
>>> ctx = dfn.SessionContext()
897+
>>> df = ctx.from_pydict({"a": [8.0]})
898+
>>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2"))
899+
>>> result.collect_column("log2")[0].as_py()
900+
3.0
901+
"""
796902
return Expr(f.log2(arg.expr))
797903

798904

@@ -831,7 +937,18 @@ def md5(arg: Expr) -> Expr:
831937

832938

833939
def nanvl(x: Expr, y: Expr) -> Expr:
834-
"""Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``."""
940+
"""Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.
941+
942+
Examples:
943+
>>> ctx = dfn.SessionContext()
944+
>>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]})
945+
>>> nanvl_df = df.select(
946+
... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl"))
947+
>>> nanvl_df.collect_column("nanvl")[0].as_py()
948+
0.0
949+
>>> nanvl_df.collect_column("nanvl")[1].as_py()
950+
1.0
951+
"""
835952
return Expr(f.nanvl(x.expr, y.expr))
836953

837954

@@ -871,7 +988,20 @@ def overlay(
871988

872989

873990
def pi() -> Expr:
874-
"""Returns an approximate value of π."""
991+
"""Returns an approximate value of π.
992+
993+
Examples:
994+
>>> ctx = dfn.SessionContext()
995+
>>> df = ctx.from_pydict({"a": [1]})
996+
>>> import builtins
997+
>>> result = df.select(
998+
... dfn.functions.pi().alias("pi")
999+
... )
1000+
>>> builtins.round(
1001+
... result.collect_column("pi")[0].as_py(), 5
1002+
... )
1003+
3.14159
1004+
"""
8751005
return Expr(f.pi())
8761006

8771007

@@ -884,7 +1014,17 @@ def position(string: Expr, substring: Expr) -> Expr:
8841014

8851015

8861016
def power(base: Expr, exponent: Expr) -> Expr:
887-
"""Returns ``base`` raised to the power of ``exponent``."""
1017+
"""Returns ``base`` raised to the power of ``exponent``.
1018+
1019+
Examples:
1020+
>>> ctx = dfn.SessionContext()
1021+
>>> df = ctx.from_pydict({"a": [2.0]})
1022+
>>> result = df.select(
1023+
... dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow")
1024+
... )
1025+
>>> result.collect_column("pow")[0].as_py()
1026+
8.0
1027+
"""
8881028
return Expr(f.power(base.expr, exponent.expr))
8891029

8901030

@@ -1081,6 +1221,13 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr:
10811221
If the optional ``decimal_places`` is specified, round to the nearest number of
10821222
decimal places. You can specify a negative number of decimal places. For example
10831223
``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.
1224+
1225+
Examples:
1226+
>>> ctx = dfn.SessionContext()
1227+
>>> df = ctx.from_pydict({"a": [1.567]})
1228+
>>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r"))
1229+
>>> result.collect_column("r")[0].as_py()
1230+
1.57
10841231
"""
10851232
if decimal_places is None:
10861233
decimal_places = Expr.literal(0)
@@ -1163,7 +1310,15 @@ def sha512(arg: Expr) -> Expr:
11631310

11641311

11651312
def signum(arg: Expr) -> Expr:
1166-
"""Returns the sign of the argument (-1, 0, +1)."""
1313+
"""Returns the sign of the argument (-1, 0, +1).
1314+
1315+
Examples:
1316+
>>> ctx = dfn.SessionContext()
1317+
>>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]})
1318+
>>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s"))
1319+
>>> result.collect_column("s").to_pylist()
1320+
[-1.0, 0.0, 1.0]
1321+
"""
11671322
return Expr(f.signum(arg.expr))
11681323

11691324

@@ -1203,7 +1358,15 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr:
12031358

12041359

12051360
def sqrt(arg: Expr) -> Expr:
1206-
"""Returns the square root of the argument."""
1361+
"""Returns the square root of the argument.
1362+
1363+
Examples:
1364+
>>> ctx = dfn.SessionContext()
1365+
>>> df = ctx.from_pydict({"a": [9.0]})
1366+
>>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt"))
1367+
>>> result.collect_column("sqrt")[0].as_py()
1368+
3.0
1369+
"""
12071370
return Expr(f.sqrt(arg.expr))
12081371

12091372

@@ -1440,7 +1603,15 @@ def trim(arg: Expr) -> Expr:
14401603

14411604

14421605
def trunc(num: Expr, precision: Expr | None = None) -> Expr:
1443-
"""Truncate the number toward zero with optional precision."""
1606+
"""Truncate the number toward zero with optional precision.
1607+
1608+
Examples:
1609+
>>> ctx = dfn.SessionContext()
1610+
>>> df = ctx.from_pydict({"a": [1.567]})
1611+
>>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
1612+
>>> result.collect_column("t")[0].as_py()
1613+
1.0
1614+
"""
14441615
if precision is not None:
14451616
return Expr(f.trunc(num.expr, precision.expr))
14461617
return Expr(f.trunc(num.expr))
@@ -1574,7 +1745,18 @@ def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
15741745

15751746

15761747
def random() -> Expr:
1577-
"""Returns a random value in the range ``0.0 <= x < 1.0``."""
1748+
"""Returns a random value in the range ``0.0 <= x < 1.0``.
1749+
1750+
Examples:
1751+
>>> ctx = dfn.SessionContext()
1752+
>>> df = ctx.from_pydict({"a": [1]})
1753+
>>> result = df.select(
1754+
... dfn.functions.random().alias("r")
1755+
... )
1756+
>>> val = result.collect_column("r")[0].as_py()
1757+
>>> 0.0 <= val < 1.0
1758+
True
1759+
"""
15781760
return Expr(f.random())
15791761

15801762

0 commit comments

Comments
 (0)