Skip to content

Commit 0d0c276

Browse files
ntjohnson1 and claude committed
Add docstring examples for Scalar math functions
Add example usage to docstrings for Scalar math functions to improve documentation. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 1160d5a commit 0d0c276

File tree

1 file changed

+226
-23
lines changed

1 file changed

+226
-23
lines changed

python/datafusion/functions.py

Lines changed: 226 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -484,10 +484,13 @@ def window(
484484
def abs(arg: Expr) -> Expr:
485485
"""Return the absolute value of a given number.
486486
487-
Returns:
488-
--------
489-
Expr
490-
A new expression representing the absolute value of the input expression.
487+
Examples:
488+
---------
489+
>>> ctx = dfn.SessionContext()
490+
>>> df = ctx.from_pydict({"a": [-1, 0, 1]})
491+
>>> result = df.select(dfn.functions.abs(dfn.col("a")).alias("abs"))
492+
>>> result.collect_column("abs")[0].as_py()
493+
1
491494
"""
492495
return Expr(f.abs(arg.expr))
493496

@@ -607,12 +610,30 @@ def btrim(arg: Expr) -> Expr:
607610

608611

609612
def cbrt(arg: Expr) -> Expr:
610-
"""Returns the cube root of a number."""
613+
"""Returns the cube root of a number.
614+
615+
Examples:
616+
---------
617+
>>> ctx = dfn.SessionContext()
618+
>>> df = ctx.from_pydict({"a": [27]})
619+
>>> cbrt_df = df.select(dfn.functions.cbrt(dfn.col("a")).alias("cbrt"))
620+
>>> cbrt_df.collect_column("cbrt")[0].as_py()
621+
3.0
622+
"""
611623
return Expr(f.cbrt(arg.expr))
612624

613625

614626
def ceil(arg: Expr) -> Expr:
615-
"""Returns the nearest integer greater than or equal to argument."""
627+
"""Returns the nearest integer greater than or equal to argument.
628+
629+
Examples:
630+
---------
631+
>>> ctx = dfn.SessionContext()
632+
>>> df = ctx.from_pydict({"a": [1.9]})
633+
>>> ceil_df = df.select(dfn.functions.ceil(dfn.col("a")).alias("ceil"))
634+
>>> ceil_df.collect_column("ceil")[0].as_py()
635+
2.0
636+
"""
616637
return Expr(f.ceil(arg.expr))
617638

618639

@@ -711,12 +732,32 @@ def ends_with(arg: Expr, suffix: Expr) -> Expr:
711732

712733

713734
def exp(arg: Expr) -> Expr:
714-
"""Returns the exponential of the argument."""
735+
"""Returns the exponential of the argument.
736+
737+
Examples:
738+
---------
739+
>>> ctx = dfn.SessionContext()
740+
>>> df = ctx.from_pydict({"a": [0.0]})
741+
>>> result = df.select(dfn.functions.exp(dfn.col("a")).alias("exp"))
742+
>>> result.collect_column("exp")[0].as_py()
743+
1.0
744+
"""
715745
return Expr(f.exp(arg.expr))
716746

717747

718748
def factorial(arg: Expr) -> Expr:
719-
"""Returns the factorial of the argument."""
749+
"""Returns the factorial of the argument.
750+
751+
Examples:
752+
---------
753+
>>> ctx = dfn.SessionContext()
754+
>>> df = ctx.from_pydict({"a": [3]})
755+
>>> result = df.select(
756+
... dfn.functions.factorial(dfn.col("a")).alias("factorial")
757+
... )
758+
>>> result.collect_column("factorial")[0].as_py()
759+
6
760+
"""
720761
return Expr(f.factorial(arg.expr))
721762

722763

@@ -732,12 +773,30 @@ def find_in_set(string: Expr, string_list: Expr) -> Expr:
732773

733774

734775
def floor(arg: Expr) -> Expr:
735-
"""Returns the nearest integer less than or equal to the argument."""
776+
"""Returns the nearest integer less than or equal to the argument.
777+
778+
Examples:
779+
---------
780+
>>> ctx = dfn.SessionContext()
781+
>>> df = ctx.from_pydict({"a": [1.9]})
782+
>>> floor_df = df.select(dfn.functions.floor(dfn.col("a")).alias("floor"))
783+
>>> floor_df.collect_column("floor")[0].as_py()
784+
1.0
785+
"""
736786
return Expr(f.floor(arg.expr))
737787

738788

739789
def gcd(x: Expr, y: Expr) -> Expr:
740-
"""Returns the greatest common divisor."""
790+
"""Returns the greatest common divisor.
791+
792+
Examples:
793+
---------
794+
>>> ctx = dfn.SessionContext()
795+
>>> df = ctx.from_pydict({"a": [12], "b": [8]})
796+
>>> result = df.select(dfn.functions.gcd(dfn.col("a"), dfn.col("b")).alias("gcd"))
797+
>>> result.collect_column("gcd")[0].as_py()
798+
4
799+
"""
741800
return Expr(f.gcd(x.expr, y.expr))
742801

743802

@@ -759,12 +818,30 @@ def instr(string: Expr, substring: Expr) -> Expr:
759818

760819

761820
def iszero(arg: Expr) -> Expr:
762-
"""Returns true if a given number is +0.0 or -0.0 otherwise returns false."""
821+
"""Returns true if a given number is +0.0 or -0.0, otherwise returns false.
822+
823+
Examples:
824+
---------
825+
>>> ctx = dfn.SessionContext()
826+
>>> df = ctx.from_pydict({"a": [0.0, 1.0]})
827+
>>> result = df.select(dfn.functions.iszero(dfn.col("a")).alias("iz"))
828+
>>> result.collect_column("iz")[0].as_py()
829+
True
830+
"""
763831
return Expr(f.iszero(arg.expr))
764832

765833

766834
def lcm(x: Expr, y: Expr) -> Expr:
767-
"""Returns the least common multiple."""
835+
"""Returns the least common multiple.
836+
837+
Examples:
838+
---------
839+
>>> ctx = dfn.SessionContext()
840+
>>> df = ctx.from_pydict({"a": [4], "b": [6]})
841+
>>> result = df.select(dfn.functions.lcm(dfn.col("a"), dfn.col("b")).alias("lcm"))
842+
>>> result.collect_column("lcm")[0].as_py()
843+
12
844+
"""
768845
return Expr(f.lcm(x.expr, y.expr))
769846

770847

@@ -779,22 +856,58 @@ def levenshtein(string1: Expr, string2: Expr) -> Expr:
779856

780857

781858
def ln(arg: Expr) -> Expr:
782-
"""Returns the natural logarithm (base e) of the argument."""
859+
"""Returns the natural logarithm (base e) of the argument.
860+
861+
Examples:
862+
---------
863+
>>> ctx = dfn.SessionContext()
864+
>>> df = ctx.from_pydict({"a": [1.0]})
865+
>>> result = df.select(dfn.functions.ln(dfn.col("a")).alias("ln"))
866+
>>> result.collect_column("ln")[0].as_py()
867+
0.0
868+
"""
783869
return Expr(f.ln(arg.expr))
784870

785871

786872
def log(base: Expr, num: Expr) -> Expr:
787-
"""Returns the logarithm of a number for a particular ``base``."""
873+
"""Returns the logarithm of a number for a particular ``base``.
874+
875+
Examples:
876+
---------
877+
>>> ctx = dfn.SessionContext()
878+
>>> df = ctx.from_pydict({"a": [100.0]})
879+
>>> result = df.select(dfn.functions.log(dfn.lit(10.0), dfn.col("a")).alias("log"))
880+
>>> result.collect_column("log")[0].as_py()
881+
2.0
882+
"""
788883
return Expr(f.log(base.expr, num.expr))
789884

790885

791886
def log10(arg: Expr) -> Expr:
792-
"""Base 10 logarithm of the argument."""
887+
"""Base 10 logarithm of the argument.
888+
889+
Examples:
890+
---------
891+
>>> ctx = dfn.SessionContext()
892+
>>> df = ctx.from_pydict({"a": [100.0]})
893+
>>> result = df.select(dfn.functions.log10(dfn.col("a")).alias("log10"))
894+
>>> result.collect_column("log10")[0].as_py()
895+
2.0
896+
"""
793897
return Expr(f.log10(arg.expr))
794898

795899

796900
def log2(arg: Expr) -> Expr:
797-
"""Base 2 logarithm of the argument."""
901+
"""Base 2 logarithm of the argument.
902+
903+
Examples:
904+
---------
905+
>>> ctx = dfn.SessionContext()
906+
>>> df = ctx.from_pydict({"a": [8.0]})
907+
>>> result = df.select(dfn.functions.log2(dfn.col("a")).alias("log2"))
908+
>>> result.collect_column("log2")[0].as_py()
909+
3.0
910+
"""
798911
return Expr(f.log2(arg.expr))
799912

800913

@@ -825,7 +938,19 @@ def md5(arg: Expr) -> Expr:
825938

826939

827940
def nanvl(x: Expr, y: Expr) -> Expr:
828-
"""Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``."""
941+
"""Returns ``x`` if ``x`` is not ``NaN``. Otherwise returns ``y``.
942+
943+
Examples:
944+
---------
945+
>>> ctx = dfn.SessionContext()
946+
>>> df = ctx.from_pydict({"a": [np.nan, 1.0], "b": [0.0, 0.0]})
947+
>>> nanvl_df = df.select(
948+
... dfn.functions.nanvl(dfn.col("a"), dfn.col("b")).alias("nanvl"))
949+
>>> nanvl_df.collect_column("nanvl")[0].as_py()
950+
0.0
951+
>>> nanvl_df.collect_column("nanvl")[1].as_py()
952+
1.0
953+
"""
829954
return Expr(f.nanvl(x.expr, y.expr))
830955

831956

@@ -853,7 +978,21 @@ def overlay(
853978

854979

855980
def pi() -> Expr:
856-
"""Returns an approximate value of π."""
981+
"""Returns an approximate value of π.
982+
983+
Examples:
984+
---------
985+
>>> ctx = dfn.SessionContext()
986+
>>> df = ctx.from_pydict({"a": [1]})
987+
>>> import builtins
988+
>>> result = df.select(
989+
... dfn.functions.pi().alias("pi")
990+
... )
991+
>>> builtins.round(
992+
... result.collect_column("pi")[0].as_py(), 5
993+
... )
994+
3.14159
995+
"""
857996
return Expr(f.pi())
858997

859998

@@ -866,14 +1005,31 @@ def position(string: Expr, substring: Expr) -> Expr:
8661005

8671006

8681007
def power(base: Expr, exponent: Expr) -> Expr:
869-
"""Returns ``base`` raised to the power of ``exponent``."""
1008+
"""Returns ``base`` raised to the power of ``exponent``.
1009+
1010+
Examples:
1011+
---------
1012+
>>> ctx = dfn.SessionContext()
1013+
>>> df = ctx.from_pydict({"a": [2.0]})
1014+
>>> result = df.select(dfn.functions.power(dfn.col("a"), dfn.lit(3.0)).alias("pow"))
1015+
>>> result.collect_column("pow")[0].as_py()
1016+
8.0
1017+
"""
8701018
return Expr(f.power(base.expr, exponent.expr))
8711019

8721020

8731021
def pow(base: Expr, exponent: Expr) -> Expr:
8741022
"""Returns ``base`` raised to the power of ``exponent``.
8751023
8761024
This is an alias of :py:func:`power`.
1025+
1026+
Examples:
1027+
---------
1028+
>>> ctx = dfn.SessionContext()
1029+
>>> df = ctx.from_pydict({"a": [3.0]})
1030+
>>> result = df.select(dfn.functions.pow(dfn.col("a"), dfn.lit(2.0)).alias("pow"))
1031+
>>> result.collect_column("pow")[0].as_py()
1032+
9.0
8771033
"""
8781034
return power(base, exponent)
8791035

@@ -1008,6 +1164,14 @@ def round(value: Expr, decimal_places: Expr | None = None) -> Expr:
10081164
If the optional ``decimal_places`` is specified, round to the nearest number of
10091165
decimal places. You can specify a negative number of decimal places. For example
10101166
``round(lit(125.2345), lit(-2))`` would yield a value of ``100.0``.
1167+
1168+
Examples:
1169+
---------
1170+
>>> ctx = dfn.SessionContext()
1171+
>>> df = ctx.from_pydict({"a": [1.567]})
1172+
>>> result = df.select(dfn.functions.round(dfn.col("a"), dfn.lit(2)).alias("r"))
1173+
>>> result.collect_column("r")[0].as_py()
1174+
1.57
10111175
"""
10121176
if decimal_places is None:
10131177
decimal_places = Expr.literal(0)
@@ -1050,7 +1214,16 @@ def sha512(arg: Expr) -> Expr:
10501214

10511215

10521216
def signum(arg: Expr) -> Expr:
1053-
"""Returns the sign of the argument (-1, 0, +1)."""
1217+
"""Returns the sign of the argument (-1, 0, +1).
1218+
1219+
Examples:
1220+
---------
1221+
>>> ctx = dfn.SessionContext()
1222+
>>> df = ctx.from_pydict({"a": [-5.0, 0.0, 5.0]})
1223+
>>> result = df.select(dfn.functions.signum(dfn.col("a")).alias("s"))
1224+
>>> result.collect_column("s").to_pylist()
1225+
[-1.0, 0.0, 1.0]
1226+
"""
10541227
return Expr(f.signum(arg.expr))
10551228

10561229

@@ -1092,7 +1265,16 @@ def split_part(string: Expr, delimiter: Expr, index: Expr) -> Expr:
10921265

10931266

10941267
def sqrt(arg: Expr) -> Expr:
1095-
"""Returns the square root of the argument."""
1268+
"""Returns the square root of the argument.
1269+
1270+
Examples:
1271+
---------
1272+
>>> ctx = dfn.SessionContext()
1273+
>>> df = ctx.from_pydict({"a": [9.0]})
1274+
>>> result = df.select(dfn.functions.sqrt(dfn.col("a")).alias("sqrt"))
1275+
>>> result.collect_column("sqrt")[0].as_py()
1276+
3.0
1277+
"""
10961278
return Expr(f.sqrt(arg.expr))
10971279

10981280

@@ -1331,7 +1513,16 @@ def trim(arg: Expr) -> Expr:
13311513

13321514

13331515
def trunc(num: Expr, precision: Expr | None = None) -> Expr:
1334-
"""Truncate the number toward zero with optional precision."""
1516+
"""Truncate the number toward zero with optional precision.
1517+
1518+
Examples:
1519+
---------
1520+
>>> ctx = dfn.SessionContext()
1521+
>>> df = ctx.from_pydict({"a": [1.567]})
1522+
>>> result = df.select(dfn.functions.trunc(dfn.col("a")).alias("t"))
1523+
>>> result.collect_column("t")[0].as_py()
1524+
1.0
1525+
"""
13351526
if precision is not None:
13361527
return Expr(f.trunc(num.expr, precision.expr))
13371528
return Expr(f.trunc(num.expr))
@@ -1408,7 +1599,19 @@ def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
14081599

14091600

14101601
def random() -> Expr:
1411-
"""Returns a random value in the range ``0.0 <= x < 1.0``."""
1602+
"""Returns a random value in the range ``0.0 <= x < 1.0``.
1603+
1604+
Examples:
1605+
---------
1606+
>>> ctx = dfn.SessionContext()
1607+
>>> df = ctx.from_pydict({"a": [1]})
1608+
>>> result = df.select(
1609+
... dfn.functions.random().alias("r")
1610+
... )
1611+
>>> val = result.collect_column("r")[0].as_py()
1612+
>>> 0.0 <= val < 1.0
1613+
True
1614+
"""
14121615
return Expr(f.random())
14131616

14141617

0 commit comments

Comments
 (0)