@@ -637,7 +637,17 @@ def chr(arg: Expr) -> Expr:
637637
638638
639639def coalesce (* args : Expr ) -> Expr :
640- """Returns the value of the first expr in ``args`` which is not NULL."""
640+ """Returns the value of the first expr in ``args`` which is not NULL.
641+
642+ Examples:
643+ ---------
644+ >>> ctx = dfn.SessionContext()
645+ >>> df = ctx.from_pydict({"a": [None, 1], "b": [2, 3]})
646+ >>> result = df.select(
647+ ... dfn.functions.coalesce(dfn.col("a"), dfn.col("b")).alias("c"))
648+ >>> result.collect_column("c")[0].as_py()
649+ 2
650+ """
641651 args = [arg .expr for arg in args ]
642652 return Expr (f .coalesce (* args ))
643653
@@ -820,7 +830,16 @@ def ltrim(arg: Expr) -> Expr:
820830
821831
822832def md5 (arg : Expr ) -> Expr :
823- """Computes an MD5 128-bit checksum for a string expression."""
833+ """Computes an MD5 128-bit checksum for a string expression.
834+
835+ Examples:
836+ ---------
837+ >>> ctx = dfn.SessionContext()
838+ >>> df = ctx.from_pydict({"a": ["hello"]})
839+ >>> result = df.select(dfn.functions.md5(dfn.col("a")).alias("md5"))
840+ >>> result.collect_column("md5")[0].as_py()
841+ '5d41402abc4b2a76b9719d911017c592'
842+ """
824843 return Expr (f .md5 (arg .expr ))
825844
826845
@@ -830,7 +849,18 @@ def nanvl(x: Expr, y: Expr) -> Expr:
830849
831850
832851def nvl (x : Expr , y : Expr ) -> Expr :
833- """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``."""
852+ """Returns ``x`` if ``x`` is not ``NULL``. Otherwise returns ``y``.
853+
854+ Examples:
855+ ---------
856+ >>> ctx = dfn.SessionContext()
857+ >>> df = ctx.from_pydict({"a": [None, 1], "b": [0, 0]})
858+ >>> nvl_df = df.select(dfn.functions.nvl(dfn.col("a"), dfn.col("b")).alias("nvl"))
859+ >>> nvl_df.collect_column("nvl")[0].as_py()
860+ 0
861+ >>> nvl_df.collect_column("nvl")[1].as_py()
862+ 1
863+ """
834864 return Expr (f .nvl (x .expr , y .expr ))
835865
836866
@@ -899,21 +929,45 @@ def radians(arg: Expr) -> Expr:
899929
900930
901931def regexp_like (string : Expr , regex : Expr , flags : Expr | None = None ) -> Expr :
902- """Find if any regular expression (regex) matches exist.
932+ r """Find if any regular expression (regex) matches exist.
903933
904934 Tests a string using a regular expression returning true if at least one match,
905935 false otherwise.
936+
937+ Examples:
938+ ---------
939+ >>> ctx = dfn.SessionContext()
940+ >>> df = ctx.from_pydict({"a": ["hello123"]})
941+ >>> result = df.select(
942+ ... dfn.functions.regexp_like(
943+ ... dfn.col("a"), dfn.lit("\\d+")
944+ ... ).alias("m")
945+ ... )
946+ >>> result.collect_column("m")[0].as_py()
947+ True
906948 """
907949 if flags is not None :
908950 flags = flags .expr
909951 return Expr (f .regexp_like (string .expr , regex .expr , flags ))
910952
911953
912954def regexp_match (string : Expr , regex : Expr , flags : Expr | None = None ) -> Expr :
913- """Perform regular expression (regex) matching.
955+ r """Perform regular expression (regex) matching.
914956
915957 Returns an array with each element containing the leftmost-first match of the
916958 corresponding index in ``regex`` to string in ``string``.
959+
960+ Examples:
961+ ---------
962+ >>> ctx = dfn.SessionContext()
963+ >>> df = ctx.from_pydict({"a": ["hello 42 world"]})
964+ >>> result = df.select(
965+ ... dfn.functions.regexp_match(
966+ ... dfn.col("a"), dfn.lit("(\\d+)")
967+ ... ).alias("m")
968+ ... )
969+ >>> result.collect_column("m")[0].as_py()
970+ ['42']
917971 """
918972 if flags is not None :
919973 flags = flags .expr
@@ -923,13 +977,26 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
923977def regexp_replace (
924978 string : Expr , pattern : Expr , replacement : Expr , flags : Expr | None = None
925979) -> Expr :
926- """Replaces substring(s) matching a PCRE-like regular expression.
980+ r """Replaces substring(s) matching a PCRE-like regular expression.
927981
928982 The full list of supported features and syntax can be found at
929983 <https://docs.rs/regex/latest/regex/#syntax>
930984
931985 Supported flags with the addition of 'g' can be found at
932986 <https://docs.rs/regex/latest/regex/#grouping-and-flags>
987+
988+ Examples:
989+ ---------
990+ >>> ctx = dfn.SessionContext()
991+ >>> df = ctx.from_pydict({"a": ["hello 42"]})
992+ >>> result = df.select(
993+ ... dfn.functions.regexp_replace(
994+ ... dfn.col("a"), dfn.lit("\\d+"),
995+ ... dfn.lit("XX")
996+ ... ).alias("r")
997+ ... )
998+ >>> result.collect_column("r")[0].as_py()
999+ 'hello XX'
9331000 """
9341001 if flags is not None :
9351002 flags = flags .expr
@@ -943,6 +1010,15 @@ def regexp_count(
9431010
9441011 Optional start position (the first position is 1) to search for the regular
9451012 expression.
1013+
1014+ Examples:
1015+ ---------
1016+ >>> ctx = dfn.SessionContext()
1017+ >>> df = ctx.from_pydict({"a": ["abcabc"]})
1018+ >>> result = df.select(
1019+ ... dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c"))
1020+ >>> result.collect_column("c")[0].as_py()
1021+ 2
9461022 """
9471023 if flags is not None :
9481024 flags = flags .expr
@@ -958,12 +1034,24 @@ def regexp_instr(
9581034 flags : Expr | None = None ,
9591035 sub_expr : Expr | None = None ,
9601036) -> Expr :
961- """Returns the position of a regular expression match in a string.
1037+ r """Returns the position of a regular expression match in a string.
9621038
9631039 Searches ``values`` for the ``n``-th occurrence of ``regex``, starting at position
9641040 ``start`` (the first position is 1). Returns the starting or ending position based
9651041 on ``end_position``. Use ``flags`` to control regex behavior and ``sub_expr`` to
9661042 return the position of a specific capture group instead of the entire match.
1043+
1044+ Examples:
1045+ ---------
1046+ >>> ctx = dfn.SessionContext()
1047+ >>> df = ctx.from_pydict({"a": ["hello 42 world"]})
1048+ >>> result = df.select(
1049+ ... dfn.functions.regexp_instr(
1050+ ... dfn.col("a"), dfn.lit("\\d+")
1051+ ... ).alias("pos")
1052+ ... )
1053+ >>> result.collect_column("pos")[0].as_py()
1054+ 7
9671055 """
9681056 start = start .expr if start is not None else None
9691057 n = n .expr if n is not None else None
@@ -1030,22 +1118,66 @@ def rtrim(arg: Expr) -> Expr:
10301118
10311119
10321120def sha224 (arg : Expr ) -> Expr :
1033- """Computes the SHA-224 hash of a binary string."""
1121+ """Computes the SHA-224 hash of a binary string.
1122+
1123+ Examples:
1124+ ---------
1125+ >>> ctx = dfn.SessionContext()
1126+ >>> df = ctx.from_pydict({"a": ["hello"]})
1127+ >>> result = df.select(
1128+ ... dfn.functions.sha224(dfn.col("a")).alias("h")
1129+ ... )
1130+ >>> len(result.collect_column("h")[0].as_py()) > 0
1131+ True
1132+ """
10341133 return Expr (f .sha224 (arg .expr ))
10351134
10361135
10371136def sha256 (arg : Expr ) -> Expr :
1038- """Computes the SHA-256 hash of a binary string."""
1137+ """Computes the SHA-256 hash of a binary string.
1138+
1139+ Examples:
1140+ ---------
1141+ >>> ctx = dfn.SessionContext()
1142+ >>> df = ctx.from_pydict({"a": ["hello"]})
1143+ >>> result = df.select(
1144+ ... dfn.functions.sha256(dfn.col("a")).alias("h")
1145+ ... )
1146+ >>> len(result.collect_column("h")[0].as_py()) > 0
1147+ True
1148+ """
10391149 return Expr (f .sha256 (arg .expr ))
10401150
10411151
10421152def sha384 (arg : Expr ) -> Expr :
1043- """Computes the SHA-384 hash of a binary string."""
1153+ """Computes the SHA-384 hash of a binary string.
1154+
1155+ Examples:
1156+ ---------
1157+ >>> ctx = dfn.SessionContext()
1158+ >>> df = ctx.from_pydict({"a": ["hello"]})
1159+ >>> result = df.select(
1160+ ... dfn.functions.sha384(dfn.col("a")).alias("h")
1161+ ... )
1162+ >>> len(result.collect_column("h")[0].as_py()) > 0
1163+ True
1164+ """
10441165 return Expr (f .sha384 (arg .expr ))
10451166
10461167
10471168def sha512 (arg : Expr ) -> Expr :
1048- """Computes the SHA-512 hash of a binary string."""
1169+ """Computes the SHA-512 hash of a binary string.
1170+
1171+ Examples:
1172+ ---------
1173+ >>> ctx = dfn.SessionContext()
1174+ >>> df = ctx.from_pydict({"a": ["hello"]})
1175+ >>> result = df.select(
1176+ ... dfn.functions.sha512(dfn.col("a")).alias("h")
1177+ ... )
1178+ >>> len(result.collect_column("h")[0].as_py()) > 0
1179+ True
1180+ """
10491181 return Expr (f .sha512 (arg .expr ))
10501182
10511183
@@ -1370,18 +1502,55 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr:
13701502
13711503
13721504def uuid () -> Expr :
1373- """Returns uuid v4 as a string value."""
1505+ """Returns uuid v4 as a string value.
1506+
1507+ Examples:
1508+ ---------
1509+ >>> ctx = dfn.SessionContext()
1510+ >>> df = ctx.from_pydict({"a": [1]})
1511+ >>> result = df.select(
1512+ ... dfn.functions.uuid().alias("u")
1513+ ... )
1514+ >>> len(result.collect_column("u")[0].as_py()) == 36
1515+ True
1516+ """
13741517 return Expr (f .uuid ())
13751518
13761519
13771520def struct (* args : Expr ) -> Expr :
1378- """Returns a struct with the given arguments."""
1521+ """Returns a struct with the given arguments.
1522+
1523+ Examples:
1524+ ---------
1525+ >>> ctx = dfn.SessionContext()
1526+ >>> df = ctx.from_pydict({"a": [1], "b": [2]})
1527+ >>> result = df.select(
1528+ ... dfn.functions.struct(
1529+ ... dfn.col("a"), dfn.col("b")
1530+ ... ).alias("s")
1531+ ... )
1532+ >>> result.collect_column("s")[0].as_py() == {"c0": 1, "c1": 2}
1533+ True
1534+ """
13791535 args = [arg .expr for arg in args ]
13801536 return Expr (f .struct (* args ))
13811537
13821538
13831539def named_struct (name_pairs : list [tuple [str , Expr ]]) -> Expr :
1384- """Returns a struct with the given names and arguments pairs."""
1540+ """Returns a struct with the given names and arguments pairs.
1541+
1542+ Examples:
1543+ ---------
1544+ >>> ctx = dfn.SessionContext()
1545+ >>> df = ctx.from_pydict({"a": [1]})
1546+ >>> result = df.select(
1547+ ... dfn.functions.named_struct(
1548+ ... [("x", dfn.lit(10)), ("y", dfn.lit(20))]
1549+ ... ).alias("s")
1550+ ... )
1551+ >>> result.collect_column("s")[0].as_py() == {"x": 10, "y": 20}
1552+ True
1553+ """
13851554 name_pair_exprs = [
13861555 [Expr .literal (pa .scalar (pair [0 ], type = pa .string ())), pair [1 ]]
13871556 for pair in name_pairs
@@ -1398,12 +1567,31 @@ def from_unixtime(arg: Expr) -> Expr:
13981567
13991568
14001569def arrow_typeof (arg : Expr ) -> Expr :
1401- """Returns the Arrow type of the expression."""
1570+ """Returns the Arrow type of the expression.
1571+
1572+ Examples:
1573+ ---------
1574+ >>> ctx = dfn.SessionContext()
1575+ >>> df = ctx.from_pydict({"a": [1]})
1576+ >>> result = df.select(dfn.functions.arrow_typeof(dfn.col("a")).alias("t"))
1577+ >>> result.collect_column("t")[0].as_py()
1578+ 'Int64'
1579+ """
14021580 return Expr (f .arrow_typeof (arg .expr ))
14031581
14041582
14051583def arrow_cast (expr : Expr , data_type : Expr ) -> Expr :
1406- """Casts an expression to a specified data type."""
1584+ """Casts an expression to a specified data type.
1585+
1586+ Examples:
1587+ ---------
1588+ >>> ctx = dfn.SessionContext()
1589+ >>> df = ctx.from_pydict({"a": [1]})
1590+ >>> data_type = dfn.string_literal("Float64")
1591+ >>> result = df.select(dfn.functions.arrow_cast(dfn.col("a"), data_type).alias("c"))
1592+ >>> result.collect_column("c")[0].as_py()
1593+ 1.0
1594+ """
14071595 return Expr (f .arrow_cast (expr .expr , data_type .expr ))
14081596
14091597
0 commit comments