@@ -637,7 +637,18 @@ def chr(arg: Expr) -> Expr:
637637
638638
def coalesce(*args: Expr) -> Expr:
    """Return the value of the first expression in ``args`` that is not NULL.

    Args:
        *args: Candidate expressions, considered in the order given.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [None, 1], "b": [2, 3]})
    >>> result = df.select(
    ...     dfn.functions.coalesce(dfn.col("a"), dfn.col("b")).alias("c"))
    >>> result.collect_column("c")[0].as_py()
    2
    """
    # Unwrap every Python-level Expr before handing off to the internal call.
    raw_exprs = [item.expr for item in args]
    return Expr(f.coalesce(*raw_exprs))
643654
@@ -820,7 +831,16 @@ def ltrim(arg: Expr) -> Expr:
820831
821832
def md5(arg: Expr) -> Expr:
    """Compute an MD5 128-bit checksum for a string expression.

    Args:
        arg: String expression to hash.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello"]})
    >>> result = df.select(dfn.functions.md5(dfn.col("a")).alias("md5"))
    >>> result.collect_column("md5")[0].as_py()
    '5d41402abc4b2a76b9719d911017c592'
    """
    checksum = f.md5(arg.expr)
    return Expr(checksum)
825845
826846
@@ -830,7 +850,18 @@ def nanvl(x: Expr, y: Expr) -> Expr:
830850
831851
def nvl(x: Expr, y: Expr) -> Expr:
    """Return ``x`` when ``x`` is not ``NULL``; otherwise return ``y``.

    Args:
        x: Expression whose value is preferred.
        y: Fallback expression used where ``x`` is ``NULL``.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [None, 1], "b": [0, 0]})
    >>> nvl_df = df.select(
    ...     dfn.functions.nvl(dfn.col("a"), dfn.col("b")).alias("nvl")
    ... )
    >>> nvl_df.collect_column("nvl")[0].as_py()
    0
    >>> nvl_df.collect_column("nvl")[1].as_py()
    1
    """
    preferred, fallback = x.expr, y.expr
    return Expr(f.nvl(preferred, fallback))
835866
836867
@@ -899,21 +930,45 @@ def radians(arg: Expr) -> Expr:
899930
900931
def regexp_like(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
    r"""Check whether any regular expression (regex) match exists.

    Tests a string using a regular expression, returning true if there is at
    least one match and false otherwise.

    Args:
        string: Expression holding the text to test.
        regex: Expression holding the regular expression.
        flags: Optional expression with regex flags; ``None`` passes no flags.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello123"]})
    >>> result = df.select(
    ...     dfn.functions.regexp_like(
    ...         dfn.col("a"), dfn.lit("\\d+")
    ...     ).alias("m")
    ... )
    >>> result.collect_column("m")[0].as_py()
    True
    """
    # Only unwrap the flags when the caller actually supplied them.
    raw_flags = None if flags is None else flags.expr
    return Expr(f.regexp_like(string.expr, regex.expr, raw_flags))
910953
911954
912955def regexp_match (string : Expr , regex : Expr , flags : Expr | None = None ) -> Expr :
913- """Perform regular expression (regex) matching.
956+ r """Perform regular expression (regex) matching.
914957
915958 Returns an array with each element containing the leftmost-first match of the
916959 corresponding index in ``regex`` to string in ``string``.
960+
961+ Examples:
962+ ---------
963+ >>> ctx = dfn.SessionContext()
964+ >>> df = ctx.from_pydict({"a": ["hello 42 world"]})
965+ >>> result = df.select(
966+ ... dfn.functions.regexp_match(
967+ ... dfn.col("a"), dfn.lit("(\\d+)")
968+ ... ).alias("m")
969+ ... )
970+ >>> result.collect_column("m")[0].as_py()
971+ ['42']
917972 """
918973 if flags is not None :
919974 flags = flags .expr
@@ -923,13 +978,26 @@ def regexp_match(string: Expr, regex: Expr, flags: Expr | None = None) -> Expr:
923978def regexp_replace (
924979 string : Expr , pattern : Expr , replacement : Expr , flags : Expr | None = None
925980) -> Expr :
926- """Replaces substring(s) matching a PCRE-like regular expression.
981+ r """Replaces substring(s) matching a PCRE-like regular expression.
927982
928983 The full list of supported features and syntax can be found at
929984 <https://docs.rs/regex/latest/regex/#syntax>
930985
931986 Supported flags with the addition of 'g' can be found at
932987 <https://docs.rs/regex/latest/regex/#grouping-and-flags>
988+
989+ Examples:
990+ ---------
991+ >>> ctx = dfn.SessionContext()
992+ >>> df = ctx.from_pydict({"a": ["hello 42"]})
993+ >>> result = df.select(
994+ ... dfn.functions.regexp_replace(
995+ ... dfn.col("a"), dfn.lit("\\d+"),
996+ ... dfn.lit("XX")
997+ ... ).alias("r")
998+ ... )
999+ >>> result.collect_column("r")[0].as_py()
1000+ 'hello XX'
9331001 """
9341002 if flags is not None :
9351003 flags = flags .expr
@@ -943,6 +1011,16 @@ def regexp_count(
9431011
9441012 Optional start position (the first position is 1) to search for the regular
9451013 expression.
1014+
1015+ Examples:
1016+ ---------
1017+ >>> ctx = dfn.SessionContext()
1018+ >>> df = ctx.from_pydict({"a": ["abcabc"]})
1019+ >>> result = df.select(
1020+ ... dfn.functions.regexp_count(dfn.col("a"), dfn.lit("abc")).alias("c"))
1021+ >>> result = result
1022+ >>> result.collect_column("c")[0].as_py()
1023+ 2
9461024 """
9471025 if flags is not None :
9481026 flags = flags .expr
@@ -958,12 +1036,24 @@ def regexp_instr(
9581036 flags : Expr | None = None ,
9591037 sub_expr : Expr | None = None ,
9601038) -> Expr :
961- """Returns the position of a regular expression match in a string.
1039+ r """Returns the position of a regular expression match in a string.
9621040
9631041 Searches ``values`` for the ``n``-th occurrence of ``regex``, starting at position
9641042 ``start`` (the first position is 1). Returns the starting or ending position based
9651043 on ``end_position``. Use ``flags`` to control regex behavior and ``sub_expr`` to
9661044 return the position of a specific capture group instead of the entire match.
1045+
1046+ Examples:
1047+ ---------
1048+ >>> ctx = dfn.SessionContext()
1049+ >>> df = ctx.from_pydict({"a": ["hello 42 world"]})
1050+ >>> result = df.select(
1051+ ... dfn.functions.regexp_instr(
1052+ ... dfn.col("a"), dfn.lit("\\d+")
1053+ ... ).alias("pos")
1054+ ... )
1055+ >>> result.collect_column("pos")[0].as_py()
1056+ 7
9671057 """
9681058 start = start .expr if start is not None else None
9691059 n = n .expr if n is not None else None
@@ -1030,22 +1120,66 @@ def rtrim(arg: Expr) -> Expr:
10301120
10311121
def sha224(arg: Expr) -> Expr:
    """Compute the SHA-224 hash of a binary string.

    Args:
        arg: Expression to hash.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello"]})
    >>> result = df.select(
    ...     dfn.functions.sha224(dfn.col("a")).alias("h")
    ... )
    >>> len(result.collect_column("h")[0].as_py()) > 0
    True
    """
    digest = f.sha224(arg.expr)
    return Expr(digest)
10351136
10361137
def sha256(arg: Expr) -> Expr:
    """Compute the SHA-256 hash of a binary string.

    Args:
        arg: Expression to hash.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello"]})
    >>> result = df.select(
    ...     dfn.functions.sha256(dfn.col("a")).alias("h")
    ... )
    >>> len(result.collect_column("h")[0].as_py()) > 0
    True
    """
    digest = f.sha256(arg.expr)
    return Expr(digest)
10401152
10411153
def sha384(arg: Expr) -> Expr:
    """Compute the SHA-384 hash of a binary string.

    Args:
        arg: Expression to hash.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello"]})
    >>> result = df.select(
    ...     dfn.functions.sha384(dfn.col("a")).alias("h")
    ... )
    >>> len(result.collect_column("h")[0].as_py()) > 0
    True
    """
    digest = f.sha384(arg.expr)
    return Expr(digest)
10451168
10461169
def sha512(arg: Expr) -> Expr:
    """Compute the SHA-512 hash of a binary string.

    Args:
        arg: Expression to hash.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": ["hello"]})
    >>> result = df.select(
    ...     dfn.functions.sha512(dfn.col("a")).alias("h")
    ... )
    >>> len(result.collect_column("h")[0].as_py()) > 0
    True
    """
    digest = f.sha512(arg.expr)
    return Expr(digest)
10501184
10511185
@@ -1370,18 +1504,55 @@ def range(start: Expr, stop: Expr, step: Expr) -> Expr:
13701504
13711505
def uuid() -> Expr:
    """Return a uuid v4 as a string value.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1]})
    >>> result = df.select(
    ...     dfn.functions.uuid().alias("u")
    ... )
    >>> len(result.collect_column("u")[0].as_py()) == 36
    True
    """
    generated = f.uuid()
    return Expr(generated)
13751520
13761521
def struct(*args: Expr) -> Expr:
    """Build a struct out of the given arguments.

    Args:
        *args: Expressions that become the fields of the struct, in order.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1], "b": [2]})
    >>> result = df.select(
    ...     dfn.functions.struct(
    ...         dfn.col("a"), dfn.col("b")
    ...     ).alias("s")
    ... )
    >>> result.collect_column("s")[0].as_py() == {"c0": 1, "c1": 2}
    True
    """
    # Unwrap every Python-level Expr before handing off to the internal call.
    field_exprs = [member.expr for member in args]
    return Expr(f.struct(*field_exprs))
13811539
13821540
13831541def named_struct (name_pairs : list [tuple [str , Expr ]]) -> Expr :
1384- """Returns a struct with the given names and arguments pairs."""
1542+ """Returns a struct with the given names and arguments pairs.
1543+
1544+ Examples:
1545+ ---------
1546+ >>> ctx = dfn.SessionContext()
1547+ >>> df = ctx.from_pydict({"a": [1]})
1548+ >>> result = df.select(
1549+ ... dfn.functions.named_struct(
1550+ ... [("x", dfn.lit(10)), ("y", dfn.lit(20))]
1551+ ... ).alias("s")
1552+ ... )
1553+ >>> result.collect_column("s")[0].as_py() == {"x": 10, "y": 20}
1554+ True
1555+ """
13851556 name_pair_exprs = [
13861557 [Expr .literal (pa .scalar (pair [0 ], type = pa .string ())), pair [1 ]]
13871558 for pair in name_pairs
@@ -1398,12 +1569,31 @@ def from_unixtime(arg: Expr) -> Expr:
13981569
13991570
def arrow_typeof(arg: Expr) -> Expr:
    """Return the Arrow type of the expression.

    Args:
        arg: Expression whose Arrow type is reported.

    Examples:
    ---------
    >>> ctx = dfn.SessionContext()
    >>> df = ctx.from_pydict({"a": [1]})
    >>> result = df.select(dfn.functions.arrow_typeof(dfn.col("a")).alias("t"))
    >>> result.collect_column("t")[0].as_py()
    'Int64'
    """
    type_name = f.arrow_typeof(arg.expr)
    return Expr(type_name)
14031583
14041584
def arrow_cast(expr: Expr, data_type: Expr) -> Expr:
    """Cast an expression to a specified data type.

    Args:
        expr: Expression to cast.
        data_type: Expression naming the target data type.

    Examples:
    ---------
    The example below runs the SQL function of the same name rather than
    calling this Python wrapper directly.

    >>> ctx = dfn.SessionContext()
    >>> result = ctx.sql(
    ...     "SELECT arrow_cast(1, 'Float64') as c"
    ... )
    >>> result.collect_column("c")[0].as_py()
    1.0
    """
    source, target_type = expr.expr, data_type.expr
    return Expr(f.arrow_cast(source, target_type))
14081598
14091599
0 commit comments