@@ -1894,6 +1894,15 @@ def approx_distinct(
18941894 Args:
18951895 expression: Values to check for distinct entries
18961896 filter: If provided, only compute against rows for which the filter is True
1897+
1898+ Examples:
1899+ ---------
1900+ >>> ctx = dfn.SessionContext()
1901+ >>> df = ctx.from_pydict({"a": [1, 1, 2, 3]})
1902+ >>> result = df.aggregate(
1903+ ... [], [dfn.functions.approx_distinct(dfn.col("a")).alias("v")])
1904+ >>> result.collect_column("v")[0].as_py() >= 2
1905+ True
18971906 """
18981907 filter_raw = filter .expr if filter is not None else None
18991908
@@ -1912,6 +1921,15 @@ def approx_median(expression: Expr, filter: Expr | None = None) -> Expr:
19121921 Args:
19131922 expression: Values to find the median for
19141923 filter: If provided, only compute against rows for which the filter is True
1924+
1925+ Examples:
1926+ ---------
1927+ >>> ctx = dfn.SessionContext()
1928+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
1929+ >>> result = df.aggregate(
1930+ ... [], [dfn.functions.approx_median(dfn.col("a")).alias("v")])
1931+ >>> result.collect_column("v")[0].as_py()
1932+ 2.0
19151933 """
19161934 filter_raw = filter .expr if filter is not None else None
19171935 return Expr (f .approx_median (expression .expr , filter = filter_raw ))
@@ -1943,6 +1961,15 @@ def approx_percentile_cont(
19431961 percentile: This must be between 0.0 and 1.0, inclusive
19441962 num_centroids: Max bin size for the t-digest algorithm
19451963 filter: If provided, only compute against rows for which the filter is True
1964+
1965+ Examples:
1966+ ---------
1967+ >>> ctx = dfn.SessionContext()
1968+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0, 4.0, 5.0]})
1969+ >>> result = df.aggregate(
1970+ ... [], [dfn.functions.approx_percentile_cont(dfn.col("a"), 0.5).alias("v")])
1971+ >>> result.collect_column("v")[0].as_py()
1972+ 3.0
19461973 """
19471974 sort_expr_raw = sort_or_default (sort_expression )
19481975 filter_raw = filter .expr if filter is not None else None
@@ -1975,6 +2002,15 @@ def approx_percentile_cont_with_weight(
19752002 num_centroids: Max bin size for the t-digest algorithm
19762003 filter: If provided, only compute against rows for which the filter is True
19772004
2005+ Examples:
2006+ ---------
2007+ >>> ctx = dfn.SessionContext()
2008+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "w": [1.0, 1.0, 1.0]})
2009+ >>> result = df.aggregate(
2010+ ... [], [dfn.functions.approx_percentile_cont_with_weight(dfn.col("a"),
2011+ ... dfn.col("w"), 0.5).alias("v")])
2012+ >>> result.collect_column("v")[0].as_py()
2013+ 2.0
19782014 """
19792015 sort_expr_raw = sort_or_default (sort_expression )
19802016 filter_raw = filter .expr if filter is not None else None
@@ -2038,6 +2074,14 @@ def avg(
20382074 Args:
20392075 expression: Values to compute the average of
20402076 filter: If provided, only compute against rows for which the filter is True
2077+
2078+ Examples:
2079+ ---------
2080+ >>> ctx = dfn.SessionContext()
2081+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2082+ >>> result = df.aggregate([], [dfn.functions.avg(dfn.col("a")).alias("v")])
2083+ >>> result.collect_column("v")[0].as_py()
2084+ 2.0
20412085 """
20422086 filter_raw = filter .expr if filter is not None else None
20432087 return Expr (f .avg (expression .expr , filter = filter_raw ))
@@ -2076,6 +2120,14 @@ def count(
20762120 expressions: Values to count
20772121 distinct: If True, count each unique value of the expressions only once
20782122 filter: If provided, only compute against rows for which the filter is True
2123+
2124+ Examples:
2125+ ---------
2126+ >>> ctx = dfn.SessionContext()
2127+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2128+ >>> result = df.aggregate([], [dfn.functions.count(dfn.col("a")).alias("v")])
2129+ >>> result.collect_column("v")[0].as_py()
2130+ 3
20792131 """
20802132 filter_raw = filter .expr if filter is not None else None
20812133
@@ -2140,6 +2192,14 @@ def max(expression: Expr, filter: Expr | None = None) -> Expr:
21402192 Args:
21412193 expression: The value to find the maximum of
21422194 filter: If provided, only compute against rows for which the filter is True
2195+
2196+ Examples:
2197+ ---------
2198+ >>> ctx = dfn.SessionContext()
2199+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2200+ >>> result = df.aggregate([], [dfn.functions.max(dfn.col("a")).alias("v")])
2201+ >>> result.collect_column("v")[0].as_py()
2202+ 3
21432203 """
21442204 filter_raw = filter .expr if filter is not None else None
21452205 return Expr (f .max (expression .expr , filter = filter_raw ))
@@ -2149,6 +2209,14 @@ def mean(expression: Expr, filter: Expr | None = None) -> Expr:
21492209 """Returns the average (mean) value of the argument.
21502210
21512211 This is an alias for :py:func:`avg`.
2212+
2213+ Examples:
2214+ ---------
2215+ >>> ctx = dfn.SessionContext()
2216+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2217+ >>> result = df.aggregate([], [dfn.functions.mean(dfn.col("a")).alias("v")])
2218+ >>> result.collect_column("v")[0].as_py()
2219+ 2.0
21522220 """
21532221 return avg (expression , filter )
21542222
@@ -2168,6 +2236,14 @@ def median(
21682236 expression: The value to compute the median of
21692237 distinct: If True, evaluate each unique value of expression only once
21702238 filter: If provided, only compute against rows for which the filter is True
2239+
2240+ Examples:
2241+ ---------
2242+ >>> ctx = dfn.SessionContext()
2243+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2244+ >>> result = df.aggregate([], [dfn.functions.median(dfn.col("a")).alias("v")])
2245+ >>> result.collect_column("v")[0].as_py()
2246+ 2.0
21712247 """
21722248 filter_raw = filter .expr if filter is not None else None
21732249 return Expr (f .median (expression .expr , distinct = distinct , filter = filter_raw ))
@@ -2182,6 +2258,14 @@ def min(expression: Expr, filter: Expr | None = None) -> Expr:
21822258 Args:
21832259 expression: The value to find the minimum of
21842260 filter: If provided, only compute against rows for which the filter is True
2261+
2262+ Examples:
2263+ ---------
2264+ >>> ctx = dfn.SessionContext()
2265+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2266+ >>> result = df.aggregate([], [dfn.functions.min(dfn.col("a")).alias("v")])
2267+ >>> result.collect_column("v")[0].as_py()
2268+ 1
21852269 """
21862270 filter_raw = filter .expr if filter is not None else None
21872271 return Expr (f .min (expression .expr , filter = filter_raw ))
@@ -2201,6 +2285,14 @@ def sum(
22012285 Args:
22032287 expression: Values to sum
22032287 filter: If provided, only compute against rows for which the filter is True
2288+
2289+ Examples:
2290+ ---------
2291+ >>> ctx = dfn.SessionContext()
2292+ >>> df = ctx.from_pydict({"a": [1, 2, 3]})
2293+ >>> result = df.aggregate([], [dfn.functions.sum(dfn.col("a")).alias("v")])
2294+ >>> result.collect_column("v")[0].as_py()
2295+ 6
22042296 """
22052297 filter_raw = filter .expr if filter is not None else None
22062298 return Expr (f .sum (expression .expr , filter = filter_raw ))
@@ -2618,6 +2710,14 @@ def bit_and(expression: Expr, filter: Expr | None = None) -> Expr:
26182710 Args:
26192711 expression: Argument to perform bitwise calculation on
26202712 filter: If provided, only compute against rows for which the filter is True
2713+
2714+ Examples:
2715+ ---------
2716+ >>> ctx = dfn.SessionContext()
2717+ >>> df = ctx.from_pydict({"a": [7, 3]})
2718+ >>> result = df.aggregate([], [dfn.functions.bit_and(dfn.col("a")).alias("v")])
2719+ >>> result.collect_column("v")[0].as_py()
2720+ 3
26212721 """
26222722 filter_raw = filter .expr if filter is not None else None
26232723 return Expr (f .bit_and (expression .expr , filter = filter_raw ))
@@ -2634,6 +2734,14 @@ def bit_or(expression: Expr, filter: Expr | None = None) -> Expr:
26342734 Args:
26352735 expression: Argument to perform bitwise calculation on
26362736 filter: If provided, only compute against rows for which the filter is True
2737+
2738+ Examples:
2739+ ---------
2740+ >>> ctx = dfn.SessionContext()
2741+ >>> df = ctx.from_pydict({"a": [1, 2]})
2742+ >>> result = df.aggregate([], [dfn.functions.bit_or(dfn.col("a")).alias("v")])
2743+ >>> result.collect_column("v")[0].as_py()
2744+ 3
26372745 """
26382746 filter_raw = filter .expr if filter is not None else None
26392747 return Expr (f .bit_or (expression .expr , filter = filter_raw ))
@@ -2653,6 +2761,14 @@ def bit_xor(
26532761 expression: Argument to perform bitwise calculation on
26542762 distinct: If True, evaluate each unique value of expression only once
26552763 filter: If provided, only compute against rows for which the filter is True
2764+
2765+ Examples:
2766+ ---------
2767+ >>> ctx = dfn.SessionContext()
2768+ >>> df = ctx.from_pydict({"a": [5, 3]})
2769+ >>> result = df.aggregate([], [dfn.functions.bit_xor(dfn.col("a")).alias("v")])
2770+ >>> result.collect_column("v")[0].as_py()
2771+ 6
26562772 """
26572773 filter_raw = filter .expr if filter is not None else None
26582774 return Expr (f .bit_xor (expression .expr , distinct = distinct , filter = filter_raw ))
@@ -2670,6 +2786,14 @@ def bool_and(expression: Expr, filter: Expr | None = None) -> Expr:
26702786 Args:
26712787 expression: Argument to perform calculation on
26722788 filter: If provided, only compute against rows for which the filter is True
2789+
2790+ Examples:
2791+ ---------
2792+ >>> ctx = dfn.SessionContext()
2793+ >>> df = ctx.from_pydict({"a": [True, True, False]})
2794+ >>> result = df.aggregate([], [dfn.functions.bool_and(dfn.col("a")).alias("v")])
2795+ >>> result.collect_column("v")[0].as_py()
2796+ False
26732797 """
26742798 filter_raw = filter .expr if filter is not None else None
26752799 return Expr (f .bool_and (expression .expr , filter = filter_raw ))
@@ -2687,6 +2811,14 @@ def bool_or(expression: Expr, filter: Expr | None = None) -> Expr:
26872811 Args:
26882812 expression: Argument to perform calculation on
26892813 filter: If provided, only compute against rows for which the filter is True
2814+
2815+ Examples:
2816+ ---------
2817+ >>> ctx = dfn.SessionContext()
2818+ >>> df = ctx.from_pydict({"a": [False, False, True]})
2819+ >>> result = df.aggregate([], [dfn.functions.bool_or(dfn.col("a")).alias("v")])
2820+ >>> result.collect_column("v")[0].as_py()
2821+ True
26902822 """
26912823 filter_raw = filter .expr if filter is not None else None
26922824 return Expr (f .bool_or (expression .expr , filter = filter_raw ))
@@ -3077,6 +3209,15 @@ def string_agg(
30773209 For example::
30783210
30793211 df.aggregate([], string_agg(col("a"), ",", order_by="b"))
3212+
3213+ Examples:
3214+ ---------
3215+ >>> ctx = dfn.SessionContext()
3216+ >>> df = ctx.from_pydict({"a": ["x", "y", "z"]})
3217+ >>> result = df.aggregate(
3218+ ... [], [dfn.functions.string_agg(dfn.col("a"), ",", order_by="a").alias("s")])
3219+ >>> result.collect_column("s")[0].as_py()
3220+ 'x,y,z'
30803221 """
30813222 order_by_raw = sort_list_to_raw_sort_list (order_by )
30823223 filter_raw = filter .expr if filter is not None else None
0 commit comments