@@ -2055,6 +2055,15 @@ def corr(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr:
20552055 value_y: The dependent variable for correlation
20562056 value_x: The independent variable for correlation
20572057 filter: If provided, only compute against rows for which the filter is True
2058+
2059+ Examples:
2060+ ---------
2061+ >>> ctx = dfn.SessionContext()
2062+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "b": [1.0, 2.0, 3.0]})
2063+ >>> result = df.aggregate(
2064+ ... [], [dfn.functions.corr(dfn.col("a"), dfn.col("b")).alias("v")])
2065+ >>> result.collect_column("v")[0].as_py()
2066+ 1.0
20582067 """
20592068 filter_raw = filter .expr if filter is not None else None
20602069 return Expr (f .corr (value_y .expr , value_x .expr , filter = filter_raw ))
@@ -2101,6 +2110,22 @@ def covar_pop(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr:
21012110 value_y: The dependent variable for covariance
21022111 value_x: The independent variable for covariance
21032112 filter: If provided, only compute against rows for which the filter is True
2113+
2114+ Examples:
2115+ ---------
2116+ >>> import builtins
2117+ >>> ctx = dfn.SessionContext()
2118+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
2119+ >>> result = df.aggregate(
2120+ ... [],
2121+ ... [dfn.functions.covar_pop(
2122+ ... dfn.col("a"), dfn.col("b")
2123+ ... ).alias("v")]
2124+ ... )
2125+ >>> builtins.round(
2126+ ... result.collect_column("v")[0].as_py(), 4
2127+ ... )
2128+ 0.6667
21042129 """
21052130 filter_raw = filter .expr if filter is not None else None
21062131 return Expr (f .covar_pop (value_y .expr , value_x .expr , filter = filter_raw ))
@@ -2118,6 +2143,15 @@ def covar_samp(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr
21182143 value_y: The dependent variable for covariance
21192144 value_x: The independent variable for covariance
21202145 filter: If provided, only compute against rows for which the filter is True
2146+
2147+ Examples:
2148+ ---------
2149+ >>> ctx = dfn.SessionContext()
2150+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
2151+ >>> result = df.aggregate(
2152+ ... [], [dfn.functions.covar_samp(dfn.col("a"), dfn.col("b")).alias("v")])
2153+ >>> result.collect_column("v")[0].as_py()
2154+ 1.0
21212155 """
21222156 filter_raw = filter .expr if filter is not None else None
21232157 return Expr (f .covar_samp (value_y .expr , value_x .expr , filter = filter_raw ))
@@ -2127,6 +2161,15 @@ def covar(value_y: Expr, value_x: Expr, filter: Expr | None = None) -> Expr:
21272161 """Computes the sample covariance.
21282162
21292163 This is an alias for :py:func:`covar_samp`.
2164+
2165+ Examples:
2166+ ---------
2167+ >>> ctx = dfn.SessionContext()
2168+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
2169+ >>> result = df.aggregate(
2170+ ... [], [dfn.functions.covar(dfn.col("a"), dfn.col("b")).alias("v")])
2171+ >>> result.collect_column("v")[0].as_py()
2172+ 1.0
21302173 """
21312174 return covar_samp (value_y , value_x , filter )
21322175
@@ -2215,6 +2258,14 @@ def stddev(expression: Expr, filter: Expr | None = None) -> Expr:
22152258 Args:
22162259 expression: The value to compute the standard deviation of
22172260 filter: If provided, only compute against rows for which the filter is True
2261+
2262+ Examples:
2263+ ---------
2264+ >>> ctx = dfn.SessionContext()
2265+ >>> df = ctx.from_pydict({"a": [2.0, 4.0, 6.0]})
2266+ >>> result = df.aggregate([], [dfn.functions.stddev(dfn.col("a")).alias("v")])
2267+ >>> result.collect_column("v")[0].as_py()
2268+ 2.0
22182269 """
22192270 filter_raw = filter .expr if filter is not None else None
22202271 return Expr (f .stddev (expression .expr , filter = filter_raw ))
@@ -2229,6 +2280,14 @@ def stddev_pop(expression: Expr, filter: Expr | None = None) -> Expr:
22292280 Args:
22302281 expression: The value to compute the standard deviation of
22312282 filter: If provided, only compute against rows for which the filter is True
2283+
2284+ Examples:
2285+ ---------
2286+ >>> ctx = dfn.SessionContext()
2287+ >>> df = ctx.from_pydict({"a": [1.0, 3.0]})
2288+ >>> result = df.aggregate([], [dfn.functions.stddev_pop(dfn.col("a")).alias("v")])
2289+ >>> result.collect_column("v")[0].as_py()
2290+ 1.0
22322291 """
22332292 filter_raw = filter .expr if filter is not None else None
22342293 return Expr (f .stddev_pop (expression .expr , filter = filter_raw ))
@@ -2238,6 +2297,14 @@ def stddev_samp(arg: Expr, filter: Expr | None = None) -> Expr:
22382297 """Computes the sample standard deviation of the argument.
22392298
22402299 This is an alias for :py:func:`stddev`.
2300+
2301+ Examples:
2302+ ---------
2303+ >>> ctx = dfn.SessionContext()
2304+ >>> df = ctx.from_pydict({"a": [2.0, 4.0, 6.0]})
2305+ >>> result = df.aggregate([], [dfn.functions.stddev_samp(dfn.col("a")).alias("v")])
2306+ >>> result.collect_column("v")[0].as_py()
2307+ 2.0
22412308 """
22422309 return stddev (arg , filter = filter )
22432310
@@ -2246,6 +2313,14 @@ def var(expression: Expr, filter: Expr | None = None) -> Expr:
22462313 """Computes the sample variance of the argument.
22472314
22482315 This is an alias for :py:func:`var_samp`.
2316+
2317+ Examples:
2318+ ---------
2319+ >>> ctx = dfn.SessionContext()
2320+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2321+ >>> result = df.aggregate([], [dfn.functions.var(dfn.col("a")).alias("v")])
2322+ >>> result.collect_column("v")[0].as_py()
2323+ 1.0
22492324 """
22502325 return var_samp (expression , filter )
22512326
@@ -2259,6 +2334,14 @@ def var_pop(expression: Expr, filter: Expr | None = None) -> Expr:
22592334 Args:
22602335 expression: The variable to compute the variance for
22612336 filter: If provided, only compute against rows for which the filter is True
2337+
2338+ Examples:
2339+ ---------
2340+ >>> ctx = dfn.SessionContext()
2341+ >>> df = ctx.from_pydict({"a": [0.0, 2.0]})
2342+ >>> result = df.aggregate([], [dfn.functions.var_pop(dfn.col("a")).alias("v")])
2343+ >>> result.collect_column("v")[0].as_py()
2344+ 1.0
22622345 """
22632346 filter_raw = filter .expr if filter is not None else None
22642347 return Expr (f .var_pop (expression .expr , filter = filter_raw ))
@@ -2273,6 +2356,14 @@ def var_samp(expression: Expr, filter: Expr | None = None) -> Expr:
22732356 Args:
22742357 expression: The variable to compute the variance for
22752358 filter: If provided, only compute against rows for which the filter is True
2359+
2360+ Examples:
2361+ ---------
2362+ >>> ctx = dfn.SessionContext()
2363+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2364+ >>> result = df.aggregate([], [dfn.functions.var_samp(dfn.col("a")).alias("v")])
2365+ >>> result.collect_column("v")[0].as_py()
2366+ 1.0
22762367 """
22772368 filter_raw = filter .expr if filter is not None else None
22782369 return Expr (f .var_sample (expression .expr , filter = filter_raw ))
@@ -2282,6 +2373,14 @@ def var_sample(expression: Expr, filter: Expr | None = None) -> Expr:
22822373 """Computes the sample variance of the argument.
22832374
22842375 This is an alias for :py:func:`var_samp`.
2376+
2377+ Examples:
2378+ ---------
2379+ >>> ctx = dfn.SessionContext()
2380+ >>> df = ctx.from_pydict({"a": [1.0, 2.0, 3.0]})
2381+ >>> result = df.aggregate([], [dfn.functions.var_sample(dfn.col("a")).alias("v")])
2382+ >>> result.collect_column("v")[0].as_py()
2383+ 1.0
22852384 """
22862385 return var_samp (expression , filter )
22872386
@@ -2303,6 +2402,15 @@ def regr_avgx(
23032402 y: The linear regression dependent variable
23042403 x: The linear regression independent variable
23052404 filter: If provided, only compute against rows for which the filter is True
2405+
2406+ Examples:
2407+ ---------
2408+ >>> ctx = dfn.SessionContext()
2409+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [4.0, 5.0, 6.0]})
2410+ >>> result = df.aggregate(
2411+ ... [], [dfn.functions.regr_avgx(dfn.col("y"), dfn.col("x")).alias("v")])
2412+ >>> result.collect_column("v")[0].as_py()
2413+ 5.0
23062414 """
23072415 filter_raw = filter .expr if filter is not None else None
23082416
@@ -2326,6 +2434,15 @@ def regr_avgy(
23262434 y: The linear regression dependent variable
23272435 x: The linear regression independent variable
23282436 filter: If provided, only compute against rows for which the filter is True
2437+
2438+ Examples:
2439+ ---------
2440+ >>> ctx = dfn.SessionContext()
2441+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [4.0, 5.0, 6.0]})
2442+ >>> result = df.aggregate(
2443+ ... [], [dfn.functions.regr_avgy(dfn.col("y"), dfn.col("x")).alias("v")])
2444+ >>> result.collect_column("v")[0].as_py()
2445+ 2.0
23292446 """
23302447 filter_raw = filter .expr if filter is not None else None
23312448
@@ -2349,6 +2466,15 @@ def regr_count(
23492466 y: The linear regression dependent variable
23502467 x: The linear regression independent variable
23512468 filter: If provided, only compute against rows for which the filter is True
2469+
2470+ Examples:
2471+ ---------
2472+ >>> ctx = dfn.SessionContext()
2473+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [4.0, 5.0, 6.0]})
2474+ >>> result = df.aggregate(
2475+ ... [], [dfn.functions.regr_count(dfn.col("y"), dfn.col("x")).alias("v")])
2476+ >>> result.collect_column("v")[0].as_py()
2477+ 3
23522478 """
23532479 filter_raw = filter .expr if filter is not None else None
23542480
@@ -2372,6 +2498,15 @@ def regr_intercept(
23722498 y: The linear regression dependent variable
23732499 x: The linear regression independent variable
23742500 filter: If provided, only compute against rows for which the filter is True
2501+
2502+ Examples:
2503+ ---------
2504+ >>> ctx = dfn.SessionContext()
2505+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
2506+ >>> result = df.aggregate(
2507+ ... [], [dfn.functions.regr_intercept(dfn.col("y"), dfn.col("x")).alias("v")])
2508+ >>> result.collect_column("v")[0].as_py()
2509+ 0.0
23752510 """
23762511 filter_raw = filter .expr if filter is not None else None
23772512
@@ -2395,6 +2530,15 @@ def regr_r2(
23952530 y: The linear regression dependent variable
23962531 x: The linear regression independent variable
23972532 filter: If provided, only compute against rows for which the filter is True
2533+
2534+ Examples:
2535+ ---------
2536+ >>> ctx = dfn.SessionContext()
2537+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
2538+ >>> result = df.aggregate(
2539+ ... [], [dfn.functions.regr_r2(dfn.col("y"), dfn.col("x")).alias("v")])
2540+ >>> result.collect_column("v")[0].as_py()
2541+ 1.0
23982542 """
23992543 filter_raw = filter .expr if filter is not None else None
24002544
@@ -2418,6 +2562,15 @@ def regr_slope(
24182562 y: The linear regression dependent variable
24192563 x: The linear regression independent variable
24202564 filter: If provided, only compute against rows for which the filter is True
2565+
2566+ Examples:
2567+ ---------
2568+ >>> ctx = dfn.SessionContext()
2569+ >>> df = ctx.from_pydict({"y": [2.0, 4.0, 6.0], "x": [1.0, 2.0, 3.0]})
2570+ >>> result = df.aggregate(
2571+ ... [], [dfn.functions.regr_slope(dfn.col("y"), dfn.col("x")).alias("v")])
2572+ >>> result.collect_column("v")[0].as_py()
2573+ 2.0
24212574 """
24222575 filter_raw = filter .expr if filter is not None else None
24232576
@@ -2441,6 +2594,15 @@ def regr_sxx(
24412594 y: The linear regression dependent variable
24422595 x: The linear regression independent variable
24432596 filter: If provided, only compute against rows for which the filter is True
2597+
2598+ Examples:
2599+ ---------
2600+ >>> ctx = dfn.SessionContext()
2601+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [1.0, 2.0, 3.0]})
2602+ >>> result = df.aggregate(
2603+ ... [], [dfn.functions.regr_sxx(dfn.col("y"), dfn.col("x")).alias("v")])
2604+ >>> result.collect_column("v")[0].as_py()
2605+ 2.0
24442606 """
24452607 filter_raw = filter .expr if filter is not None else None
24462608
@@ -2464,6 +2626,15 @@ def regr_sxy(
24642626 y: The linear regression dependent variable
24652627 x: The linear regression independent variable
24662628 filter: If provided, only compute against rows for which the filter is True
2629+
2630+ Examples:
2631+ ---------
2632+ >>> ctx = dfn.SessionContext()
2633+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [1.0, 2.0, 3.0]})
2634+ >>> result = df.aggregate(
2635+ ... [], [dfn.functions.regr_sxy(dfn.col("y"), dfn.col("x")).alias("v")])
2636+ >>> result.collect_column("v")[0].as_py()
2637+ 2.0
24672638 """
24682639 filter_raw = filter .expr if filter is not None else None
24692640
@@ -2487,6 +2658,15 @@ def regr_syy(
24872658 y: The linear regression dependent variable
24882659 x: The linear regression independent variable
24892660 filter: If provided, only compute against rows for which the filter is True
2661+
2662+ Examples:
2663+ ---------
2664+ >>> ctx = dfn.SessionContext()
2665+ >>> df = ctx.from_pydict({"y": [1.0, 2.0, 3.0], "x": [1.0, 2.0, 3.0]})
2666+ >>> result = df.aggregate(
2667+ ... [], [dfn.functions.regr_syy(dfn.col("y"), dfn.col("x")).alias("v")])
2668+ >>> result.collect_column("v")[0].as_py()
2669+ 2.0
24902670 """
24912671 filter_raw = filter .expr if filter is not None else None
24922672
0 commit comments