Skip to content

Commit 2a90ff6

Browse files
Adez017 and alamb authored
Added Example for Statistical Functions in Docs (#16927)
* Update aggregate_functions.md * Update aggregate_functions.md * formatting fix * Update aggregate_functions.md * Update docs/source/user-guide/sql/aggregate_functions.md Co-authored-by: Andrew Lamb <[email protected]> * Update docs/source/user-guide/sql/aggregate_functions.md Co-authored-by: Andrew Lamb <[email protected]> * Update docs/source/user-guide/sql/aggregate_functions.md Co-authored-by: Andrew Lamb <[email protected]> * Update aggregate_functions.md * Update aggregate_functions.md * Update aggregate_functions.md * Add examples to code * Alamb Update * Updated docs, build * prettier * Updates + prettier --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent d376a32 commit 2a90ff6

File tree

2 files changed

+410
-0
lines changed

2 files changed

+410
-0
lines changed

datafusion/functions-aggregate/src/regr.rs

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,29 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
144144
Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k*X + b) using minimal RSS fitting.",
145145

146146
"regr_slope(expression_y, expression_x)")
147+
.with_sql_example(
148+
r#"```sql
149+
create table weekly_performance(day int, user_signups int) as values (1,60), (2,65), (3, 70), (4,75), (5,80);
150+
select * from weekly_performance;
151+
+-----+--------------+
152+
| day | user_signups |
153+
+-----+--------------+
154+
| 1 | 60 |
155+
| 2 | 65 |
156+
| 3 | 70 |
157+
| 4 | 75 |
158+
| 5 | 80 |
159+
+-----+--------------+
160+
161+
SELECT regr_slope(user_signups, day) AS slope FROM weekly_performance;
162+
+--------+
163+
| slope |
164+
+--------+
165+
| 5.0 |
166+
+--------+
167+
```
168+
"#
169+
)
147170
.with_standard_argument("expression_y", Some("Dependent variable"))
148171
.with_standard_argument("expression_x", Some("Independent variable"))
149172
.build()
@@ -157,6 +180,30 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
157180
this function returns b.",
158181

159182
"regr_intercept(expression_y, expression_x)")
183+
.with_sql_example(
184+
r#"```sql
185+
create table weekly_performance(week int, productivity_score int) as values (1,60), (2,65), (3, 70), (4,75), (5,80);
186+
select * from weekly_performance;
187+
+------+---------------------+
188+
| week | productivity_score |
189+
+------+---------------------+
190+
| 1 | 60 |
191+
| 2 | 65 |
192+
| 3 | 70 |
193+
| 4 | 75 |
194+
| 5 | 80 |
195+
+------+---------------------+
196+
197+
SELECT regr_intercept(productivity_score, week) AS intercept FROM weekly_performance;
198+
+----------+
199+
|intercept |
201+
+----------+
202+
| 55.0 |
203+
+----------+
204+
```
205+
"#
206+
)
160207
.with_standard_argument("expression_y", Some("Dependent variable"))
161208
.with_standard_argument("expression_x", Some("Independent variable"))
162209
.build()
@@ -169,6 +216,29 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
169216
"Counts the number of non-null paired data points.",
170217
"regr_count(expression_y, expression_x)",
171218
)
219+
.with_sql_example(
220+
r#"```sql
221+
create table daily_metrics(day int, user_signups int) as values (1,100), (2,120), (3, NULL), (4,110), (5,NULL);
222+
select * from daily_metrics;
223+
+-----+---------------+
224+
| day | user_signups |
225+
+-----+---------------+
226+
| 1 | 100 |
227+
| 2 | 120 |
228+
| 3 | NULL |
229+
| 4 | 110 |
230+
| 5 | NULL |
231+
+-----+---------------+
232+
233+
SELECT regr_count(user_signups, day) AS valid_pairs FROM daily_metrics;
234+
+-------------+
235+
| valid_pairs |
236+
+-------------+
237+
| 3 |
238+
+-------------+
239+
```
240+
"#
241+
)
172242
.with_standard_argument("expression_y", Some("Dependent variable"))
173243
.with_standard_argument("expression_x", Some("Independent variable"))
174244
.build(),
@@ -181,6 +251,29 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
181251
"Computes the square of the correlation coefficient between the independent and dependent variables.",
182252

183253
"regr_r2(expression_y, expression_x)")
254+
.with_sql_example(
255+
r#"```sql
256+
create table weekly_performance(day int, user_signups int) as values (1,60), (2,65), (3, 70), (4,75), (5,80);
257+
select * from weekly_performance;
258+
+-----+--------------+
259+
| day | user_signups |
260+
+-----+--------------+
261+
| 1 | 60 |
262+
| 2 | 65 |
263+
| 3 | 70 |
264+
| 4 | 75 |
265+
| 5 | 80 |
266+
+-----+--------------+
267+
268+
SELECT regr_r2(user_signups, day) AS r_squared FROM weekly_performance;
269+
+---------+
270+
|r_squared|
271+
+---------+
272+
| 1.0 |
273+
+---------+
274+
```
275+
"#
276+
)
184277
.with_standard_argument("expression_y", Some("Dependent variable"))
185278
.with_standard_argument("expression_x", Some("Independent variable"))
186279
.build()
@@ -193,6 +286,29 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
193286
"Computes the average of the independent variable (input) expression_x for the non-null paired data points.",
194287

195288
"regr_avgx(expression_y, expression_x)")
289+
.with_sql_example(
290+
r#"```sql
291+
create table daily_sales(day int, total_sales int) as values (1,100), (2,150), (3,200), (4,NULL), (5,250);
292+
select * from daily_sales;
293+
+-----+-------------+
294+
| day | total_sales |
295+
+-----+-------------+
296+
| 1 | 100 |
297+
| 2 | 150 |
298+
| 3 | 200 |
299+
| 4 | NULL |
300+
| 5 | 250 |
301+
+-----+-------------+
302+
303+
SELECT regr_avgx(total_sales, day) AS avg_day FROM daily_sales;
304+
+----------+
305+
| avg_day |
306+
+----------+
307+
| 2.75 |
308+
+----------+
309+
```
310+
"#
311+
)
196312
.with_standard_argument("expression_y", Some("Dependent variable"))
197313
.with_standard_argument("expression_x", Some("Independent variable"))
198314
.build()
@@ -205,6 +321,30 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
205321
"Computes the average of the dependent variable (output) expression_y for the non-null paired data points.",
206322

207323
"regr_avgy(expression_y, expression_x)")
324+
.with_sql_example(
325+
r#"```sql
326+
create table daily_temperature(day int, temperature int) as values (1,30), (2,32), (3, NULL), (4,35), (5,36);
327+
select * from daily_temperature;
328+
+-----+-------------+
329+
| day | temperature |
330+
+-----+-------------+
331+
| 1 | 30 |
332+
| 2 | 32 |
333+
| 3 | NULL |
334+
| 4 | 35 |
335+
| 5 | 36 |
336+
+-----+-------------+
337+
338+
-- temperature is the dependent variable (Y); day is the independent variable (X)
339+
SELECT regr_avgy(temperature, day) AS avg_temperature FROM daily_temperature;
340+
+-----------------+
341+
| avg_temperature |
342+
+-----------------+
343+
| 33.25 |
344+
+-----------------+
345+
```
346+
"#
347+
)
208348
.with_standard_argument("expression_y", Some("Dependent variable"))
209349
.with_standard_argument("expression_x", Some("Independent variable"))
210350
.build()
@@ -217,6 +357,29 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
217357
"Computes the sum of squares of the independent variable.",
218358
"regr_sxx(expression_y, expression_x)",
219359
)
360+
.with_sql_example(
361+
r#"```sql
362+
create table study_hours(student_id int, hours int, test_score int) as values (1,2,55), (2,4,65), (3,6,75), (4,8,85), (5,10,95);
363+
select * from study_hours;
364+
+------------+-------+------------+
365+
| student_id | hours | test_score |
366+
+------------+-------+------------+
367+
| 1 | 2 | 55 |
368+
| 2 | 4 | 65 |
369+
| 3 | 6 | 75 |
370+
| 4 | 8 | 85 |
371+
| 5 | 10 | 95 |
372+
+------------+-------+------------+
373+
374+
SELECT regr_sxx(test_score, hours) AS sxx FROM study_hours;
375+
+------+
376+
| sxx |
377+
+------+
378+
| 40.0 |
379+
+------+
380+
```
381+
"#
382+
)
220383
.with_standard_argument("expression_y", Some("Dependent variable"))
221384
.with_standard_argument("expression_x", Some("Independent variable"))
222385
.build(),
@@ -229,6 +392,27 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
229392
"Computes the sum of squares of the dependent variable.",
230393
"regr_syy(expression_y, expression_x)",
231394
)
395+
.with_sql_example(
396+
r#"```sql
397+
create table employee_productivity(week int, productivity_score int) as values (1,60), (2,65), (3,70);
398+
select * from employee_productivity;
399+
+------+--------------------+
400+
| week | productivity_score |
401+
+------+--------------------+
402+
| 1 | 60 |
403+
| 2 | 65 |
404+
| 3 | 70 |
405+
+------+--------------------+
406+
407+
SELECT regr_syy(productivity_score, week) AS sum_squares_y FROM employee_productivity;
408+
+---------------+
409+
| sum_squares_y |
410+
+---------------+
411+
| 50.0 |
412+
+---------------+
413+
```
414+
"#
415+
)
232416
.with_standard_argument("expression_y", Some("Dependent variable"))
233417
.with_standard_argument("expression_x", Some("Independent variable"))
234418
.build(),
@@ -241,6 +425,27 @@ static DOCUMENTATION: LazyLock<HashMap<RegrType, Documentation>> = LazyLock::new
241425
"Computes the sum of products of paired data points.",
242426
"regr_sxy(expression_y, expression_x)",
243427
)
428+
.with_sql_example(
429+
r#"```sql
430+
create table employee_productivity(week int, productivity_score int) as values (1,60), (2,65), (3,70);
431+
select * from employee_productivity;
432+
+------+--------------------+
433+
| week | productivity_score |
434+
+------+--------------------+
435+
| 1 | 60 |
436+
| 2 | 65 |
437+
| 3 | 70 |
438+
+------+--------------------+
439+
440+
SELECT regr_sxy(productivity_score, week) AS sum_product_deviations FROM employee_productivity;
441+
+------------------------+
442+
| sum_product_deviations |
443+
+------------------------+
444+
| 10.0 |
445+
+------------------------+
446+
```
447+
"#
448+
)
244449
.with_standard_argument("expression_y", Some("Dependent variable"))
245450
.with_standard_argument("expression_x", Some("Independent variable"))
246451
.build(),

0 commit comments

Comments
 (0)