Skip to content

Commit 61789c2

Browse files
authored
Merge pull request #33 from rich-iannone/docs-examples-improve
docs: improve the Examples section
2 parents 37e54c4 + f2d14cf commit 61789c2

File tree

26 files changed

+1263
-468
lines changed

26 files changed

+1263
-468
lines changed

docs/_quarto.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ website:
2121
- text: Get Started
2222
file: get-started/index.qmd
2323
- text: Examples
24-
file: examples/index.qmd
24+
file: demos/index.qmd
2525
- href: reference/index.qmd
2626
text: Reference
2727
right:

docs/demos/01-starter/index.qmd

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
2+
pagetitle: "Examples: Starter Validation"
3+
notebook-links: false
4+
page-navigation: false
5+
toc: false
6+
html-table-processing: none
7+
---
8+
9+
### Starter Validation
10+
11+
A validation with the basics.
12+
13+
```{python}
14+
#| echo: false
15+
16+
import pointblank as pb
17+
18+
validation = (
19+
pb.Validate( # Use pb.Validate to start
20+
data=pb.load_dataset(dataset="small_table", tbl_type="polars")
21+
)
22+
.col_vals_gt(columns="d", value=1000) # STEP 1 |
23+
.col_vals_le(columns="c", value=5) # STEP 2 | <-- Build up a validation plan
24+
.col_exists(columns=["date", "date_time"]) # STEP 3 |
25+
.interrogate() # This will execute all validation steps and collect intel
26+
)
27+
28+
validation
29+
```
30+
31+
```python
32+
import pointblank as pb
33+
34+
validation = (
35+
pb.Validate( # Use pb.Validate to start
36+
data=pb.load_dataset(dataset="small_table", tbl_type="polars")
37+
)
38+
.col_vals_gt(columns="d", value=1000) # STEP 1 |
39+
.col_vals_le(columns="c", value=5) # STEP 2 | <-- Build up a validation plan
40+
.col_exists(columns=["date", "date_time"]) # STEP 3 |
41+
.interrogate() # This will execute all validation steps and collect intel
42+
)
43+
44+
validation
45+
```
46+
47+
<details>
48+
<summary>Preview of Input Table</summary>
49+
50+
```{python}
51+
# | echo: false
52+
pb.preview(pb.load_dataset(dataset="small_table"), n_head=20, n_tail=20)
53+
```
54+
55+
</details>

docs/demos/02-advanced/index.qmd

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
---
2+
pagetitle: "Examples: Advanced Validation"
3+
notebook-links: false
4+
page-navigation: false
5+
toc: false
6+
html-table-processing: none
7+
---
8+
9+
### Advanced Validation
10+
11+
A validation with a comprehensive set of rules.
12+
13+
```{python}
14+
#| echo: false
15+
16+
import pointblank as pb
17+
import polars as pl
18+
import narwhals as nw
19+
20+
validation = (
21+
pb.Validate(
22+
data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"),
23+
tbl_name="game_revenue",
24+
label="Comprehensive validation example",
25+
thresholds=pb.Thresholds(warn_at=0.10, stop_at=0.25, notify_at=0.35),
26+
)
27+
.col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$") # STEP 1
28+
.col_vals_gt(columns="session_duration", value=5) # STEP 2
29+
.col_vals_ge(columns="item_revenue", value=0.02) # STEP 3
30+
.col_vals_in_set(columns="item_type", set=["iap", "ad"]) # STEP 4
31+
.col_vals_in_set( # STEP 5
32+
columns="acquisition",
33+
set=["google", "facebook", "organic", "crosspromo", "other_campaign"]
34+
)
35+
.col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"]) # STEP 6
36+
.col_vals_expr(expr=nw.when( # STEP 7
37+
nw.col("item_type") == "iap")
38+
.then(nw.col("item_name").str.contains(r"^[a-z]*?\d$"))
39+
)
40+
.col_vals_between( # STEP 8
41+
columns="session_duration",
42+
left=10, right=50,
43+
pre = lambda df: df.select(pl.median("session_duration"))
44+
)
45+
.rows_distinct(columns_subset=["player_id", "session_id", "time"]) # STEP 9
46+
.row_count_match(count=2000) # STEP 10
47+
.col_count_match(count=11) # STEP 11
48+
.col_vals_not_null(columns=pb.starts_with("item")) # STEP 12-14
49+
.col_exists(columns="start_day") # STEP 15
50+
.interrogate()
51+
)
52+
53+
validation
54+
```
55+
56+
```python
57+
import pointblank as pb
58+
import polars as pl
59+
import narwhals as nw
60+
61+
validation = (
62+
pb.Validate(
63+
data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"),
64+
tbl_name="game_revenue",
65+
label="Comprehensive validation example",
66+
thresholds=pb.Thresholds(warn_at=0.10, stop_at=0.25, notify_at=0.35),
67+
)
68+
.col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$") # STEP 1
69+
.col_vals_gt(columns="session_duration", value=5) # STEP 2
70+
.col_vals_ge(columns="item_revenue", value=0.02) # STEP 3
71+
.col_vals_in_set(columns="item_type", set=["iap", "ad"]) # STEP 4
72+
.col_vals_in_set( # STEP 5
73+
columns="acquisition",
74+
set=["google", "facebook", "organic", "crosspromo", "other_campaign"]
75+
)
76+
.col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"]) # STEP 6
77+
.col_vals_expr(expr=nw.when( # STEP 7
78+
nw.col("item_type") == "iap")
79+
.then(nw.col("item_name").str.contains(r"^[a-z]*?\d$"))
80+
)
81+
.col_vals_between( # STEP 8
82+
columns="session_duration",
83+
left=10, right=50,
84+
pre = lambda df: df.select(pl.median("session_duration"))
85+
)
86+
.rows_distinct(columns_subset=["player_id", "session_id", "time"]) # STEP 9
87+
.row_count_match(count=2000) # STEP 10
88+
.col_count_match(count=11) # STEP 11
89+
.col_vals_not_null(columns=pb.starts_with("item")) # STEP 12-14
90+
.col_exists(columns="start_day") # STEP 15
91+
.interrogate()
92+
)
93+
94+
validation
95+
```
96+
97+
<details>
98+
<summary>Preview of Input Table</summary>
99+
100+
```{python}
101+
# | echo: false
102+
pb.preview(pb.load_dataset(dataset="game_revenue"), n_head=10, n_tail=10)
103+
```
104+
105+
</details>
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
---
2+
pagetitle: "Examples: Data Extracts"
3+
notebook-links: false
4+
page-navigation: false
5+
toc: false
6+
html-table-processing: none
7+
---
8+
9+
### Data Extracts
10+
11+
Pulling out data extracts that highlight rows with validation failures.
12+
13+
Validation with failures at *Step 3*:
14+
15+
```{python}
16+
#| echo: false
17+
18+
import pointblank as pb
19+
20+
validation = (
21+
pb.Validate(data=pb.load_dataset(dataset="game_revenue"))
22+
.col_vals_lt(columns="item_revenue", value=200)
23+
.col_vals_gt(columns="item_revenue", value=0)
24+
.col_vals_ge(columns="session_duration", value=5)
25+
.col_vals_in_set(columns="item_type", set=["iap", "ad"])
26+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
27+
.interrogate()
28+
)
29+
30+
validation
31+
```
32+
33+
<br>
34+
Extract from *Step 3* (`.col_vals_ge(columns="session_duration", value=5)`):
35+
36+
```{python}
37+
#| echo: false
38+
pb.preview(validation.get_data_extracts(i=3, frame=True), n_head=10, n_tail=10)
39+
```
40+
41+
```python
42+
import pointblank as pb
43+
44+
validation = (
45+
pb.Validate(data=pb.load_dataset(dataset="game_revenue"))
46+
.col_vals_lt(columns="item_revenue", value=200)
47+
.col_vals_gt(columns="item_revenue", value=0)
48+
.col_vals_ge(columns="session_duration", value=5)
49+
.col_vals_in_set(columns="item_type", set=["iap", "ad"])
50+
.col_vals_regex(columns="player_id", pattern=r"[A-Z]{12}\d{3}")
51+
.interrogate()
52+
)
53+
```
54+
55+
```python
56+
pb.preview(validation.get_data_extracts(i=3, frame=True), n_head=10, n_tail=10)
57+
```
58+
59+
<details>
60+
<summary>Preview of Input Table</summary>
61+
62+
```{python}
63+
# | echo: false
64+
pb.preview(pb.load_dataset(dataset="game_revenue"), n_head=10, n_tail=10)
65+
```
66+
67+
</details>
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
---
2+
pagetitle: "Examples: Sundered Data"
3+
notebook-links: false
4+
page-navigation: false
5+
toc: false
6+
html-table-processing: none
7+
---
8+
9+
### Sundered Data
10+
11+
Splitting your data into 'pass' and 'fail' subsets.
12+
13+
```{python}
14+
#| echo: false
15+
16+
import pointblank as pb
17+
18+
validation = (
19+
pb.Validate( # Use pb.Validate to start
20+
data=pb.load_dataset(dataset="small_table", tbl_type="pandas")
21+
)
22+
.col_vals_gt(columns="d", value=1000)
23+
.col_vals_le(columns="c", value=5)
24+
.interrogate()
25+
)
26+
27+
validation
28+
```
29+
30+
```{python}
31+
#| echo: false
32+
pb.preview(validation.get_sundered_data(type="pass"))
33+
```
34+
35+
```python
36+
import pointblank as pb
37+
import polars as pl
38+
39+
validation = (
40+
pb.Validate( # Use pb.Validate to start
41+
data=pb.load_dataset(dataset="small_table", tbl_type="pandas")
42+
)
43+
.col_vals_gt(columns="d", value=1000)
44+
.col_vals_le(columns="c", value=5)
45+
.interrogate()
46+
)
47+
48+
validation
49+
```
50+
51+
```python
52+
pb.preview(validation.get_sundered_data(type="pass"))
53+
```
54+
55+
<details>
56+
<summary>Preview of Input Table</summary>
57+
58+
```{python}
59+
# | echo: false
60+
pb.preview(pb.load_dataset(dataset="small_table", tbl_type="pandas"), n_head=20, n_tail=20)
61+
```
62+
63+
</details>
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
---
2+
pagetitle: "Examples: Apply Validation Rules to Multiple Columns"
3+
notebook-links: false
4+
page-navigation: false
5+
toc: false
6+
html-table-processing: none
7+
---
8+
9+
### Apply Validation Rules to Multiple Columns
10+
11+
Create multiple validation steps by supplying a list of column names to the `columns=` argument.
12+
13+
```{python}
14+
#| echo: false
15+
16+
import pointblank as pb
17+
18+
validation = (
19+
pb.Validate(
20+
data=pb.load_dataset(dataset="small_table", tbl_type="polars")
21+
)
22+
.col_vals_ge(columns=["a", "c", "d"], value=0) # check values in 'a', 'c', and 'd'
23+
.col_exists(columns=["date_time", "date"]) # check for the existence of two columns
24+
.interrogate()
25+
)
26+
27+
validation
28+
```
29+
30+
```python
31+
import pointblank as pb
32+
33+
validation = (
34+
pb.Validate(
35+
data=pb.load_dataset(dataset="small_table", tbl_type="polars")
36+
)
37+
.col_vals_ge(columns=["a", "c", "d"], value=0) # check values in 'a', 'c', and 'd'
38+
.col_exists(columns=["date_time", "date"]) # check for the existence of two columns
39+
.interrogate()
40+
)
41+
42+
validation
43+
```
44+
45+
<details>
46+
<summary>Preview of Input Table</summary>
47+
48+
```{python}
49+
# | echo: false
50+
pb.preview(pb.load_dataset(dataset="small_table", tbl_type="polars"), n_head=20, n_tail=20)
51+
```
52+
53+
</details>

0 commit comments

Comments
 (0)