From 156648f4ee407bd65be2bea6d1f368a17cca037c Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 12:03:22 -0400 Subject: [PATCH 01/17] Revise docs navigation and consolidate introduction --- docs/_quarto.yml | 5 +- docs/index.qmd | 397 +++++++++++++++----------------------- docs/user-guide/index.qmd | 21 +- 3 files changed, 162 insertions(+), 261 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index cd07abc12..4e2aef2a4 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -43,11 +43,10 @@ website: bread-crumbs: false favicon: assets/fav-logo.png site-url: https://posit-dev.github.io/pointblank/ + repo-url: https://github.com/posit-dev/pointblank description: "Find out if your data is what you think it is" navbar: left: - - text: User Guide - file: user-guide/index.qmd - text: Examples file: demos/index.qmd - href: reference/index.qmd @@ -64,7 +63,7 @@ website: contents: - section: "Getting Started" contents: - - user-guide/index.qmd + - index.qmd - user-guide/installation.qmd - section: "Validation Plan" contents: diff --git a/docs/index.qmd b/docs/index.qmd index 9e00d7ad1..8ed08137f 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -1,314 +1,223 @@ --- +title: Introduction jupyter: python3 +toc-expand: 2 html-table-processing: none --- - -
- -![](/assets/pointblank_logo.svg){width=75%} - -_Data validation made beautiful and powerful_ - -
- -Pointblank is a powerful, yet elegant data validation framework for Python that transforms how you -ensure data quality. With its intuitive, chainable API, you can quickly validate your data against -comprehensive quality checks and visualize results through stunning, interactive reports that make -data issues immediately actionable. - -Whether you're a data scientist, data engineer, or analyst, Pointblank helps you catch data quality -issues before they impact your analyses or downstream systems. - -## Getting Started in 30 Seconds - -```python +```{python} +#| echo: false +#| output: false import pointblank as pb - -validation = ( - pb.Validate(data=pb.load_dataset(dataset="small_table")) - .col_vals_gt(columns="d", value=100) # Validate values > 100 - .col_vals_le(columns="c", value=5) # Validate values <= 5 - .col_exists(columns=["date", "date_time"]) # Check columns exist - .interrogate() # Execute and collect results -) - -# Get the validation report from the REPL with: -validation.get_tabular_report().show() - -# From a notebook simply use: -validation +pb.config(report_incl_footer=False) ``` -
- -![](/assets/pointblank-tabular-report.png){width=100%} +The Pointblank library is all about assessing the state of data quality for a table. You provide the +validation rules and the library will dutifully interrogate the data and provide useful reporting. +We can use different types of tables like Polars and Pandas DataFrames, Parquet files, or various +database tables. Let's walk through what data validation looks like in Pointblank. -
+## A Simple Validation Table -## Real-World Example +This is a validation report table that is produced from a validation of a Polars DataFrame: -```python +```{python} +#| code-fold: true +#| code-summary: "Show the code" import pointblank as pb -import polars as pl - -# Load your data -sales_data = pl.read_csv("sales_data.csv") - -# Create a comprehensive validation -validation = ( - pb.Validate( - data=sales_data, - tbl_name="sales_data", # Name of the table for reporting - label="Real-world example.", # Label for the validation, appears in reports - thresholds=(0.01, 0.02, 0.05), # Set thresholds for warnings, errors, and critical issues - actions=pb.Actions( # Define actions for any threshold exceedance - critical="Major data quality issue found in step {step} ({time})." - ), - final_actions=pb.FinalActions( # Define final actions for the entire validation - pb.send_slack_notification( - webhook_url="https://hooks.slack.com/services/your/webhook/url" - ) - ), - brief=True, # Add automatically-generated briefs for each step - ) - .col_vals_between( # Check numeric ranges with precision - columns=["price", "quantity"], - left=0, right=1000 - ) - .col_vals_not_null( # Ensure that columns ending with '_id' don't have null values - columns=pb.ends_with("_id") - ) - .col_vals_regex( # Validate patterns with regex - columns="email", - pattern="^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$" - ) - .col_vals_in_set( # Check categorical values - columns="status", - set=["pending", "shipped", "delivered", "returned"] - ) - .conjointly( # Combine multiple conditions - lambda df: pb.expr_col("revenue") == pb.expr_col("price") * pb.expr_col("quantity"), - lambda df: pb.expr_col("tax") >= pb.expr_col("revenue") * 0.05 - ) - .interrogate() -) -``` - -``` -Major data quality issue found in step 7 (2025-04-16 15:03:04.685612+00:00). -``` - -```python -# Get an HTML report you can share with your team -validation.get_tabular_report().show("browser") -``` - -
-![](/assets/pointblank-sales-data.png){width=100%} - -
- -```python -# Get a report of failing records from a specific step -validation.get_step_report(i=3).show("browser") # Get failing records from step 3 +( + pb.Validate(data=pb.load_dataset(dataset="small_table"), label="Example Validation") + .col_vals_lt(columns="a", value=10) + .col_vals_between(columns="d", left=0, right=5000) + .col_vals_in_set(columns="f", set=["low", "mid", "high"]) + .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") + .interrogate() +) ``` -
- -![](/assets/pointblank-step-report.png){width=100%} +Each row in this reporting table constitutes a single validation step. Roughly, the left-hand side +outlines the validation rules and the right-hand side provides the results of each validation step. +While simple in principle, there's a lot of useful information packed into this validation table. -
+Here's a diagram that describes a few of the important parts of the validation table: -## YAML Configuration +![](/assets/validation-table-diagram.png){width=100%} -For teams that need portable, version-controlled validation workflows, Pointblank supports YAML -configuration files. This makes it easy to share validation logic across different environments and -team members, ensuring everyone is on the same page. +There are three things that should be noted here: -**validation.yaml** +- validation steps: each step is a separate test on the table, focused on a certain aspect of the +table +- validation rules: the validation type is provided here along with key constraints +- validation results: interrogation results are provided here, with a breakdown of test units +(*total*, *passing*, and *failing*), threshold flags, and more -```yaml -validate: - data: small_table - tbl_name: "small_table" - label: "Getting started validation" +The intent is to provide the key information in one place, and have it be interpretable by data +stakeholders. For example, a failure can be seen in the second row (notice there's a CSV button). A +data quality stakeholder could click this to download a CSV of the failing rows for that step. -steps: - - col_vals_gt: - columns: "d" - value: 100 - - col_vals_le: - columns: "c" - value: 5 - - col_exists: - columns: ["date", "date_time"] -``` +## Example Code, Step-by-Step -**Execute the YAML validation** +This section will walk you through the example code used above. 
```python import pointblank as pb -# Run validation from YAML configuration -validation = pb.yaml_interrogate("validation.yaml") - -# Get the results just like any other validation -validation.get_tabular_report().show() +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt(columns="a", value=10) + .col_vals_between(columns="d", left=0, right=5000) + .col_vals_in_set(columns="f", set=["low", "mid", "high"]) + .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") + .interrogate() +) ``` -This approach is perfect for: - -- **CI/CD pipelines**: Store validation rules alongside your code -- **Team collaboration**: Share validation logic in a readable format -- **Environment consistency**: Use the same validation across dev, staging, and production -- **Documentation**: YAML files serve as living documentation of your data quality requirements +Note these three key pieces in the code: -## Command Line Interface +- **data**: the `Validate(data=)` argument takes a DataFrame or database table that you want to validate +- **steps**: the methods starting with `col_vals_` specify validation steps that run on specific columns +- **execution**: the `~~Validate.interrogate()` method executes the validation plan on the table -Pointblank includes a powerful CLI utility called `pb` that lets you run data validation workflows -directly from the command line. Perfect for CI/CD pipelines, scheduled data quality checks, or quick -validation tasks. +This common pattern is used in a validation workflow, where `Validate` and +`~~Validate.interrogate()` bookend a validation plan generated through calling validation methods. -
+In the next few sections we'll go a bit further by understanding how we can measure data quality and +respond to failures. -![](/assets/vhs/cli-complete-workflow.gif){width=100%} +## Understanding Test Units -
+Each validation step will execute a type of validation test on the target table. For example, a +`~~Validate.col_vals_lt()` validation step can test that each value in a column is less than a +specified number. And the key finding that's reported in each step is the number of *test units* +that pass or fail. -**Explore Your Data** +In the validation report table, test unit metrics are displayed under the `UNITS`, `PASS`, and +`FAIL` columns. This diagram explains what the tabulated values signify: -```bash -# Get a quick preview of your data -pb preview small_table - -# Check for missing values -pb missing small_table - -# Generate column summaries -pb scan small_table -``` +![](/assets/validation-test-units.png){width=100%} -**Run Essential Validations** +Test units are dependent on the test being run. Some validation methods might test every value in a +particular column, so each value will be a test unit. Others will only have a single test unit since +they aren't testing individual values but rather if the overall test passes or fails. -```bash -# Run validation from YAML configuration file -pb run validation.yaml +## Setting Thresholds for Data Quality Signals -# Run validation from Python file -pb run validation.py +Understanding test units is essential because they form the foundation of Pointblank's threshold +system. Thresholds let you define acceptable levels of data quality, triggering different severity +signals ('warning', 'error', or 'critical') when certain failure conditions are met. 
-# Check for duplicate rows -pb validate small_table --check rows-distinct +Here's a simple example that uses a single validation step along with thresholds set using the +`Thresholds` class: -# Verify no null values -pb validate small_table --check col-vals-not-null --column a +```{python} +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt( + columns="a", + value=7, -# Extract failing data for debugging -pb validate small_table --check col-vals-gt --column a --value 5 --show-extract -``` - -**Integrate with CI/CD** - -```bash -# Use exit codes for automation in one-liner validations (0 = pass, 1 = fail) -pb validate small_table --check rows-distinct --exit-code - -# Run validation workflows with exit codes -pb run validation.yaml --exit-code -pb run validation.py --exit-code + # Set the 'warning' and 'error' thresholds --- + thresholds=pb.Thresholds(warning=2, error=4) + ) + .interrogate() +) ``` -## Join the Community +If you look at the validation report table, we can see: -We'd love to hear from you! Connect with us: +- the `FAIL` column shows that 2 tests units have failed +- the `W` column (short for 'warning') shows a filled gray circle indicating those failing test +units reached that threshold value +- the `E` column (short for 'error') shows an open yellow circle indicating that the number of +failing test units is below that threshold -- [GitHub Issues](https://github.com/posit-dev/pointblank/issues) for bug reports and feature requests -- [_Discord server_](https://discord.com/invite/YH7CybCNCQ) for discussions and help -- [Contributing guidelines](https://github.com/posit-dev/pointblank/blob/main/CONTRIBUTING.md) if you'd like to help improve Pointblank +The one final threshold level, `C` (for 'critical'), wasn't set so it appears on the validation +table as a long dash. 
-## Installation +## Taking Action on Threshold Exceedances -You can install Pointblank using pip: +Pointblank becomes even more powerful when you combine thresholds with actions. The +`Actions` class lets you trigger responses when validation failures exceed threshold levels, turning +passive reporting into active notifications. -```bash -pip install pointblank -``` +Here's a simple example that adds an action to the previous validation: -You can also install Pointblank from Conda-Forge by using: +```{python} +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt( + columns="a", + value=7, + thresholds=pb.Thresholds(warning=2, error=4), -```bash -conda install conda-forge::pointblank -``` - -If you don't have Polars or Pandas installed, you'll need to install one of them to use Pointblank. - -```bash -pip install "pointblank[pl]" # Install Pointblank with Polars -pip install "pointblank[pd]" # Install Pointblank with Pandas + # Set an action for the 'warning' threshold --- + actions=pb.Actions( + warning="WARNING: Column 'a' has values that aren't less than 7." + ) + ) + .interrogate() +) ``` -To use Pointblank with DuckDB, MySQL, PostgreSQL, or SQLite, install Ibis with the appropriate backend: +Notice the printed warning message: `"WARNING: Column 'a' has values that aren't less than +7."`. The warning indicator (filled gray circle) visually confirms this threshold was reached and +the action should trigger. -```bash -pip install "pointblank[duckdb]" # Install Pointblank with Ibis + DuckDB -pip install "pointblank[mysql]" # Install Pointblank with Ibis + MySQL -pip install "pointblank[postgres]" # Install Pointblank with Ibis + PostgreSQL -pip install "pointblank[sqlite]" # Install Pointblank with Ibis + SQLite -``` +Actions make your validation workflows more responsive and integrated with your data pipelines. For +example, you can generate console messages, Slack notifications, and more. 
-## Technical Details +## Navigating the User Guide -Pointblank uses [Narwhals](https://github.com/narwhals-dev/narwhals) to work with Polars and Pandas -DataFrames, and integrates with [Ibis](https://github.com/ibis-project/ibis) for database and file -format support. This architecture provides a consistent API for validating tabular data from various -sources. +As you continue exploring Pointblank's capabilities, you'll find the **User Guide** organized into +sections that will help you navigate the various features. -## Contributing to Pointblank +### Getting Started -There are many ways to contribute to the ongoing development of Pointblank. Some contributions can -be simple (like fixing typos, improving documentation, filing issues for feature requests or -problems, etc.) and others might take more time and care (like answering questions and submitting -PRs with code changes). Just know that anything you can do to help would be very much appreciated! +The *Getting Started* section introduces you to Pointblank: -Please read over the [contributing guidelines](https://github.com/posit-dev/pointblank/blob/main/CONTRIBUTING.md) -for information on how to get started. +- [Introduction](index.qmd): Overview of Pointblank and core concepts (**this article**) +- [Installation](user-guide/installation.qmd): How to install and set up Pointblank -## Pointblank for R +### Validation Plan -There's also a version of Pointblank for R, which has been around since 2017 and is widely used in -the R community. You can find it at . 
+The *Validation Plan* section covers everything you need to know about creating robust +validation plans: -## Roadmap +- [Overview](user-guide/validation-overview.qmd): Survey of validation methods and their shared parameters +- [Validation Methods](user-guide/validation-methods.qmd): A closer look at the more common validation methods +- [Column Selection Patterns](user-guide/column-selection-patterns.qmd): Techniques for targeting specific columns +- [Preprocessing](user-guide/preprocessing.qmd): Transform data before validation +- [Segmentation](user-guide/segmentation.qmd): Apply validations to specific segments of your data +- [Thresholds](user-guide/thresholds.qmd): Set quality standards and trigger severity levels +- [Actions](user-guide/actions.qmd): Respond to threshold exceedances with notifications or custom functions +- [Briefs](user-guide/briefs.qmd): Add context to validation steps -We're actively working on enhancing Pointblank with: +### Advanced Validation -1. Additional validation methods for comprehensive data quality checks -2. Advanced logging capabilities -3. Messaging actions (Slack, email) for threshold exceedances -4. LLM-powered validation suggestions and data dictionary generation -5. JSON/YAML configuration for pipeline portability -6. CLI utility for validation from the command line -7. Expanded backend support and certification -8. High-quality documentation and examples +The *Advanced Validation* section explores more specialized validation techniques: -If you have any ideas for features or improvements, don't hesitate to share them with us! We are -always looking for ways to make Pointblank better. 
+- [Expression-Based Validation](user-guide/expressions.qmd): Use column expressions for advanced validation +- [Schema Validation](user-guide/schema-validation.qmd): Enforce table structure and column types +- [Assertions](user-guide/assertions.qmd): Raise exceptions to enforce data quality requirements +- [Draft Validation](user-guide/draft-validation.qmd): Create validation plans from existing data -## Code of Conduct +### Post Interrogation -Please note that the Pointblank project is released with a -[contributor code of conduct](https://www.contributor-covenant.org/version/2/1/code_of_conduct/). -
By participating in this project you agree to abide by its terms. +After validating your data, the *Post Interrogation* section helps you analyze and respond to +results: -## 📄 License +- [Validation Reports](user-guide/validation-reports.qmd): Understand and customize the validation report table +- [Step Reports](user-guide/step-reports.qmd): View detailed results for individual validation steps +- [Data Extracts](user-guide/extracts.qmd): Extract and analyze failing data +- [Sundering Validated Data](user-guide/sundering.qmd): Split data based on validation results -Pointblank is licensed under the MIT license. +### Data Inspection -© Posit Software, PBC. +The *Data Inspection* section provides tools to explore and understand your data: -## 🏛️ Governance +- [Previewing Data](user-guide/preview.qmd): View samples of your data +- [Column Summaries](user-guide/col-summary-tbl.qmd): Get statistical summaries of your data +- [Missing Values Reporting](user-guide/missing-vals-tbl.qmd): Identify and visualize missing data -This project is primarily maintained by -[Rich Iannone](https://bsky.app/profile/richmeister.bsky.social). Other authors may occasionally -assist with some of these duties. +By following this guide, you'll gain a comprehensive understanding of how to validate, monitor, and +maintain high-quality data with Pointblank. diff --git a/docs/user-guide/index.qmd b/docs/user-guide/index.qmd index d8a33a1e4..7b14853f8 100644 --- a/docs/user-guide/index.qmd +++ b/docs/user-guide/index.qmd @@ -1,20 +1,13 @@ --- -title: Introduction -jupyter: python3 -toc-expand: 2 -html-table-processing: none +title: "Introduction" +format: + html: + include-in-header: + - text: | + --- -```{python} -#| echo: false -#| output: false -import pointblank as pb -pb.config(report_incl_footer=False) -``` -The Pointblank library is all about assessing the state of data quality for a table. 
You provide the -validation rules and the library will dutifully interrogate the data and provide useful reporting. -We can use different types of tables like Polars and Pandas DataFrames, Parquet files, or various -database tables. Let's walk through what data validation looks like in Pointblank. +Redirecting to the [Introduction](../index.qmd)... ## A Simple Validation Table From 3cf007f9f4f1515df38e25c99cef1e4d4e5e31a7 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 13:10:31 -0400 Subject: [PATCH 02/17] Fix method signature in UserSchema age_in_range rule --- docs/blog/validation-libs-2025/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/blog/validation-libs-2025/index.qmd b/docs/blog/validation-libs-2025/index.qmd index eb2c7e91c..1b1ddf544 100644 --- a/docs/blog/validation-libs-2025/index.qmd +++ b/docs/blog/validation-libs-2025/index.qmd @@ -548,7 +548,7 @@ class UserSchema(dy.Schema): # Use @dy.rule() for age range validation @dy.rule() - def age_in_range() -> pl.Expr: + def age_in_range(cls) -> pl.Expr: return pl.col("age").is_between(18, 80, closed="both") # Validate using the schema From ca0555575ce7e23d210b83fb15a9abedad08a526 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 14:03:59 -0400 Subject: [PATCH 03/17] Revamp docs intro and add quickstart guide --- docs/_quarto.yml | 1 + docs/index.qmd | 299 +++++++++++++++------------------ docs/user-guide/quickstart.qmd | 223 ++++++++++++++++++++++++ 3 files changed, 357 insertions(+), 166 deletions(-) create mode 100644 docs/user-guide/quickstart.qmd diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 4e2aef2a4..f37ef1f26 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -64,6 +64,7 @@ website: - section: "Getting Started" contents: - index.qmd + - user-guide/quick-start.qmd - user-guide/installation.qmd - section: "Validation Plan" contents: diff --git a/docs/index.qmd b/docs/index.qmd index 8ed08137f..f5fc8d1b7 100644 --- 
a/docs/index.qmd +++ b/docs/index.qmd @@ -1,9 +1,21 @@ --- -title: Introduction +title: Welcome to Pointblank jupyter: python3 -toc-expand: 2 html-table-processing: none --- + +
+ +![](/assets/pointblank_logo.svg){width=60%} + +**Data validation made beautiful and powerful.** + +
+ +Pointblank is a data validation framework for Python that makes data quality checks beautiful, +powerful, and stakeholder-friendly. Instead of cryptic error messages, get stunning interactive +reports that turn data issues into conversations. + ```{python} #| echo: false #| output: false @@ -11,213 +23,168 @@ import pointblank as pb pb.config(report_incl_footer=False) ``` -The Pointblank library is all about assessing the state of data quality for a table. You provide the -validation rules and the library will dutifully interrogate the data and provide useful reporting. -We can use different types of tables like Polars and Pandas DataFrames, Parquet files, or various -database tables. Let's walk through what data validation looks like in Pointblank. - -## A Simple Validation Table - -This is a validation report table that is produced from a validation of a Polars DataFrame: - ```{python} -#| code-fold: true -#| code-summary: "Show the code" +#| echo: false import pointblank as pb - -( - pb.Validate(data=pb.load_dataset(dataset="small_table"), label="Example Validation") - .col_vals_lt(columns="a", value=10) - .col_vals_between(columns="d", left=0, right=5000) - .col_vals_in_set(columns="f", set=["low", "mid", "high"]) - .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") +import polars as pl + +validation = ( + pb.Validate( + data=pb.load_dataset(dataset="game_revenue", tbl_type="polars"), + tbl_name="game_revenue", + label="Comprehensive validation of game revenue data", + thresholds=pb.Thresholds(warning=0.10, error=0.25, critical=0.35), + brief=True + ) + .col_vals_regex(columns="player_id", pattern=r"^[A-Z]{12}[0-9]{3}$") # STEP 1 + .col_vals_gt(columns="session_duration", value=20) # STEP 2 + .col_vals_ge(columns="item_revenue", value=0.20) # STEP 3 + .col_vals_in_set(columns="item_type", set=["iap", "ad"]) # STEP 4 + .col_vals_in_set( # STEP 5 + columns="acquisition", + set=["google", "facebook", "organic", "crosspromo", "other_campaign"] + 
) + .col_vals_not_in_set(columns="country", set=["Mongolia", "Germany"]) # STEP 6 + .col_vals_between( # STEP 7 + columns="session_duration", + left=10, right=50, + pre = lambda df: df.select(pl.median("session_duration")), + brief="Expect that the median of `session_duration` should be between `10` and `50`." + ) + .rows_distinct(columns_subset=["player_id", "session_id", "time"]) # STEP 8 + .row_count_match(count=2000) # STEP 9 + .col_count_match(count=11) # STEP 10 + .col_vals_not_null(columns="item_type") # STEP 11 + .col_exists(columns="start_day") # STEP 12 .interrogate() ) -``` - -Each row in this reporting table constitutes a single validation step. Roughly, the left-hand side -outlines the validation rules and the right-hand side provides the results of each validation step. -While simple in principle, there's a lot of useful information packed into this validation table. - -Here's a diagram that describes a few of the important parts of the validation table: - -![](/assets/validation-table-diagram.png){width=100%} - -There are three things that should be noted here: - -- validation steps: each step is a separate test on the table, focused on a certain aspect of the -table -- validation rules: the validation type is provided here along with key constraints -- validation results: interrogation results are provided here, with a breakdown of test units -(*total*, *passing*, and *failing*), threshold flags, and more - -The intent is to provide the key information in one place, and have it be interpretable by data -stakeholders. For example, a failure can be seen in the second row (notice there's a CSV button). A -data quality stakeholder could click this to download a CSV of the failing rows for that step. - -## Example Code, Step-by-Step - -This section will walk you through the example code used above. 
- -```python -import pointblank as pb -( - pb.Validate(data=pb.load_dataset(dataset="small_table")) - .col_vals_lt(columns="a", value=10) - .col_vals_between(columns="d", left=0, right=5000) - .col_vals_in_set(columns="f", set=["low", "mid", "high"]) - .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") - .interrogate() -) +validation.get_tabular_report(title="Game Revenue Validation Report").show("browser") ``` -Note these three key pieces in the code: +Ready to validate? Start with our [Installation](user-guide/installation.qmd) guide or jump straight +to the [User Guide](user-guide/index.qmd). -- **data**: the `Validate(data=)` argument takes a DataFrame or database table that you want to validate -- **steps**: the methods starting with `col_vals_` specify validation steps that run on specific columns -- **execution**: the `~~Validate.interrogate()` method executes the validation plan on the table +Pointblank is made with 💙 by [Posit](https://posit.co/). -This common pattern is used in a validation workflow, where `Validate` and -`~~Validate.interrogate()` bookend a validation plan generated through calling validation methods. +## What is Data Validation? -In the next few sections we'll go a bit further by understanding how we can measure data quality and -respond to failures. +Data validation ensures your data meets quality standards before it's used in analysis, reports, or +downstream systems. Pointblank provides a structured way to define validation rules, execute them, +and communicate results to both technical and non-technical stakeholders. -## Understanding Test Units +With Pointblank you can: -Each validation step will execute a type of validation test on the target table. For example, a -`~~Validate.col_vals_lt()` validation step can test that each value in a column is less than a -specified number. And the key finding that's reported in each step is the number of *test units* -that pass or fail. 
+- **Validate data** through a fluent, chainable API with [25+ validation methods](reference/index.qmd#validation-steps) +- **Set thresholds** to define acceptable levels of data quality (warning, error, critical) +- **Take actions** when thresholds are exceeded (notifications, logging, custom functions) +- **Generate reports** that make data quality issues immediately understandable +- **Inspect data** with built-in tools for previewing, summarizing, and finding missing values -In the validation report table, test unit metrics are displayed under the `UNITS`, `PASS`, and -`FAIL` columns. This diagram explains what the tabulated values signify: +## Why Pointblank? -![](/assets/validation-test-units.png){width=100%} +Pointblank is designed for the entire data team, not just engineers: -Test units are dependent on the test being run. Some validation methods might test every value in a -particular column, so each value will be a test unit. Others will only have a single test unit since -they aren't testing individual values but rather if the overall test passes or fails. +🎨 **Beautiful Reports**: Interactive validation reports that stakeholders actually want to read +📊 **Threshold Management**: Define quality standards with warning, error, and critical levels +🔍 **Error Drill-Down**: Inspect failing data to get to root causes quickly +🔗 **Universal Compatibility**: Works with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, and more +📝 **YAML Support**: Write validations in YAML for version control and team collaboration +⚡ **CLI Tools**: Run validations from the command line for CI/CD pipelines or as quick checks +🔎 **Rich Inspection**: Preview data, analyze columns, and visualize missing values -## Setting Thresholds for Data Quality Signals +## Quick Examples -Understanding test units is essential because they form the foundation of Pointblank's threshold -system. 
Thresholds let you define acceptable levels of data quality, triggering different severity -signals ('warning', 'error', or 'critical') when certain failure conditions are met. +### Interactive Reports -Here's a simple example that uses a single validation step along with thresholds set using the -`Thresholds` class: +Validation reports aren't just for engineers. They're designed for data stakeholders and are +highly customizable and publishable as HTML: -```{python} -( - pb.Validate(data=pb.load_dataset(dataset="small_table")) - .col_vals_lt( - columns="a", - value=7, - - # Set the 'warning' and 'error' thresholds --- - thresholds=pb.Thresholds(warning=2, error=4) - ) - .interrogate() -) +```python +validation.get_tabular_report().show() # In REPL +validation # In notebooks: it just works ``` -If you look at the validation report table, we can see: - -- the `FAIL` column shows that 2 tests units have failed -- the `W` column (short for 'warning') shows a filled gray circle indicating those failing test -units reached that threshold value -- the `E` column (short for 'error') shows an open yellow circle indicating that the number of -failing test units is below that threshold - -The one final threshold level, `C` (for 'critical'), wasn't set so it appears on the validation -table as a long dash. - -## Taking Action on Threshold Exceedances +### Threshold-Based Quality -Pointblank becomes even more powerful when you combine thresholds with actions. The -`Actions` class lets you trigger responses when validation failures exceed threshold levels, turning -passive reporting into active notifications. 
+Set expectations and react when data quality degrades (with alerts, logging, or custom functions): -Here's a simple example that adds an action to the previous validation: - -```{python} -( - pb.Validate(data=pb.load_dataset(dataset="small_table")) - .col_vals_lt( - columns="a", - value=7, - thresholds=pb.Thresholds(warning=2, error=4), - - # Set an action for the 'warning' threshold --- - actions=pb.Actions( - warning="WARNING: Column 'a' has values that aren't less than 7." - ) - ) +```python +validation = ( + pb.Validate(data=sales_data, thresholds=(0.01, 0.02, 0.05)) # Three threshold levels set + .col_vals_not_null(columns="customer_id") + .col_vals_in_set(columns="status", set=["pending", "shipped", "delivered"]) .interrogate() ) ``` -Notice the printed warning message: `"WARNING: Column 'a' has values that aren't less than -7."`. The warning indicator (filled gray circle) visually confirms this threshold was reached and -the action should trigger. +### YAML Workflows + +Works wonderfully for CI/CD pipelines and team collaboration: -Actions make your validation workflows more responsive and integrated with your data pipelines. For -example, you can generate console messages, Slack notifications, and more. +```yaml +validate: + data: sales_data + tbl_name: "sales_data" + thresholds: [0.01, 0.02, 0.05] -## Navigating the User Guide +steps: + - col_vals_not_null: + columns: "customer_id" + - col_vals_in_set: + columns: "status" + set: ["pending", "shipped", "delivered"] +``` -As you continue exploring Pointblank's capabilities, you'll find the **User Guide** organized into -sections that will help you navigate the various features. 
+```python +validation = pb.yaml_interrogate("validation.yaml") +``` -### Getting Started +### Command Line Power -The *Getting Started* section introduces you to Pointblank: +Run validations without writing code: -- [Introduction](index.qmd): Overview of Pointblank and core concepts (**this article**) -- [Installation](user-guide/installation.qmd): How to install and set up Pointblank +```bash +# Quick validation +pb validate sales_data.csv --check col-vals-not-null --column customer_id -### Validation Plan +# Run YAML workflows +pb run validation.yaml --exit-code # <- Great for CI/CD! -The *Validation Plan* section covers everything you need to know about creating robust -validation plans: +# Explore your data +pb scan sales_data.csv +pb missing sales_data.csv +``` -- [Overview](user-guide/validation-overview.qmd): Survey of validation methods and their shared parameters -- [Validation Methods](user-guide/validation-methods.qmd): A closer look at the more common validation methods -- [Column Selection Patterns](user-guide/column-selection-patterns.qmd): Techniques for targeting specific columns -- [Preprocessing](user-guide/preprocessing.qmd): Transform data before validation -- [Segmentation](user-guide/segmentation.qmd): Apply validations to specific segments of your data -- [Thresholds](user-guide/thresholds.qmd): Set quality standards and trigger severity levels -- [Actions](user-guide/actions.qmd): Respond to threshold exceedances with notifications or custom functions -- [Briefs](user-guide/briefs.qmd): Add context to validation steps +## Installation -### Advanced Validation +Install Pointblank using pip or conda: -The *Advanced Validation* section explores more specialized validation techniques: +```bash +pip install pointblank +# or +conda install conda-forge::pointblank +``` -- [Expression-Based Validation](user-guide/expressions.qmd): Use column expressions for advanced validation -- [Schema Validation](user-guide/schema-validation.qmd): Enforce table 
structure and column types -- [Assertions](user-guide/assertions.qmd): Raise exceptions to enforce data quality requirements -- [Draft Validation](user-guide/draft-validation.qmd): Create validation plans from existing data +For specific backends: -### Post Interrogation +```bash +pip install "pointblank[pl]" # Polars support +pip install "pointblank[pd]" # Pandas support +pip install "pointblank[duckdb]" # DuckDB support +pip install "pointblank[postgres]" # PostgreSQL support +``` -After validating your data, the *Post Interrogation* section helps you analyze and respond to -results: +See the [Installation guide](user-guide/installation.qmd) for more details. -- [Validation Reports](user-guide/validation-reports.qmd): Understand and customize the validation report table -- [Step Reports](user-guide/step-reports.qmd): View detailed results for individual validation steps -- [Data Extracts](user-guide/extracts.qmd): Extract and analyze failing data -- [Sundering Validated Data](user-guide/sundering.qmd): Split data based on validation results +## Join the Community -### Data Inspection +We'd love to hear from you! Connect with us: -The *Data Inspection* section provides tools to explore and understand your data: +- [GitHub Issues](https://github.com/posit-dev/pointblank/issues) for bug reports and feature requests +- [Discord server](https://discord.com/invite/YH7CybCNCQ) for discussions and help +- [Contributing guidelines](https://github.com/posit-dev/pointblank/blob/main/CONTRIBUTING.md) if you'd like to contribute -- [Previewing Data](user-guide/preview.qmd): View samples of your data -- [Column Summaries](user-guide/col-summary-tbl.qmd): Get statistical summaries of your data -- [Missing Values Reporting](user-guide/missing-vals-tbl.qmd): Identify and visualize missing data +--- -By following this guide, you'll gain a comprehensive understanding of how to validate, monitor, and -maintain high-quality data with Pointblank. 
+**License**: MIT | **© 2024-2025 Posit Software, PBC** diff --git a/docs/user-guide/quickstart.qmd b/docs/user-guide/quickstart.qmd new file mode 100644 index 000000000..6e9246ca8 --- /dev/null +++ b/docs/user-guide/quickstart.qmd @@ -0,0 +1,223 @@ +--- +title: Quickstart +jupyter: python3 +toc-expand: 2 +html-table-processing: none +--- +```{python} +#| echo: false +#| output: false +import pointblank as pb +pb.config(report_incl_footer=False) +``` + +The Pointblank library is all about assessing the state of data quality for a table. You provide the +validation rules and the library will dutifully interrogate the data and provide useful reporting. +We can use different types of tables like Polars and Pandas DataFrames, Parquet files, or various +database tables. Let's walk through what data validation looks like in Pointblank. + +## A Simple Validation Table + +This is a validation report table that is produced from a validation of a Polars DataFrame: + +```{python} +#| code-fold: true +#| code-summary: "Show the code" +import pointblank as pb + +( + pb.Validate(data=pb.load_dataset(dataset="small_table"), label="Example Validation") + .col_vals_lt(columns="a", value=10) + .col_vals_between(columns="d", left=0, right=5000) + .col_vals_in_set(columns="f", set=["low", "mid", "high"]) + .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") + .interrogate() +) +``` + +Each row in this reporting table constitutes a single validation step. Roughly, the left-hand side +outlines the validation rules and the right-hand side provides the results of each validation step. +While simple in principle, there's a lot of useful information packed into this validation table. 
+ +Here's a diagram that describes a few of the important parts of the validation table: + +![](/assets/validation-table-diagram.png){width=100%} + +There are three things that should be noted here: + +- validation steps: each step is a separate test on the table, focused on a certain aspect of the +table +- validation rules: the validation type is provided here along with key constraints +- validation results: interrogation results are provided here, with a breakdown of test units +(*total*, *passing*, and *failing*), threshold flags, and more + +The intent is to provide the key information in one place, and have it be interpretable by data +stakeholders. For example, a failure can be seen in the second row (notice there's a CSV button). A +data quality stakeholder could click this to download a CSV of the failing rows for that step. + +## Example Code, Step-by-Step + +This section will walk you through the example code used above. + +```python +import pointblank as pb + +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt(columns="a", value=10) + .col_vals_between(columns="d", left=0, right=5000) + .col_vals_in_set(columns="f", set=["low", "mid", "high"]) + .col_vals_regex(columns="b", pattern=r"^[0-9]-[a-z]{3}-[0-9]{3}$") + .interrogate() +) +``` + +Note these three key pieces in the code: + +- **data**: the `Validate(data=)` argument takes a DataFrame or database table that you want to validate +- **steps**: the methods starting with `col_vals_` specify validation steps that run on specific columns +- **execution**: the `~~Validate.interrogate()` method executes the validation plan on the table + +This common pattern is used in a validation workflow, where `Validate` and +`~~Validate.interrogate()` bookend a validation plan generated through calling validation methods. + +In the next few sections we'll go a bit further by understanding how we can measure data quality and +respond to failures. 
+ +## Understanding Test Units + +Each validation step will execute a type of validation test on the target table. For example, a +`~~Validate.col_vals_lt()` validation step can test that each value in a column is less than a +specified number. And the key finding that's reported in each step is the number of *test units* +that pass or fail. + +In the validation report table, test unit metrics are displayed under the `UNITS`, `PASS`, and +`FAIL` columns. This diagram explains what the tabulated values signify: + +![](/assets/validation-test-units.png){width=100%} + +Test units are dependent on the test being run. Some validation methods might test every value in a +particular column, so each value will be a test unit. Others will only have a single test unit since +they aren't testing individual values but rather if the overall test passes or fails. + +## Setting Thresholds for Data Quality Signals + +Understanding test units is essential because they form the foundation of Pointblank's threshold +system. Thresholds let you define acceptable levels of data quality, triggering different severity +signals ('warning', 'error', or 'critical') when certain failure conditions are met. 
+ +Here's a simple example that uses a single validation step along with thresholds set using the +`Thresholds` class: + +```{python} +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt( + columns="a", + value=7, + + # Set the 'warning' and 'error' thresholds --- + thresholds=pb.Thresholds(warning=2, error=4) + ) + .interrogate() +) +``` + +If you look at the validation report table, we can see: + +- the `FAIL` column shows that 2 test units have failed +- the `W` column (short for 'warning') shows a filled gray circle indicating those failing test +units reached that threshold value +- the `E` column (short for 'error') shows an open yellow circle indicating that the number of +failing test units is below that threshold + +The one final threshold level, `C` (for 'critical'), wasn't set so it appears on the validation +table as a long dash. + +## Taking Action on Threshold Exceedances + +Pointblank becomes even more powerful when you combine thresholds with actions. The +`Actions` class lets you trigger responses when validation failures exceed threshold levels, turning +passive reporting into active notifications. + +Here's a simple example that adds an action to the previous validation: + +```{python} +( + pb.Validate(data=pb.load_dataset(dataset="small_table")) + .col_vals_lt( + columns="a", + value=7, + thresholds=pb.Thresholds(warning=2, error=4), + + # Set an action for the 'warning' threshold --- + actions=pb.Actions( + warning="WARNING: Column 'a' has values that aren't less than 7." + ) + ) + .interrogate() +) +``` + +Notice the printed warning message: `"WARNING: Column 'a' has values that aren't less than +7."`. The warning indicator (filled gray circle) visually confirms this threshold was reached and +the action should trigger. + +Actions make your validation workflows more responsive and integrated with your data pipelines. For +example, you can generate console messages, Slack notifications, and more. 
+ +## Navigating the User Guide + +As you continue exploring Pointblank's capabilities, you'll find the **User Guide** organized into +sections that will help you navigate the various features. + +### Getting Started + +The *Getting Started* section introduces you to Pointblank: + +- [Introduction](index.qmd): Overview of Pointblank and core concepts (**this article**) +- [Installation](user-guide/installation.qmd): How to install and set up Pointblank + +### Validation Plan + +The *Validation Plan* section covers everything you need to know about creating robust +validation plans: + +- [Overview](user-guide/validation-overview.qmd): Survey of validation methods and their shared parameters +- [Validation Methods](user-guide/validation-methods.qmd): A closer look at the more common validation methods +- [Column Selection Patterns](user-guide/column-selection-patterns.qmd): Techniques for targeting specific columns +- [Preprocessing](user-guide/preprocessing.qmd): Transform data before validation +- [Segmentation](user-guide/segmentation.qmd): Apply validations to specific segments of your data +- [Thresholds](user-guide/thresholds.qmd): Set quality standards and trigger severity levels +- [Actions](user-guide/actions.qmd): Respond to threshold exceedances with notifications or custom functions +- [Briefs](user-guide/briefs.qmd): Add context to validation steps + +### Advanced Validation + +The *Advanced Validation* section explores more specialized validation techniques: + +- [Expression-Based Validation](user-guide/expressions.qmd): Use column expressions for advanced validation +- [Schema Validation](user-guide/schema-validation.qmd): Enforce table structure and column types +- [Assertions](user-guide/assertions.qmd): Raise exceptions to enforce data quality requirements +- [Draft Validation](user-guide/draft-validation.qmd): Create validation plans from existing data + +### Post Interrogation + +After validating your data, the *Post Interrogation* section helps 
you analyze and respond to +results: + +- [Validation Reports](user-guide/validation-reports.qmd): Understand and customize the validation report table +- [Step Reports](user-guide/step-reports.qmd): View detailed results for individual validation steps +- [Data Extracts](user-guide/extracts.qmd): Extract and analyze failing data +- [Sundering Validated Data](user-guide/sundering.qmd): Split data based on validation results + +### Data Inspection + +The *Data Inspection* section provides tools to explore and understand your data: + +- [Previewing Data](user-guide/preview.qmd): View samples of your data +- [Column Summaries](user-guide/col-summary-tbl.qmd): Get statistical summaries of your data +- [Missing Values Reporting](user-guide/missing-vals-tbl.qmd): Identify and visualize missing data + +By following this guide, you'll gain a comprehensive understanding of how to validate, monitor, and +maintain high-quality data with Pointblank. From 4603a0d14e743f4498ad995f1f9208f09943731f Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 14:22:26 -0400 Subject: [PATCH 04/17] Update index.qmd --- docs/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.qmd b/docs/index.qmd index f5fc8d1b7..c58de6ec1 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -6,7 +6,7 @@ html-table-processing: none
-![](/assets/pointblank_logo.svg){width=60%} +![](assets/pointblank_logo.svg){width=60%} **Data validation made beautiful and powerful.** From ac351357eb1dbc63ef7a55d9b91a2afd938ee730 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 15:33:57 -0400 Subject: [PATCH 05/17] Update index.qmd --- docs/index.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.qmd b/docs/index.qmd index c58de6ec1..6c75d0dab 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -59,7 +59,7 @@ validation = ( .interrogate() ) -validation.get_tabular_report(title="Game Revenue Validation Report").show("browser") +validation.get_tabular_report(title="Game Revenue Validation Report") ``` Ready to validate? Start with our [Installation](user-guide/installation.qmd) guide or jump straight From 38c1126c0dcbb5f4338c2a56c54b4631d2ff0a0e Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 16:16:45 -0400 Subject: [PATCH 06/17] Revise docs navigation and improve index page --- docs/_quarto.yml | 5 +++-- docs/index.qmd | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index f37ef1f26..6682c8fec 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -63,8 +63,9 @@ website: contents: - section: "Getting Started" contents: - - index.qmd - - user-guide/quick-start.qmd + - text: "Welcome to Pointblank" + href: index.qmd + - user-guide/quickstart.qmd - user-guide/installation.qmd - section: "Validation Plan" contents: diff --git a/docs/index.qmd b/docs/index.qmd index 6c75d0dab..7060a1351 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -1,5 +1,5 @@ --- -title: Welcome to Pointblank +title: "" jupyter: python3 html-table-processing: none --- @@ -16,15 +16,12 @@ Pointblank is a data validation framework for Python that makes data quality che powerful, and stakeholder-friendly. 
Instead of cryptic error messages, get stunning interactive reports that turn data issues into conversations. -```{python} -#| echo: false -#| output: false -import pointblank as pb -pb.config(report_incl_footer=False) -``` +Here's what a validation looks like (click "Show the code" to see how it's done): ```{python} -#| echo: false +#| code-fold: true +#| code-summary: "Show the code" + import pointblank as pb import polars as pl @@ -62,6 +59,12 @@ validation = ( validation.get_tabular_report(title="Game Revenue Validation Report") ``` +That's the kind of report you get from Pointblank: clear, interactive, and designed for everyone on +your team. And if you need help getting started or want to work faster, Pointblank has built-in AI +support through the [`assistant()`](reference/assistant.qmd) function to guide you along the way. +You can also use [`DraftValidation`](user-guide/draft-validation.qmd) to quickly generate a +validation plan from your existing data (great for getting started fast). + Ready to validate? Start with our [Installation](user-guide/installation.qmd) guide or jump straight to the [User Guide](user-guide/index.qmd). 
@@ -91,7 +94,7 @@ Pointblank is designed for the entire data team, not just engineers: πŸ”— **Universal Compatibility**: Works with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, and more πŸ“ **YAML Support**: Write validations in YAML for version control and team collaboration ⚑ **CLI Tools**: Run validations from the command line for CI/CD pipelines or as quick checks -οΏ½ **Rich Inspection**: Preview data, analyze columns, and visualize missing values +πŸ“‹ **Rich Inspection**: Preview data, analyze columns, and visualize missing values ## Quick Examples From fe1a7b0d365c385e6fd9accb37ed4dbc6cd8fab0 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 16:48:15 -0400 Subject: [PATCH 07/17] Update docs navigation and installation page --- docs/_quarto.yml | 3 ++- docs/index.qmd | 14 +++++++------- docs/user-guide/installation.qmd | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 6682c8fec..9909bde02 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -66,7 +66,8 @@ website: - text: "Welcome to Pointblank" href: index.qmd - user-guide/quickstart.qmd - - user-guide/installation.qmd + - text: "Installation" + href: user-guide/installation.qmd - section: "Validation Plan" contents: - user-guide/validation-overview.qmd diff --git a/docs/index.qmd b/docs/index.qmd index 7060a1351..5fe93b018 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -88,13 +88,13 @@ With Pointblank you can: Pointblank is designed for the entire data team, not just engineers: -🎨 **Beautiful Reports**: Interactive validation reports that stakeholders actually want to read -πŸ“Š **Threshold Management**: Define quality standards with warning, error, and critical levels -πŸ” **Error Drill-Down**: Inspect failing data to get to root causes quickly -πŸ”— **Universal Compatibility**: Works with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, and more -πŸ“ **YAML Support**: Write validations in YAML for version 
control and team collaboration -⚡ **CLI Tools**: Run validations from the command line for CI/CD pipelines or as quick checks -📋 **Rich Inspection**: Preview data, analyze columns, and visualize missing values +- 🎨 **Beautiful Reports**: Interactive validation reports that stakeholders actually want to read +- 📊 **Threshold Management**: Define quality standards with warning, error, and critical levels +- 🔍 **Error Drill-Down**: Inspect failing data to get to root causes quickly +- 🔗 **Universal Compatibility**: Works with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, and more +- 📝 **YAML Support**: Write validations in YAML for version control and team collaboration +- ⚡ **CLI Tools**: Run validations from the command line for CI/CD pipelines or as quick checks +- 📋 **Rich Inspection**: Preview data, analyze columns, and visualize missing values ## Quick Examples diff --git a/docs/user-guide/installation.qmd b/docs/user-guide/installation.qmd index f44e14351..e494d3289 100644 --- a/docs/user-guide/installation.qmd +++ b/docs/user-guide/installation.qmd @@ -1,5 +1,5 @@ --- -title: Installation +title: "" jupyter: python3 toc-expand: 2 html-table-processing: none From 617bca78ff977fa460c8b76affced9e6da7b3b83 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 18:05:05 -0400 Subject: [PATCH 08/17] Update index.qmd --- docs/index.qmd | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/index.qmd b/docs/index.qmd index 5fe93b018..a1a02c3eb 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -92,6 +92,7 @@ Pointblank is designed for the entire data team, not just engineers: - 📊 **Threshold Management**: Define quality standards with warning, error, and critical levels - 🔍 **Error Drill-Down**: Inspect failing data to get to root causes quickly - 🔗 **Universal Compatibility**: Works with Polars, Pandas, DuckDB, MySQL, PostgreSQL, SQLite, and more +- 🌍 **Multilingual Support**: Reports available in 30+ languages for 
global teams - 📝 **YAML Support**: Write validations in YAML for version control and team collaboration - ⚡ **CLI Tools**: Run validations from the command line for CI/CD pipelines or as quick checks - 📋 **Rich Inspection**: Preview data, analyze columns, and visualize missing values From 57f5681d08ea2015846697ca216267e314f35c80 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 18:05:07 -0400 Subject: [PATCH 09/17] Update _quarto.yml --- docs/_quarto.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 9909bde02..2127ac7f8 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -46,6 +46,7 @@ website: repo-url: https://github.com/posit-dev/pointblank description: "Find out if your data is what you think it is" navbar: + logo: assets/pointblank_logo.svg left: - text: Examples file: demos/index.qmd From 78c90186fdd8d3d74761b036b0dffeae0eedcf72 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 18:44:13 -0400 Subject: [PATCH 10/17] Update styles.css --- docs/styles.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/styles.css b/docs/styles.css index 0cc7b4c53..145542650 100644 --- a/docs/styles.css +++ b/docs/styles.css @@ -8,6 +8,11 @@ text-decoration-thickness: 2px; } +/* Make navbar logo bigger */ +.navbar-brand img { + max-height: 45px; +} + body { background-color: #FCFEFF; } From 06b771a42719aad2af813abb71ee808909717e4d Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 18:44:15 -0400 Subject: [PATCH 11/17] Update _quarto.yml --- docs/_quarto.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 2127ac7f8..9530b33b5 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -47,6 +47,7 @@ website: description: "Find out if your data is what you think it is" navbar: logo: assets/pointblank_logo.svg + title: false left: - text: Examples file: demos/index.qmd From a23b3951bb2ec0d7504e5d4da5c724883f2d2588 Mon 
Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:12:37 -0400 Subject: [PATCH 12/17] Update _quarto.yml --- docs/_quarto.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 9530b33b5..7ce483485 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -68,8 +68,7 @@ website: - text: "Welcome to Pointblank" href: index.qmd - user-guide/quickstart.qmd - - text: "Installation" - href: user-guide/installation.qmd + - user-guide/installation.qmd - section: "Validation Plan" contents: - user-guide/validation-overview.qmd From 2b099b579b039af391b25301d94403ff0926d8b3 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:12:41 -0400 Subject: [PATCH 13/17] Update index.qmd --- docs/index.qmd | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/index.qmd b/docs/index.qmd index a1a02c3eb..85d66118d 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -68,7 +68,7 @@ validation plan from your existing data (great for getting started fast). Ready to validate? Start with our [Installation](user-guide/installation.qmd) guide or jump straight to the [User Guide](user-guide/index.qmd). -Pointblank is made with 💙 by [Posit](https://posit.co/). +By the way, Pointblank is made with 💙 by [Posit](https://posit.co/). ## What is Data Validation? @@ -99,16 +99,6 @@ Pointblank is designed for the entire data team, not just engineers: ## Quick Examples -### Interactive Reports - -Validation reports aren't just for engineers. 
They're designed for data stakeholders and are -highly customizable and publishable as HTML: - -```python -validation.get_tabular_report().show() # In REPL -validation # In notebooks: it just works -``` - ### Threshold-Based Quality Set expectations and react when data quality degrades (with alerts, logging, or custom functions): From 3f897de5a10b7febf7ed6cbcfbfbc99be0275f38 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:12:44 -0400 Subject: [PATCH 14/17] Update styles.css --- docs/styles.css | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/styles.css b/docs/styles.css index 145542650..9589ca199 100644 --- a/docs/styles.css +++ b/docs/styles.css @@ -11,6 +11,7 @@ /* Make navbar logo bigger */ .navbar-brand img { max-height: 45px; + padding-right: 15px; } body { From effaf8e19f0c24302ed6b53ede3b0da0ee493de4 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:12:47 -0400 Subject: [PATCH 15/17] Update installation.qmd --- docs/user-guide/installation.qmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/user-guide/installation.qmd b/docs/user-guide/installation.qmd index e494d3289..464adae2e 100644 --- a/docs/user-guide/installation.qmd +++ b/docs/user-guide/installation.qmd @@ -1,5 +1,5 @@ --- -title: "" +title: "Installation" jupyter: python3 toc-expand: 2 html-table-processing: none From b371665e98bdfdf16314734ac6cba6122bdc4e2e Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:30:02 -0400 Subject: [PATCH 16/17] Update installation.qmd --- docs/user-guide/installation.qmd | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/user-guide/installation.qmd b/docs/user-guide/installation.qmd index 464adae2e..79c0438c7 100644 --- a/docs/user-guide/installation.qmd +++ b/docs/user-guide/installation.qmd @@ -5,8 +5,6 @@ toc-expand: 2 html-table-processing: none --- -# Installing Pointblank - Pointblank can be installed using various package managers. 
The base installation gives you the core validation functionality, with optional dependencies for working with different data sources. From 35841c5ba6bb819320cb995eed63c2eb47cf2ff6 Mon Sep 17 00:00:00 2001 From: Richard Iannone Date: Tue, 28 Oct 2025 19:37:37 -0400 Subject: [PATCH 17/17] Enable breadcrumbs in user guide documentation --- docs/user-guide/actions.qmd | 1 + docs/user-guide/assertions.qmd | 1 + docs/user-guide/briefs.qmd | 1 + docs/user-guide/cli-data-inspection.qmd | 1 + docs/user-guide/cli-data-validation.qmd | 1 + docs/user-guide/cli-reference.qmd | 1 + docs/user-guide/col-summary-tbl.qmd | 1 + docs/user-guide/column-selection-patterns.qmd | 1 + docs/user-guide/draft-validation.qmd | 3 ++- docs/user-guide/expressions.qmd | 1 + docs/user-guide/extracts.qmd | 1 + docs/user-guide/installation.qmd | 1 + docs/user-guide/mcp-quick-start.qmd | 3 +-- docs/user-guide/missing-vals-tbl.qmd | 1 + docs/user-guide/preprocessing.qmd | 1 + docs/user-guide/preview.qmd | 1 + docs/user-guide/quickstart.qmd | 2 ++ docs/user-guide/schema-validation.qmd | 1 + docs/user-guide/segmentation.qmd | 1 + docs/user-guide/step-reports.qmd | 1 + docs/user-guide/sundering.qmd | 1 + docs/user-guide/thresholds.qmd | 1 + docs/user-guide/validation-methods.qmd | 1 + docs/user-guide/validation-overview.qmd | 1 + docs/user-guide/validation-reports.qmd | 1 + docs/user-guide/yaml-reference.qmd | 1 + docs/user-guide/yaml-validation-workflows.qmd | 1 + 27 files changed, 29 insertions(+), 3 deletions(-) diff --git a/docs/user-guide/actions.qmd b/docs/user-guide/actions.qmd index 80c50f046..32fa10599 100644 --- a/docs/user-guide/actions.qmd +++ b/docs/user-guide/actions.qmd @@ -3,6 +3,7 @@ title: Actions jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/assertions.qmd b/docs/user-guide/assertions.qmd index 9c087d04f..03c3809b0 100644 --- a/docs/user-guide/assertions.qmd +++ b/docs/user-guide/assertions.qmd @@ -3,6 +3,7 
@@ title: Assertions jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/briefs.qmd b/docs/user-guide/briefs.qmd index d7202d0f0..a75926395 100644 --- a/docs/user-guide/briefs.qmd +++ b/docs/user-guide/briefs.qmd @@ -3,6 +3,7 @@ title: Briefs jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/cli-data-inspection.qmd b/docs/user-guide/cli-data-inspection.qmd index 1f34c73cd..63ba2d509 100644 --- a/docs/user-guide/cli-data-inspection.qmd +++ b/docs/user-guide/cli-data-inspection.qmd @@ -2,6 +2,7 @@ title: Data Inspection jupyter: python3 toc-expand: 2 +bread-crumbs: true --- Pointblank’s CLI (`pb`) makes it easy to view your data before running validations. It has several diff --git a/docs/user-guide/cli-data-validation.qmd b/docs/user-guide/cli-data-validation.qmd index 6e71e1720..e8a023adb 100644 --- a/docs/user-guide/cli-data-validation.qmd +++ b/docs/user-guide/cli-data-validation.qmd @@ -2,6 +2,7 @@ title: Data Validation jupyter: python3 toc-expand: 2 +bread-crumbs: true --- Validating data directly in the terminal with the Pointblank CLI offers a fast, scriptable, and diff --git a/docs/user-guide/cli-reference.qmd b/docs/user-guide/cli-reference.qmd index 787f22f96..62dad778d 100644 --- a/docs/user-guide/cli-reference.qmd +++ b/docs/user-guide/cli-reference.qmd @@ -2,6 +2,7 @@ title: CLI Reference jupyter: python3 toc-expand: 2 +bread-crumbs: true --- This page provides a complete reference for all Pointblank CLI commands. Each section shows the full help text as it appears in the terminal, giving you quick access to all available options and examples. 
diff --git a/docs/user-guide/col-summary-tbl.qmd b/docs/user-guide/col-summary-tbl.qmd index 3c7b87622..db9067247 100644 --- a/docs/user-guide/col-summary-tbl.qmd +++ b/docs/user-guide/col-summary-tbl.qmd @@ -3,6 +3,7 @@ title: Column Summaries jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/column-selection-patterns.qmd b/docs/user-guide/column-selection-patterns.qmd index 5b9400536..c81d767d8 100644 --- a/docs/user-guide/column-selection-patterns.qmd +++ b/docs/user-guide/column-selection-patterns.qmd @@ -3,6 +3,7 @@ title: Column Selection Patterns jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/draft-validation.qmd b/docs/user-guide/draft-validation.qmd index 2f8e1b7d3..cfd7e2116 100644 --- a/docs/user-guide/draft-validation.qmd +++ b/docs/user-guide/draft-validation.qmd @@ -3,9 +3,10 @@ title: Draft Validation jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- -Draft Validation in Pointblank leverages large language models (LLMs) to automatically generate +Draft validation in Pointblank leverages large language models (LLMs) to automatically generate validation plans for your data. This feature is especially useful when starting validation on a new dataset or when you need to quickly establish baseline validation coverage. 
diff --git a/docs/user-guide/expressions.qmd b/docs/user-guide/expressions.qmd index cb009efc6..f21d1fafd 100644 --- a/docs/user-guide/expressions.qmd +++ b/docs/user-guide/expressions.qmd @@ -3,6 +3,7 @@ title: Expression-Based Validation jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/extracts.qmd b/docs/user-guide/extracts.qmd index e75ecfd0b..3ece7c0c7 100644 --- a/docs/user-guide/extracts.qmd +++ b/docs/user-guide/extracts.qmd @@ -3,6 +3,7 @@ title: Data Extracts jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/installation.qmd b/docs/user-guide/installation.qmd index 79c0438c7..90e36662b 100644 --- a/docs/user-guide/installation.qmd +++ b/docs/user-guide/installation.qmd @@ -3,6 +3,7 @@ title: "Installation" jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- Pointblank can be installed using various package managers. The base installation gives you the core diff --git a/docs/user-guide/mcp-quick-start.qmd b/docs/user-guide/mcp-quick-start.qmd index faa8ce2b7..156414ef9 100644 --- a/docs/user-guide/mcp-quick-start.qmd +++ b/docs/user-guide/mcp-quick-start.qmd @@ -3,10 +3,9 @@ title: "MCP Quick Start" jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- -## Getting Started in 5 Minutes - Transform your data validation workflow with conversational AI in VS Code or Positron IDE. Here are three simple steps to start validating data through conversation (and no complex configuration required). ### 1. 
Install diff --git a/docs/user-guide/missing-vals-tbl.qmd b/docs/user-guide/missing-vals-tbl.qmd index 31a2015aa..951392099 100644 --- a/docs/user-guide/missing-vals-tbl.qmd +++ b/docs/user-guide/missing-vals-tbl.qmd @@ -3,6 +3,7 @@ title: Missing Values Reporting jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/preprocessing.qmd b/docs/user-guide/preprocessing.qmd index 2378a2185..ffe18f848 100644 --- a/docs/user-guide/preprocessing.qmd +++ b/docs/user-guide/preprocessing.qmd @@ -3,6 +3,7 @@ title: Preprocessing jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/preview.qmd b/docs/user-guide/preview.qmd index e2b4fb35e..7e958f581 100644 --- a/docs/user-guide/preview.qmd +++ b/docs/user-guide/preview.qmd @@ -3,6 +3,7 @@ title: Previewing Data jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/quickstart.qmd b/docs/user-guide/quickstart.qmd index 6e9246ca8..492a77546 100644 --- a/docs/user-guide/quickstart.qmd +++ b/docs/user-guide/quickstart.qmd @@ -3,7 +3,9 @@ title: Quickstart jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- + ```{python} #| echo: false #| output: false diff --git a/docs/user-guide/schema-validation.qmd b/docs/user-guide/schema-validation.qmd index 0eb7d7015..9ef6cfd25 100644 --- a/docs/user-guide/schema-validation.qmd +++ b/docs/user-guide/schema-validation.qmd @@ -3,6 +3,7 @@ title: Schema Validation jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/segmentation.qmd b/docs/user-guide/segmentation.qmd index 659f81f62..f6c5fab62 100644 --- a/docs/user-guide/segmentation.qmd +++ b/docs/user-guide/segmentation.qmd @@ -3,6 +3,7 @@ title: Segmentation jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: 
true --- ```{python} diff --git a/docs/user-guide/step-reports.qmd b/docs/user-guide/step-reports.qmd index 711e9f842..3d949ad44 100644 --- a/docs/user-guide/step-reports.qmd +++ b/docs/user-guide/step-reports.qmd @@ -3,6 +3,7 @@ title: "Step Reports" jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/sundering.qmd b/docs/user-guide/sundering.qmd index e47f2cfe4..ab1e6cfca 100644 --- a/docs/user-guide/sundering.qmd +++ b/docs/user-guide/sundering.qmd @@ -3,6 +3,7 @@ title: Sundering Validated Data jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/thresholds.qmd b/docs/user-guide/thresholds.qmd index 806d0aff0..49d6f1560 100644 --- a/docs/user-guide/thresholds.qmd +++ b/docs/user-guide/thresholds.qmd @@ -3,6 +3,7 @@ title: Thresholds jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/validation-methods.qmd b/docs/user-guide/validation-methods.qmd index 56b0e4e99..f3fbbfa18 100644 --- a/docs/user-guide/validation-methods.qmd +++ b/docs/user-guide/validation-methods.qmd @@ -3,6 +3,7 @@ title: Validation Methods jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/validation-overview.qmd b/docs/user-guide/validation-overview.qmd index a9c3ec859..ee53ee04f 100644 --- a/docs/user-guide/validation-overview.qmd +++ b/docs/user-guide/validation-overview.qmd @@ -3,6 +3,7 @@ title: Overview jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/validation-reports.qmd b/docs/user-guide/validation-reports.qmd index 6129b5eab..5bc8c4a77 100644 --- a/docs/user-guide/validation-reports.qmd +++ b/docs/user-guide/validation-reports.qmd @@ -3,6 +3,7 @@ title: "Validation Reports" jupyter: python3 toc-expand: 2 
html-table-processing: none +bread-crumbs: true --- ```{python} diff --git a/docs/user-guide/yaml-reference.qmd b/docs/user-guide/yaml-reference.qmd index 7a647acc8..1bce1e897 100644 --- a/docs/user-guide/yaml-reference.qmd +++ b/docs/user-guide/yaml-reference.qmd @@ -3,6 +3,7 @@ title: YAML Reference jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- This reference provides a comprehensive guide to all YAML keys and parameters supported by diff --git a/docs/user-guide/yaml-validation-workflows.qmd b/docs/user-guide/yaml-validation-workflows.qmd index 5ab982252..a13648a4b 100644 --- a/docs/user-guide/yaml-validation-workflows.qmd +++ b/docs/user-guide/yaml-validation-workflows.qmd @@ -3,6 +3,7 @@ title: YAML Validation Workflows jupyter: python3 toc-expand: 2 html-table-processing: none +bread-crumbs: true --- Pointblank supports defining validation workflows using YAML configuration files, providing a