Skip to content

Commit b2a910e

Browse files
authored
Merge pull request #123 from posit-dev/feat-step-report-select-columns
feat: add ability to select subset of columns in row-based step reports
2 parents 63457ae + df35175 commit b2a910e

File tree

2 files changed

+165
-37
lines changed

2 files changed

+165
-37
lines changed

pointblank/validate.py

Lines changed: 107 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,10 @@ def _generate_display_table(
633633
"The `columns_subset=` value doesn't resolve to any columns in the table."
634634
)
635635

636+
# Add back the row number column if it was removed
637+
if has_leading_row_num_col:
638+
resolved_columns = ["_row_num_"] + resolved_columns
639+
636640
# Select the columns to display in the table with the `resolved_columns` value
637641
data = _select_columns(
638642
data, resolved_columns=resolved_columns, ibis_tbl=ibis_tbl, tbl_type=tbl_type
@@ -7551,7 +7555,13 @@ def get_tabular_report(
75517555

75527556
return gt_tbl
75537557

7554-
def get_step_report(self, i: int, header: str = ":default:", limit: int | None = 10) -> GT:
7558+
def get_step_report(
7559+
self,
7560+
i: int,
7561+
columns_subset: str | list[str] | Column | None = None,
7562+
header: str = ":default:",
7563+
limit: int | None = 10,
7564+
) -> GT:
75557565
"""
75567566
Get a detailed report for a single validation step.
75577567
@@ -7569,6 +7579,14 @@ def get_step_report(self, i: int, header: str = ":default:", limit: int | None =
75697579
----------
75707580
i
75717581
The step number for which to get the report.
7582+
columns_subset
7583+
The columns to display in a step report that shows errors in the input table. By default
7584+
all columns are shown (`None`). If a subset of columns is desired, we can provide a list
7585+
of column names, a string with a single column name, a `Column` object, or a
7586+
`ColumnSelector` object. The last two options allow for more flexible column selection
7587+
using column selector functions. Errors are raised if the column names provided don't
7588+
match any columns in the table (when provided as a string or list of strings) or if
7589+
column selector expressions don't resolve to any columns.
75727590
header
75737591
Options for customizing the header of the step report. The default is the `":default:"`
75747592
value which produces a generic header. Aside from this default, text can be provided for
@@ -7639,6 +7657,25 @@ def get_step_report(self, i: int, header: str = ":default:", limit: int | None =
76397657
```{python}
76407658
validation.get_step_report(i=4)
76417659
```
7660+
7661+
If you'd like to trim down the number of columns shown in the report, you can provide a
7662+
subset of columns to display. For example, if you only want to see the columns `a`, `b`, and
7663+
`c`, you can provide those column names as a list.
7664+
7665+
```{python}
7666+
validation.get_step_report(i=1, columns_subset=["a", "b", "c"])
7667+
```
7668+
7669+
If you'd like to increase or reduce the maximum number of rows shown in the report, you can
7670+
provide a different value for the `limit` parameter. For example, if you'd like to see only
7671+
up to 5 rows, you can set `limit=5`.
7672+
7673+
```{python}
7674+
validation.get_step_report(i=3, limit=5)
7675+
```
7676+
7677+
Step 3 actually had 7 failing test units, but only the first 5 rows are shown in the step
7678+
report because of the `limit=5` parameter.
76427679
"""
76437680

76447681
# If the step number is `-99` then enter the debug mode
@@ -7696,7 +7733,15 @@ def get_step_report(self, i: int, header: str = ":default:", limit: int | None =
76967733
return "This validation step is inactive."
76977734

76987735
# Create a table with a sample of ten rows, highlighting the column of interest
7699-
tbl_preview = preview(data=self.data, n_head=5, n_tail=5, limit=10, incl_header=False)
7736+
tbl_preview = preview(
7737+
data=self.data,
7738+
columns_subset=columns_subset,
7739+
n_head=5,
7740+
n_tail=5,
7741+
limit=10,
7742+
min_tbl_width=600,
7743+
incl_header=False,
7744+
)
77007745

77017746
# If no rows were extracted, create a message to indicate that no rows were extracted
77027747
# if get_row_count(extract) == 0:
@@ -7711,6 +7756,7 @@ def get_step_report(self, i: int, header: str = ":default:", limit: int | None =
77117756
i=i,
77127757
column=column,
77137758
column_position=column_position,
7759+
columns_subset=columns_subset,
77147760
values=values,
77157761
inclusive=inclusive,
77167762
n=n,
@@ -8730,6 +8776,7 @@ def _step_report_row_based(
87308776
i: int,
87318777
column: str,
87328778
column_position: int,
8779+
columns_subset: list[str] | None,
87338780
values: any,
87348781
inclusive: tuple[bool, bool] | None,
87358782
n: int,
@@ -8778,24 +8825,33 @@ def _step_report_row_based(
87788825
text = f"<code style='color: #303030; font-family: monospace; font-size: smaller;'>{column}</code> is not <code style='color: #303030; font-family: monospace; font-size: smaller;'>Null</code>"
87798826

87808827
if all_passed:
8781-
step_report = tbl_preview.tab_style(
8782-
style=[
8783-
style.text(color="#006400"),
8784-
style.fill(color="#4CA64C33"),
8785-
style.borders(
8786-
sides=["left", "right"],
8787-
color="#1B4D3E80",
8788-
style="solid",
8789-
weight="2px",
8828+
# Style the target column in green and add borders but only if that column is present
8829+
# in the `tbl_preview` (i.e., it may not be present if `columns_subset=` didn't include it)
8830+
preview_tbl_columns = tbl_preview._boxhead._get_columns()
8831+
preview_tbl_has_target_column = column in preview_tbl_columns
8832+
8833+
if preview_tbl_has_target_column:
8834+
step_report = tbl_preview.tab_style(
8835+
style=[
8836+
style.text(color="#006400"),
8837+
style.fill(color="#4CA64C33"),
8838+
style.borders(
8839+
sides=["left", "right"],
8840+
color="#1B4D3E80",
8841+
style="solid",
8842+
weight="2px",
8843+
),
8844+
],
8845+
locations=loc.body(columns=column),
8846+
).tab_style(
8847+
style=style.borders(
8848+
sides=["left", "right"], color="#1B4D3E80", style="solid", weight="2px"
87908849
),
8791-
],
8792-
locations=loc.body(columns=column),
8793-
).tab_style(
8794-
style=style.borders(
8795-
sides=["left", "right"], color="#1B4D3E80", style="solid", weight="2px"
8796-
),
8797-
locations=loc.column_labels(columns=column),
8798-
)
8850+
locations=loc.column_labels(columns=column),
8851+
)
8852+
8853+
else:
8854+
step_report = tbl_preview
87998855

88008856
if header == ":default:":
88018857
step_report = step_report.tab_header(
@@ -8824,9 +8880,11 @@ def _step_report_row_based(
88248880
# Create a preview of the extracted data
88258881
extract_tbl = _generate_display_table(
88268882
data=extract,
8883+
columns_subset=columns_subset,
88278884
n_head=limit,
88288885
n_tail=0,
88298886
limit=limit,
8887+
min_tbl_width=600,
88308888
incl_header=False,
88318889
mark_missing_values=False,
88328890
)
@@ -8839,19 +8897,34 @@ def _step_report_row_based(
88398897
extract_length_resolved = extract_length
88408898
extract_of_x_rows = "ALL"
88418899

8842-
step_report = extract_tbl.tab_style(
8843-
style=[
8844-
style.text(color="#B22222"),
8845-
style.fill(color="#FFC1C159"),
8846-
style.borders(sides=["left", "right"], color="black", style="solid", weight="2px"),
8847-
],
8848-
locations=loc.body(columns=column),
8849-
).tab_style(
8850-
style=style.borders(
8851-
sides=["left", "right"], color="black", style="solid", weight="2px"
8852-
),
8853-
locations=loc.column_labels(columns=column),
8854-
)
8900+
# Style the target column in green and add borders but only if that column is present
8901+
# in the `extract_tbl` (i.e., it may not be present if `columns_subset=` didn't include it)
8902+
extract_tbl_columns = extract_tbl._boxhead._get_columns()
8903+
extract_tbl_has_target_column = column in extract_tbl_columns
8904+
8905+
if extract_tbl_has_target_column:
8906+
step_report = extract_tbl.tab_style(
8907+
style=[
8908+
style.text(color="#B22222"),
8909+
style.fill(color="#FFC1C159"),
8910+
style.borders(
8911+
sides=["left", "right"], color="black", style="solid", weight="2px"
8912+
),
8913+
],
8914+
locations=loc.body(columns=column),
8915+
).tab_style(
8916+
style=style.borders(
8917+
sides=["left", "right"], color="black", style="solid", weight="2px"
8918+
),
8919+
locations=loc.column_labels(columns=column),
8920+
)
8921+
8922+
not_shown = ""
8923+
shown_failures = "WITH <span style='color: #B22222;'>TEST UNIT FAILURES IN RED</span>"
8924+
else:
8925+
step_report = extract_tbl
8926+
not_shown = " (NOT SHOWN)"
8927+
shown_failures = ""
88558928

88568929
if header == ":default:":
88578930
step_report = step_report.tab_header(
@@ -8863,10 +8936,9 @@ def _step_report_row_based(
88638936
f"<code style='color: #303030;'>{text}</code></span><br>"
88648937
f"<div style='padding-top: 3px;'><strong>{n_failed}</strong> / "
88658938
f"<strong>{n}</strong> TEST UNIT FAILURES "
8866-
f"IN COLUMN <strong>{column_position}</strong></div>"
8939+
f"IN COLUMN <strong>{column_position}</strong>{not_shown}</div>"
88678940
f"<div style='padding-top: 10px;'>EXTRACT OF {extract_of_x_rows} "
8868-
f"<strong>{extract_length_resolved}</strong> ROWS WITH "
8869-
"<span style='color: #B22222;'>TEST UNIT FAILURES IN RED</span>:"
8941+
f"<strong>{extract_length_resolved}</strong> ROWS {shown_failures}:"
88708942
"</div></div>"
88718943
),
88728944
)

tests/test_validate.py

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5709,14 +5709,70 @@ def test_get_step_report_no_fail(tbl_type):
57095709
for i in range(1, 18):
57105710
assert isinstance(validation.get_step_report(i=i), GT.GT)
57115711

5712-
# Test with a limit of `2` for every step report
5712+
# Test with a fixed limit of `2`
57135713
for i in range(1, 18):
57145714
assert isinstance(validation.get_step_report(i=i, limit=2), GT.GT)
57155715

5716-
# Test with `limit=None` for every step report
5716+
# Test with `limit=None`
57175717
for i in range(1, 18):
57185718
assert isinstance(validation.get_step_report(i=i, limit=None), GT.GT)
57195719

5720+
# Test with a custom header
5721+
for i in range(1, 18):
5722+
assert isinstance(validation.get_step_report(i=i, header="Custom header"), GT.GT)
5723+
5724+
#
5725+
# Tests with a subset of columns
5726+
#
5727+
5728+
# All passing cases
5729+
5730+
# Single column (target)
5731+
assert isinstance(validation.get_step_report(i=1, columns_subset="a"), GT.GT)
5732+
5733+
# Single column (non-target)
5734+
assert isinstance(validation.get_step_report(i=1, columns_subset="b"), GT.GT)
5735+
5736+
# Multiple columns (including target)
5737+
assert isinstance(validation.get_step_report(i=1, columns_subset=["a", "b"]), GT.GT)
5738+
5739+
# Multiple columns (excluding target)
5740+
assert isinstance(validation.get_step_report(i=1, columns_subset=["b", "c"]), GT.GT)
5741+
5742+
# Using single selector
5743+
assert isinstance(validation.get_step_report(i=1, columns_subset=col("a")), GT.GT)
5744+
assert isinstance(validation.get_step_report(i=1, columns_subset=col(matches("a"))), GT.GT)
5745+
assert isinstance(validation.get_step_report(i=1, columns_subset=col(starts_with("a"))), GT.GT)
5746+
5747+
# Using multiple selectors
5748+
assert isinstance(
5749+
validation.get_step_report(i=1, columns_subset=col(starts_with("a") | matches("b"))), GT.GT
5750+
)
5751+
5752+
# Failing cases
5753+
5754+
# Single column (target)
5755+
assert isinstance(validation.get_step_report(i=3, columns_subset="a"), GT.GT)
5756+
5757+
# Single column (non-target)
5758+
assert isinstance(validation.get_step_report(i=3, columns_subset="b"), GT.GT)
5759+
5760+
# Multiple columns (including target)
5761+
assert isinstance(validation.get_step_report(i=3, columns_subset=["a", "b"]), GT.GT)
5762+
5763+
# Multiple columns (excluding target)
5764+
assert isinstance(validation.get_step_report(i=3, columns_subset=["b", "c"]), GT.GT)
5765+
5766+
# Using single selector
5767+
assert isinstance(validation.get_step_report(i=3, columns_subset=col("a")), GT.GT)
5768+
assert isinstance(validation.get_step_report(i=3, columns_subset=col(matches("a"))), GT.GT)
5769+
assert isinstance(validation.get_step_report(i=3, columns_subset=col(starts_with("a"))), GT.GT)
5770+
5771+
# Using multiple selectors
5772+
assert isinstance(
5773+
validation.get_step_report(i=3, columns_subset=col(starts_with("a") | matches("b"))), GT.GT
5774+
)
5775+
57205776

57215777
def test_get_step_report_failing_inputs():
57225778
small_table = load_dataset(dataset="small_table", tbl_type="pandas")

0 commit comments

Comments
 (0)