Skip to content

Commit 61a2644

Browse files
committed
Add notes for missing comparison columns in validation
1 parent 3adad4f commit 61a2644

File tree

2 files changed

+239
-6
lines changed

2 files changed

+239
-6
lines changed

pointblank/_constants_translations.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4672,4 +4672,88 @@
46724672
"th": "ไม่ตรงกับคอลัมน์ใดๆ ในตาราง",
46734673
"fa": "با هیچ ستونی در جدول مطابقت ندارد",
46744674
},
4675+
"comparison_column_provided": {
4676+
"en": "The comparison column provided",
4677+
"fr": "La colonne de comparaison fournie",
4678+
"de": "Die angegebene Vergleichsspalte",
4679+
"it": "La colonna di confronto fornita",
4680+
"es": "La columna de comparación proporcionada",
4681+
"pt": "A coluna de comparação fornecida",
4682+
"ro": "Coloana de comparație furnizată",
4683+
"tr": "Sağlanan karşılaştırma sütunu",
4684+
"zh-Hans": "提供的比较列",
4685+
"zh-Hant": "提供的比較列",
4686+
"ja": "提供された比較列",
4687+
"ko": "제공된 비교 열",
4688+
"vi": "Cột so sánh được cung cấp",
4689+
"ru": "Предоставленный столбец сравнения",
4690+
"cs": "Poskytnutý srovnávací sloupec",
4691+
"pl": "Dostarczona kolumna porównania",
4692+
"da": "Den angivne sammenligningskolonne",
4693+
"sv": "Den angivna jämförelsekolumnen",
4694+
"nb": "Den angitte sammenligningskolonnen",
4695+
"nl": "De opgegeven vergelijkingskolom",
4696+
"fi": "Annettu vertailusarake",
4697+
"is": "Uppgefinn samanburðardálkur",
4698+
"ar": "عمود المقارنة المقدم",
4699+
"hi": "प्रदान किया गया तुलना स्तंभ",
4700+
"el": "Η παρεχόμενη στήλη σύγκρισης",
4701+
"id": "Kolom perbandingan yang diberikan",
4702+
"uk": "Наданий стовпець порівняння",
4703+
"bg": "Предоставената колона за сравнение",
4704+
"hr": "Navedeni stupac usporedbe",
4705+
"et": "Esitatud võrdlusveerg",
4706+
"hu": "A megadott összehasonlítási oszlop",
4707+
"ga": "An colún comparáide a cuireadh ar fáil",
4708+
"lv": "Nodrošinātā salīdzinājuma kolonna",
4709+
"lt": "Pateiktas palyginimo stulpelis",
4710+
"mt": "Il-kolonna tal-paragun pprovduta",
4711+
"sk": "Poskytnutý porovnávací stĺpec",
4712+
"sl": "Podani primerjalni stolpec",
4713+
"he": "עמודת ההשוואה שסופקה",
4714+
"th": "คอลัมน์เปรียบเทียบที่ให้มา",
4715+
"fa": "ستون مقایسه ارائه شده",
4716+
},
4717+
"comparison_column_for": {
4718+
"en": "The comparison column provided for",
4719+
"fr": "La colonne de comparaison fournie pour",
4720+
"de": "Die angegebene Vergleichsspalte für",
4721+
"it": "La colonna di confronto fornita per",
4722+
"es": "La columna de comparación proporcionada para",
4723+
"pt": "A coluna de comparação fornecida para",
4724+
"ro": "Coloana de comparație furnizată pentru",
4725+
"tr": "Sağlanan karşılaştırma sütunu için",
4726+
"zh-Hans": "为提供的比较列",
4727+
"zh-Hant": "為提供的比較列",
4728+
"ja": "提供された比較列の",
4729+
"ko": "제공된 비교 열의",
4730+
"vi": "Cột so sánh được cung cấp cho",
4731+
"ru": "Предоставленный столбец сравнения для",
4732+
"cs": "Poskytnutý srovnávací sloupec pro",
4733+
"pl": "Dostarczona kolumna porównania dla",
4734+
"da": "Den angivne sammenligningskolonne til",
4735+
"sv": "Den angivna jämförelsekolumnen för",
4736+
"nb": "Den angitte sammenligningskolonnen for",
4737+
"nl": "De opgegeven vergelijkingskolom voor",
4738+
"fi": "Annettu vertailusarake parametrille",
4739+
"is": "Uppgefinn samanburðardálkur fyrir",
4740+
"ar": "عمود المقارنة المقدم لـ",
4741+
"hi": "प्रदान किया गया तुलना स्तंभ के लिए",
4742+
"el": "Η παρεχόμενη στήλη σύγκρισης για",
4743+
"id": "Kolom perbandingan yang diberikan untuk",
4744+
"uk": "Наданий стовпець порівняння для",
4745+
"bg": "Предоставената колона за сравнение за",
4746+
"hr": "Navedeni stupac usporedbe za",
4747+
"et": "Esitatud võrdlusveerg jaoks",
4748+
"hu": "A megadott összehasonlítási oszlop paraméterre",
4749+
"ga": "An colún comparáide a cuireadh ar fáil le haghaidh",
4750+
"lv": "Nodrošinātā salīdzinājuma kolonna priekš",
4751+
"lt": "Pateiktas palyginimo stulpelis parametrui",
4752+
"mt": "Il-kolonna tal-paragun pprovduta għal",
4753+
"sk": "Poskytnutý porovnávací stĺpec pre",
4754+
"sl": "Podani primerjalni stolpec za",
4755+
"he": "עמודת ההשוואה שסופקה עבור",
4756+
"th": "คอลัมน์เปรียบเทียบที่ให้มาสำหรับ",
4757+
"fa": "ستون مقایسه ارائه شده برای",
4758+
},
46754759
}

pointblank/validate.py

Lines changed: 155 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12682,6 +12682,7 @@ def interrogate(
1268212682
except Exception as e:
1268312683
# Catch data quality errors and column not found errors
1268412684
error_msg = str(e).lower()
12685+
1268512686
is_comparison_error = (
1268612687
"boolean value of na is ambiguous" in error_msg
1268712688
or "cannot compare" in error_msg
@@ -12691,13 +12692,20 @@ def interrogate(
1269112692
)
1269212693
or ("dtype" in error_msg and "compare" in error_msg)
1269312694
)
12695+
1269412696
is_column_not_found = "column" in error_msg and "not found" in error_msg
1269512697

12696-
if is_comparison_error or is_column_not_found: # pragma: no cover
12698+
is_comparison_column_not_found = (
12699+
"unable to find column" in error_msg and "valid columns" in error_msg
12700+
)
12701+
12702+
if (
12703+
is_comparison_error or is_column_not_found or is_comparison_column_not_found
12704+
): # pragma: no cover
1269712705
# If data quality comparison fails or column not found, mark as eval_error
1269812706
validation.eval_error = True # pragma: no cover
1269912707

12700-
# Add a note for column not found errors
12708+
# Add a note for column not found errors (target column)
1270112709
if is_column_not_found:
1270212710
note_html = _create_column_not_found_note_html(
1270312711
column_name=column,
@@ -12718,14 +12726,65 @@ def interrogate(
1271812726
text=note_text,
1271912727
)
1272012728

12729+
# Add a note for comparison column not found errors
12730+
elif is_comparison_column_not_found:
12731+
# Extract column name from error message
12732+
# Error format: 'unable to find column "col_name"; valid columns: ...'
12733+
match = re.search(r'unable to find column "([^"]+)"', str(e))
12734+
12735+
if match:
12736+
missing_col_name = match.group(1)
12737+
12738+
# Determine position for between/outside validations
12739+
position = None
12740+
if assertion_type in ["col_vals_between", "col_vals_outside"]:
12741+
# Check if missing column is in left or right position
12742+
from pointblank.column import Column
12743+
12744+
if (
12745+
isinstance(value[0], Column)
12746+
and value[0].exprs == missing_col_name
12747+
):
12748+
position = "left"
12749+
elif (
12750+
isinstance(value[1], Column)
12751+
and value[1].exprs == missing_col_name
12752+
):
12753+
position = "right"
12754+
12755+
note_html = _create_comparison_column_not_found_note_html(
12756+
column_name=missing_col_name,
12757+
position=position,
12758+
available_columns=list(data_tbl_step.columns)
12759+
if hasattr(data_tbl_step, "columns")
12760+
else [],
12761+
locale=self.locale,
12762+
)
12763+
note_text = _create_comparison_column_not_found_note_text(
12764+
column_name=missing_col_name,
12765+
position=position,
12766+
available_columns=list(data_tbl_step.columns)
12767+
if hasattr(data_tbl_step, "columns")
12768+
else [],
12769+
)
12770+
validation._add_note(
12771+
key="comparison_column_not_found",
12772+
markdown=note_html,
12773+
text=note_text,
12774+
)
12775+
1272112776
end_time = datetime.datetime.now(datetime.timezone.utc) # pragma: no cover
12777+
1272212778
validation.proc_duration_s = (
1272312779
end_time - start_time
1272412780
).total_seconds() # pragma: no cover
12781+
1272512782
validation.time_processed = end_time.isoformat(
1272612783
timespec="milliseconds"
1272712784
) # pragma: no cover
12785+
1272812786
validation.active = False # pragma: no cover
12787+
1272912788
continue # pragma: no cover
1273012789
else:
1273112790
# For other unexpected errors, let them propagate
@@ -12826,6 +12885,7 @@ def interrogate(
1282612885
markdown=threshold_note_html,
1282712886
text=threshold_note_text,
1282812887
)
12888+
1282912889
elif self.thresholds != Thresholds():
1283012890
# Thresholds explicitly reset to empty when global thresholds exist
1283112891
reset_note_html = _create_threshold_reset_note_html(locale=self.locale)
@@ -18520,8 +18580,8 @@ def _create_no_columns_resolved_note_html(
1852018580
),
1852118581
)
1852218582

18523-
# Format the column expression
18524-
col_expr_html = f"<code>{column_expr}</code>"
18583+
# Format the column expression with monospace font
18584+
col_expr_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_expr}</code>"
1852518585

1852618586
# Build the HTML note
1852718587
html = f"{intro} {col_expr_html} {no_resolve}."
@@ -18579,8 +18639,8 @@ def _create_column_not_found_note_html(
1857918639
),
1858018640
)
1858118641

18582-
# Format the column name
18583-
col_name_html = f"<code>{column_name}</code>"
18642+
# Format the column name with monospace font
18643+
col_name_html = f"<code style='font-family: \"IBM Plex Mono\", monospace;'>{column_name}</code>"
1858418644

1858518645
# Build the HTML note
1858618646
html = f"{intro} ({col_name_html}) {not_found}."
@@ -18607,6 +18667,95 @@ def _create_column_not_found_note_text(column_name: str, available_columns: list
1860718667
return f"The target column provided ({column_name}) does not match any columns in the table."
1860818668

1860918669

18670+
def _create_comparison_column_not_found_note_html(
    column_name: str, position: str | None, available_columns: list[str], locale: str = "en"
) -> str:
    """
    Create an HTML note explaining that a comparison column was not found.

    Parameters
    ----------
    column_name
        The comparison column name that was not found. The name is HTML-escaped before it is
        embedded in the note so that characters such as `<`, `>` and `&` are displayed literally.
    position
        Optional position indicator ("left", "right") for between/outside validations. When
        given, the note uses the "comparison_column_for" intro text followed by `<position>=`.
    available_columns
        List of available column names in the table. Not used when building the note.
    locale
        The locale string (e.g., 'en', 'fr'). Falls back to the English text, and then to a
        built-in English default, when no translation is available.

    Returns
    -------
    str
        HTML-formatted note text.
    """
    # Imported locally so this helper does not depend on a module-level import being present
    from html import escape

    def _translated(key: str, english_default: str) -> str:
        # Look up `key` for the requested locale, then "en", then the hard-coded default
        entries = NOTES_TEXT.get(key, {})
        return entries.get(locale, entries.get("en", english_default))

    not_found = _translated(
        "does_not_match_any_columns", "does not match any columns in the table"
    )

    # Shared inline style so that code fragments render with a monospace font
    code_style = 'font-family: "IBM Plex Mono", monospace;'

    # Escape the column name: it is arbitrary user-supplied text placed inside HTML markup
    safe_column_name = escape(str(column_name), quote=False)
    col_name_html = f"<code style='{code_style}'>{safe_column_name}</code>"

    if position:
        # Between/outside validations: name the parameter (e.g., "left=", "right=") and use the
        # "for" version of the intro text
        intro_with_for = _translated(
            "comparison_column_for", "The comparison column provided for"
        )
        position_param = f"<code style='{code_style}'>{position}=</code>"
        return f"{intro_with_for} {position_param} ({col_name_html}) {not_found}."

    # Single comparison column: use the standard intro text without "for"
    intro = _translated("comparison_column_provided", "The comparison column provided")
    return f"{intro} ({col_name_html}) {not_found}."
18726+
18727+
18728+
def _create_comparison_column_not_found_note_text(
    column_name: str, position: str | None, available_columns: list[str]
) -> str:
    """
    Create a plain text note explaining that a comparison column was not found.

    Parameters
    ----------
    column_name
        The comparison column name that was not found.
    position
        Optional position indicator ("left", "right") for between/outside validations. When
        given (and non-empty), the note includes " for <position>=" after the intro text.
    available_columns
        List of available column names in the table. Not used when building the note.

    Returns
    -------
    str
        Plain text note (always in English).
    """
    # Only mention the parameter name when a position was supplied
    position_text = f" for {position}=" if position else ""

    return (
        f"The comparison column provided{position_text} ({column_name}) "
        "does not match any columns in the table."
    )
18757+
18758+
1861018759
def _create_col_schema_match_note_html(schema_info: dict, locale: str = "en") -> str:
1861118760
"""
1861218761
Create an HTML note with collapsible schema expectation and results.

0 commit comments

Comments
 (0)