
Commit 5061e7d

Merge pull request #298 from posit-dev/ci-extra-tests
ci: add more tests
2 parents: c83bc43 + 0ae0319

16 files changed: +788, -217 lines
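
Most of the changes below mark defensive fallback branches with `# pragma: no cover`, the directive that coverage.py's default `exclude_lines` pattern matches, so code the test suite cannot reach stops counting against the coverage total. As a rough illustration of the effect (a minimal sketch with a hypothetical function, not code from this commit):

    def table_kind(tbl) -> str:
        if hasattr(tbl, "collect_schema"):
            return "lazy"
        else:  # pragma: no cover
            # Defensive fallback the tests never reach; coverage.py excludes
            # any line matching "pragma: no cover" from its report by default.
            return "eager"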

pointblank/_interrogation.py

Lines changed: 11 additions & 9 deletions
@@ -119,17 +119,17 @@ def _safe_is_nan_or_null_expr(data_frame: Any, column_expr: Any, column_name: st
         # The namespace is the actual module, so we check its name
         if hasattr(native_namespace, "__name__") and "ibis" in native_namespace.__name__:
             return null_check
-    except Exception:
-        pass
+    except Exception:  # pragma: no cover
+        pass  # pragma: no cover

     # For non-Ibis backends, try to use `is_nan()` if the column type supports it
     try:
         if hasattr(data_frame, "collect_schema"):
             schema = data_frame.collect_schema()
         elif hasattr(data_frame, "schema"):
             schema = data_frame.schema
-        else:
-            schema = None
+        else:  # pragma: no cover
+            schema = None  # pragma: no cover

         if schema and column_name:
             column_dtype = schema.get(column_name)
@@ -148,8 +148,8 @@ def _safe_is_nan_or_null_expr(data_frame: Any, column_expr: Any, column_name: st
         except Exception:
             # If `is_nan()` fails for any reason, fall back to Null only
             pass
-    except Exception:
-        pass
+    except Exception:  # pragma: no cover
+        pass  # pragma: no cover

     # Fallback: just check Null values
     return null_check
@@ -370,7 +370,7 @@ def _get_pyspark_results(self):
                 else:
                     raise TypeError(
                         f"Expression returned {type(expr_result)}, expected PySpark Column"
-                    )
+                    )  # pragma: no cover

         except Exception as e:
             try:
@@ -382,7 +382,9 @@ def _get_pyspark_results(self):
                     pyspark_expr = col_expr.to_pyspark_expr(self.data_tbl)
                     pyspark_columns.append(pyspark_expr)
                 else:
-                    raise TypeError(f"Cannot convert {type(col_expr)} to PySpark Column")
+                    raise TypeError(
+                        f"Cannot convert {type(col_expr)} to PySpark Column"
+                    )  # pragma: no cover
             except Exception as nested_e:
                 print(f"Error evaluating PySpark expression: {e} -> {nested_e}")

@@ -656,7 +658,7 @@ def col_vals_expr(data_tbl: FrameT, expr, tbl_type: str = "local"):
         return data_tbl.assign(pb_is_good_=expr)

     # For remote backends, return original table (placeholder)
-    return data_tbl
+    return data_tbl  # pragma: no cover


 def rows_complete(data_tbl: FrameT, columns_subset: list[str] | None):
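
The `_safe_is_nan_or_null_expr` hunks above keep a Null-only fallback around the `is_nan()` probe because NaN and Null are distinct values in backends such as Polars, and `is_nan()` is only defined for float columns. A minimal Polars sketch of that distinction (illustrative, not from this commit):

    import polars as pl

    df = pl.DataFrame({"x": [1.0, float("nan"), None]})
    # `is_null()` misses NaN, and `is_nan()` raises on non-float dtypes,
    # hence the guarded combination of both checks in the code above.
    print(df.select((pl.col("x").is_null() | pl.col("x").is_nan()).alias("missing")))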

pointblank/_utils.py

Lines changed: 3 additions & 3 deletions
@@ -102,7 +102,7 @@ def _get_tbl_type(data: FrameT | Any) -> str:
             if "read_parquet" in tbl_name:
                 return "parquet"

-            else:
+            else:  # pragma: no cover
                 return "duckdb"

     return backend
@@ -274,10 +274,10 @@ def _copy_dataframe(df):
         import copy

         return copy.deepcopy(df)
-    except Exception:
+    except Exception:  # pragma: no cover
         # If all else fails, return the original DataFrame
         # This is better than crashing the validation
-        return df
+        return df  # pragma: no cover


 def _convert_to_narwhals(df: FrameT) -> nw.DataFrame:

pointblank/_utils_ai.py

Lines changed: 5 additions & 5 deletions
@@ -791,15 +791,15 @@ def validate_batch(batch: Dict[str, Any]) -> List[Dict[str, Any]]:
             logger.info(f"Successfully validated batch {batch['batch_id']}")
             return results

-        except Exception as e:
+        except Exception as e:  # pragma: no cover
             logger.error(
                 f"Failed to validate batch {batch['batch_id']}: {e}"
             )  # pragma: no cover
             # Return default results (all False) for failed batches
-            default_results = []
-            for i in range(batch["start_row"], batch["end_row"]):
-                default_results.append({"index": i, "result": False})
-            return default_results
+            default_results = []  # pragma: no cover
+            for i in range(batch["start_row"], batch["end_row"]):  # pragma: no cover
+                default_results.append({"index": i, "result": False})  # pragma: no cover
+            return default_results  # pragma: no cover

     # Execute all batch validations sequentially (chatlas is synchronous)
     final_results = []

pointblank/column.py

Lines changed: 1 addition & 1 deletion
@@ -219,7 +219,7 @@ def resolve(self, table) -> list[str]:
         # Use `collect_schema()` for LazyFrame to avoid performance warnings
         if hasattr(selected_df, "collect_schema"):
            return list(selected_df.collect_schema().keys())
-        else:
+        else:  # pragma: no cover
             return list(selected_df.columns)

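
The covered branch in `resolve()` uses `collect_schema()`, which, as the comment in the diff notes, avoids the performance warning that reading `.columns` on a Polars LazyFrame can trigger. A small illustrative sketch:

    import polars as pl

    lf = pl.LazyFrame({"a": [1, 2], "b": ["x", "y"]})
    # Resolve column names without the LazyFrame performance warning
    # that accessing `lf.columns` may emit.
    cols = list(lf.collect_schema().keys())
    print(cols)  # ['a', 'b']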

pointblank/datascan.py

Lines changed: 4 additions & 4 deletions
@@ -143,17 +143,17 @@ def __init__(self, data: IntoFrameT, tbl_name: str | None = None) -> None:
             for conv_method in valid_conversion_methods:
                 try:
                     valid_native = getattr(ibis_native, conv_method)()
-                except (NotImplementedError, ImportError, ModuleNotFoundError):
-                    continue
+                except (NotImplementedError, ImportError, ModuleNotFoundError):  # pragma: no cover
+                    continue  # pragma: no cover
                 break
-            else:
+            else:  # pragma: no cover
                 msg = (
                     "To use `ibis` as input, you must have one of arrow, pandas, polars or numpy "
                     "available in the process. Until `ibis` is fully supported by Narwhals, this is "
                     "necessary. Additionally, the data must be collected in order to calculate some "
                     "structural statistics, which may be performance detrimental."
                 )
-                raise ImportError(msg)
+                raise ImportError(msg)  # pragma: no cover
             as_native = nw.from_native(valid_native)

         self.nw_data: Frame = nw.from_native(as_native)
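
The hunk above relies on Python's `for`/`else`: the `else` block runs only when the loop finishes without `break`, i.e. when every conversion method failed. A minimal sketch of the idiom (hypothetical names, not code from this commit):

    def resolve(name: str):
        # Hypothetical lookup: pretend only "to_pyarrow" is available.
        if name != "to_pyarrow":
            raise LookupError(name)
        return lambda: "pyarrow table"

    for name in ["to_polars", "to_pandas", "to_pyarrow"]:
        try:
            convert = resolve(name)
        except LookupError:
            continue
        break
    else:
        # Reached only if the loop exhausts without `break`.
        raise ImportError("no usable conversion method found")

    print(convert())  # -> "pyarrow table"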

pointblank/scan_profile.py

Lines changed: 6 additions & 6 deletions
@@ -299,12 +299,12 @@ def as_dataframe(self, *, strict: bool = True) -> DataFrame:
             # instantiations that require consistent types.
             all_same_type: bool = all(type(v) is first_type for v in values[1:])
             if not all_same_type:
-                if strict:
-                    msg = f"Some types in {key!s} stat are different. Turn off `strict` to bypass."
-                    raise TypeError(msg)
-                for d in cols:
-                    if key in d:
-                        d[key] = str(d[key])
+                if strict:  # pragma: no cover
+                    msg = f"Some types in {key!s} stat are different. Turn off `strict` to bypass."  # pragma: no cover
+                    raise TypeError(msg)  # pragma: no cover
+                for d in cols:  # pragma: no cover
+                    if key in d:  # pragma: no cover
+                        d[key] = str(d[key])  # pragma: no cover

         return nw.from_dict(transpose_dicts(cols), backend=self.implementation)
pointblank/schema.py

Lines changed: 8 additions & 82 deletions
@@ -343,15 +343,15 @@ def _collect_schema_from_table(self):
             schema_dict = {k: str(v) for k, v in schema_dict.items()}
             self.columns = list(schema_dict.items())

-        elif table_type == "pyspark":
+        elif table_type == "pyspark":  # pragma: no cover
             # Convert PySpark DataFrame to Narwhals to get schema
-            nw_df = nw.from_native(self.tbl)
-            if _is_lazy_frame(data=nw_df):
-                schema_dict = dict(nw_df.collect_schema())
-            else:
-                schema_dict = dict(nw_df.schema.items())
-            schema_dict = {k: str(v) for k, v in schema_dict.items()}
-            self.columns = list(schema_dict.items())
+            nw_df = nw.from_native(self.tbl)  # pragma: no cover
+            if _is_lazy_frame(data=nw_df):  # pragma: no cover
+                schema_dict = dict(nw_df.collect_schema())  # pragma: no cover
+            else:  # pragma: no cover
+                schema_dict = dict(nw_df.schema.items())  # pragma: no cover
+            schema_dict = {k: str(v) for k, v in schema_dict.items()}  # pragma: no cover
+            self.columns = list(schema_dict.items())  # pragma: no cover

         elif table_type in IBIS_BACKENDS:
             schema_dict = dict(self.tbl.schema().items())
@@ -888,80 +888,6 @@ def _schema_info_generate_params_dict(
     }


-def _check_schema_match(
-    data_tbl: any,
-    schema: Schema,
-    complete: bool = True,
-    in_order: bool = True,
-    case_sensitive_colnames: bool = True,
-    case_sensitive_dtypes: bool = True,
-    full_match_dtypes: bool = True,
-) -> bool:
-    """
-    Check if the schema matches the target table.
-
-    This function performs schema validation and returns a boolean result.
-
-    Parameters
-    ----------
-    data_tbl
-        The target table to validate.
-    schema
-        The expected schema.
-    complete
-        Whether the schema should be complete.
-    in_order
-        Whether the schema should be in order.
-    case_sensitive_colnames
-        Whether column names are case-sensitive.
-    case_sensitive_dtypes
-        Whether data types are case-sensitive.
-    full_match_dtypes
-        Whether data types must match exactly.
-
-    Returns
-    -------
-    bool
-        True if the schema matches, False otherwise.
-    """
-    validation_info = _get_schema_validation_info(
-        data_tbl=data_tbl,
-        schema=schema,
-        passed=False,  # This will be determined by the logic below
-        complete=complete,
-        in_order=in_order,
-        case_sensitive_colnames=case_sensitive_colnames,
-        case_sensitive_dtypes=case_sensitive_dtypes,
-        full_match_dtypes=full_match_dtypes,
-    )
-
-    # Determine if the schema validation passed based on the validation info
-    passed = True
-
-    # Check completeness requirement
-    if complete and not validation_info["columns_full_set"]:
-        passed = False
-
-    # Check order requirement
-    if in_order and not validation_info["columns_matched_in_order"]:
-        passed = False
-
-    # Check if all expected columns were found
-    if validation_info["columns_not_found"]:
-        passed = False
-
-    # Check column-specific validations
-    for col_info in validation_info["columns"].values():
-        if not col_info["colname_matched"]:
-            passed = False
-        if not col_info.get(
-            "dtype_matched", True
-        ):  # dtype_matched may not exist if no dtypes specified
-            passed = False
-
-    return passed
-
-
 def _get_schema_validation_info(
     data_tbl: any,
     schema: Schema,
