Skip to content

Commit c85cb0a

Browse files
CodyCBakerPhD (Cody Baker), pre-commit-ci[bot], bendichter
authored
[New Check] Entire column of a table is not NaN (#231)
* saving state
* added test and debug
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* refactor logic call
* add early data access skip
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* add flatten for indexed cols
* generalized to util function; added None slicing
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* swapped util to return only slice
* [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
* Update nwbinspector/utils.py Co-authored-by: Ben Dichter <[email protected]>
* Update nwbinspector/checks/tables.py Co-authored-by: Ben Dichter <[email protected]>
* Update nwbinspector/utils.py Co-authored-by: Ben Dichter <[email protected]>
* Update nwbinspector/checks/tables.py Co-authored-by: Ben Dichter <[email protected]>
* debug

Co-authored-by: Cody Baker <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ben Dichter <[email protected]>
1 parent 2ee62e2 commit c85cb0a

File tree

2 files changed

+75
-15
lines changed

2 files changed

+75
-15
lines changed

nwbinspector/checks/tables.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -188,3 +188,22 @@ def check_table_values_for_dict(table: DynamicTable, nelems: int = 200):
188188
if is_string_json_loadable(string=string):
189189
message += " This string is also JSON loadable, so call `json.loads(...)` on the string to unpack."
190190
yield InspectorMessage(message=message)
191+
192+
193+
@register_check(importance=Importance.BEST_PRACTICE_SUGGESTION, neurodata_type=DynamicTable)
def check_col_not_nan(table: DynamicTable, nelems: Optional[int] = 200):
    """
    Check if all of the values in a single column of a table are NaN.

    Parameters
    ----------
    table : DynamicTable
        The table whose columns are inspected.
    nelems : int, optional
        Approximate number of values to sample per column. The first ``nelems`` values are
        checked first; if they are all NaN, an evenly strided sample spanning the whole
        column is checked as well. A falsy value (``None`` or ``0``) disables the stride,
        so every value is examined.

    Yields
    ------
    InspectorMessage
        One message per column whose sampled values are all NaN.
    """
    for column in table.columns:
        if not hasattr(column, "data") or isinstance(column, VectorIndex):
            continue
        n_values = len(column.data)
        # An empty column has nothing to report; without this guard ``column.data[0]``
        # raises IndexError and the stride below would evaluate to 0.
        if n_values == 0 or isinstance(column.data[0], str):
            continue
        # Cheap early exit: only read the leading values before touching the rest of the column.
        if nelems is not None and not np.all(np.isnan(column[:nelems])):
            continue

        # Stride chosen so that roughly ``nelems`` values are read across the full column.
        step = int(np.ceil(n_values / nelems)) if nelems else None
        if np.all(np.isnan(column[slice(0, None, step)])):
            yield InspectorMessage(
                message=f"Column {column.name} has all NaN values. Consider removing it from the table."
            )

tests/unit_tests/test_tables.py

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
check_column_binary_capability,
1818
check_single_row,
1919
check_table_values_for_dict,
20+
check_col_not_nan,
2021
)
2122
from nwbinspector.utils import get_package_version
2223

@@ -237,8 +238,8 @@ def test_check_single_row_ignore_units():
237238

238239
def test_check_single_row_ignore_electrodes():
239240
table = ElectrodeTable(
240-
name="electrodes", # default name when building through nwbfile
241-
)
241+
name="electrodes",
242+
) # default name when building through nwbfile
242243
if get_package_version(name="pynwb") >= version.Version("2.1.0"):
243244
table.add_row(
244245
location="unknown",
@@ -291,7 +292,7 @@ def test_check_table_values_for_dict_pass():
291292
assert check_table_values_for_dict(table=table) is None
292293

293294

294-
def test_check_table_values_for_dict():
295+
def test_check_table_values_for_dict_fail():
295296
table = DynamicTable(name="test_table", description="")
296297
table.add_column(name="test_column", description="")
297298
table.add_row(test_column=str(dict(a=1)))
@@ -308,19 +309,59 @@ def test_check_table_values_for_dict():
308309
)
309310

310311

311-
def test_check_table_values_for_dict_json_case():
312+
def test_check_table_values_for_dict_json_case_fail():
312313
table = DynamicTable(name="test_table", description="")
313314
table.add_column(name="test_column", description="")
314315
table.add_row(test_column=json.dumps(dict(a=1)))
315-
assert check_table_values_for_dict(table=table)[0] == InspectorMessage(
316-
message=(
317-
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
318-
"dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
319-
"`json.loads(...)` on the string to unpack."
316+
assert check_table_values_for_dict(table=table) == [
317+
InspectorMessage(
318+
message=(
319+
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
320+
"dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
321+
"`json.loads(...)` on the string to unpack."
322+
),
323+
importance=Importance.BEST_PRACTICE_VIOLATION,
324+
check_function_name="check_table_values_for_dict",
325+
object_type="DynamicTable",
326+
object_name="test_table",
327+
location="/",
328+
)
329+
]
330+
331+
332+
def test_check_col_not_nan_pass():
    """A table with one non-NaN float column and one string column produces no message."""
    passing_table = DynamicTable(name="test_table", description="")
    passing_table.add_column(name="test_column_not_nan", description="")
    passing_table.add_column(name="test_column_string", description="")
    passing_table.add_row(test_column_not_nan=1.0, test_column_string="abc")
    result = check_col_not_nan(table=passing_table)
    assert result is None
338+
339+
340+
def test_check_col_not_nan_fail():
341+
table = DynamicTable(name="test_table", description="")
342+
for name in ["test_column_not_nan_1", "test_column_nan_1", "test_column_not_nan_2", "test_column_nan_2"]:
343+
table.add_column(name=name, description="")
344+
for _ in range(400):
345+
table.add_row(
346+
test_column_not_nan_1=1.0, test_column_nan_1=np.nan, test_column_not_nan_2=1.0, test_column_nan_2=np.nan
347+
)
348+
assert check_col_not_nan(table=table) == [
349+
InspectorMessage(
350+
message="Column test_column_nan_1 has all NaN values. Consider removing it from the table.",
351+
importance=Importance.BEST_PRACTICE_SUGGESTION,
352+
check_function_name="check_col_not_nan",
353+
object_type="DynamicTable",
354+
object_name="test_table",
355+
location="/",
356+
file_path=None,
320357
),
321-
importance=Importance.BEST_PRACTICE_VIOLATION,
322-
check_function_name="check_table_values_for_dict",
323-
object_type="DynamicTable",
324-
object_name="test_table",
325-
location="/",
326-
)
358+
InspectorMessage(
359+
message="Column test_column_nan_2 has all NaN values. Consider removing it from the table.",
360+
importance=Importance.BEST_PRACTICE_SUGGESTION,
361+
check_function_name="check_col_not_nan",
362+
object_type="DynamicTable",
363+
object_name="test_table",
364+
location="/",
365+
file_path=None,
366+
),
367+
]

0 commit comments

Comments
 (0)