Skip to content

Commit 8e85ed6

Browse files
[Add Check]: string-valued table elements contain dictionaries (#163)
* added util, check, and tests for both * yield over cols * narrowed logic * added failing test * fix test names * fixed test * added json sub-case and test Co-authored-by: Ben Dichter <[email protected]>
1 parent 361885a commit 8e85ed6

File tree

4 files changed

+167
-2
lines changed

4 files changed

+167
-2
lines changed

nwbinspector/checks/tables.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pynwb.file import TimeIntervals, Units
99

1010
from ..register_checks import register_check, InspectorMessage, Importance
11-
from ..utils import format_byte_size, is_ascending_series
11+
from ..utils import format_byte_size, is_ascending_series, is_dict_in_string, is_string_json_loadable
1212

1313

1414
@register_check(importance=Importance.CRITICAL, neurodata_type=DynamicTableRegion)
@@ -159,6 +159,23 @@ def check_single_row(
159159
)
160160

161161

162+
@register_check(importance=Importance.BEST_PRACTICE_VIOLATION, neurodata_type=DynamicTable)
163+
def check_table_values_for_dict(table: DynamicTable, nelems: int = 200):
164+
"""Check if any values in a row or column of a table contain a string casting of a Python dictionary."""
165+
for column in table.columns:
166+
if not hasattr(column, "data") or isinstance(column, VectorIndex) or not isinstance(column.data[0], str):
167+
continue
168+
for string in column.data[:nelems]:
169+
if is_dict_in_string(string=string):
170+
message = (
171+
f"The column '{column.name}' contains a string value that contains a dictionary! Please "
172+
"unpack dictionaries as additional rows or columns of the table."
173+
)
174+
if is_string_json_loadable(string=string):
175+
message += " This string is also JSON loadable, so call `json.loads(...)` on the string to unpack."
176+
yield InspectorMessage(message=message)
177+
178+
162179
# @register_check(importance="Best Practice Violation", neurodata_type=pynwb.core.DynamicTable)
163180
# def check_column_data_is_not_none(nwbfile):
164181
# """Check column values in DynamicTable to enssure they are not None."""

nwbinspector/utils.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
"""Commonly reused logic for evaluating conditions; must not have external dependencies."""
2+
import re
3+
import json
24
import numpy as np
35
from typing import TypeVar, Optional, List
46
from pathlib import Path
@@ -7,6 +9,8 @@
79
FilePathType = TypeVar("FilePathType", str, Path)
810
OptionalListOfStrings = Optional[List[str]]
911

12+
dict_regex = r"({.+:.+})"
13+
1014

1115
def format_byte_size(byte_size: int, units: str = "SI"):
1216
"""
@@ -46,3 +50,25 @@ def check_regular_series(series: np.ndarray, tolerance_decimals: int = 9):
4650

4751
def is_ascending_series(series: np.ndarray, nelems=None):
4852
return np.all(np.diff(series[:nelems]) > 0)
53+
54+
55+
def is_dict_in_string(string: str):
56+
"""
57+
Determine if the string value contains an encoded Python dictionary.
58+
59+
Can also be the direct results of string casting a dictionary, *e.g.*, ``str(dict(a=1))``.
60+
"""
61+
return any(re.findall(pattern=dict_regex, string=string))
62+
63+
64+
def is_string_json_loadable(string: str):
65+
"""
66+
Determine if the serialized dictionary is a JSON object.
67+
68+
Rather than constructing a complicated regex pattern, a simple try/except of the json.load should suffice.
69+
"""
70+
try:
71+
json.loads(string)
72+
return True
73+
except json.JSONDecodeError:
74+
return False

tests/test_utils.py

Lines changed: 72 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from hdmf.testing import TestCase
22

3-
from nwbinspector.utils import format_byte_size, check_regular_series
3+
from nwbinspector import Importance
4+
from nwbinspector.utils import format_byte_size, check_regular_series, is_dict_in_string
45

56

67
def test_format_byte_size():
@@ -23,3 +24,73 @@ def test_format_byte_size_units_exception(self):
2324
def test_check_regular_series():
2425
assert check_regular_series(series=[1, 2, 3])
2526
assert not check_regular_series(series=[1, 2, 4])
27+
28+
29+
def test_is_dict_in_string_false_1():
30+
string = "not a dict"
31+
assert is_dict_in_string(string=string) is False
32+
33+
34+
def test_is_dict_in_string_false_2():
35+
string = "not a dict, {but also fancy format text!}"
36+
assert is_dict_in_string(string=string) is False
37+
38+
39+
def test_is_dict_in_string_false_3():
40+
string = "[not] a dict, {[but] also} fancy format text!"
41+
assert is_dict_in_string(string=string) is False
42+
43+
44+
def test_is_dict_in_string_true_1():
45+
string = str(dict(a=1))
46+
assert is_dict_in_string(string=string) is True
47+
48+
49+
def test_is_dict_in_string_true_2():
50+
string = str([dict(a=1), dict(b=2)])
51+
assert is_dict_in_string(string=string) is True
52+
53+
54+
def test_is_dict_in_string_true_3():
55+
string = str(dict(a=dict(b=2)))
56+
assert is_dict_in_string(string=string) is True
57+
58+
59+
def test_is_dict_in_string_true_4():
60+
string = "some text: {'then': 'a dict'}"
61+
assert is_dict_in_string(string=string) is True
62+
63+
64+
def test_is_dict_in_string_true_5():
65+
string = "{'then': 'a dict'} more text"
66+
assert is_dict_in_string(string=string) is True
67+
68+
69+
def test_is_dict_in_string_true_6():
70+
"""Not a JSON encodable object."""
71+
string = str({1.2: Importance.CRITICAL})
72+
assert is_dict_in_string(string=string) is True
73+
74+
75+
def test_is_dict_in_string_true_7():
76+
"""
77+
A more aggressive demonstration of the general dictionary regex.
78+
79+
But it is technically possible to achieve via `str({custom_object_1: custom_object_2})` if 'custom_object_1' is
80+
hashable and both custom objects have manual `__repr__` that do not include apostrophe's within their return.
81+
82+
Example
83+
-------
84+
from dataclasses import dataclass
85+
86+
@dataclass
87+
class Test():
88+
prop = 1
89+
90+
def __repr__(self):
91+
return "This is a test"
92+
93+
str({1: Test()})
94+
"""
95+
string = "example, {this is not a dict: but it sure looks like one}!"
96+
assert is_dict_in_string(string=string) is True

tests/unit_tests/test_tables.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import platform
2+
import json
23
from unittest import TestCase
34

45
import pytest
@@ -13,6 +14,7 @@
1314
check_dynamic_table_region_data_validity,
1415
check_column_binary_capability,
1516
check_single_row,
17+
check_table_values_for_dict,
1618
)
1719
from nwbinspector.register_checks import InspectorMessage, Importance
1820

@@ -258,6 +260,55 @@ def test_check_single_row_fail():
258260
)
259261

260262

263+
def test_check_table_values_for_dict_non_str():
264+
table = DynamicTable(name="test_table", description="")
265+
table.add_column(name="test_column", description="")
266+
table.add_row(test_column=123)
267+
assert check_table_values_for_dict(table=table) is None
268+
269+
270+
def test_check_table_values_for_dict_pass():
271+
table = DynamicTable(name="test_table", description="")
272+
table.add_column(name="test_column", description="")
273+
table.add_row(test_column="123")
274+
assert check_table_values_for_dict(table=table) is None
275+
276+
277+
def test_check_table_values_for_dict():
278+
table = DynamicTable(name="test_table", description="")
279+
table.add_column(name="test_column", description="")
280+
table.add_row(test_column=str(dict(a=1)))
281+
assert check_table_values_for_dict(table=table)[0] == InspectorMessage(
282+
message=(
283+
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
284+
"dictionaries as additional rows or columns of the table."
285+
),
286+
importance=Importance.BEST_PRACTICE_VIOLATION,
287+
check_function_name="check_table_values_for_dict",
288+
object_type="DynamicTable",
289+
object_name="test_table",
290+
location="/",
291+
)
292+
293+
294+
def test_check_table_values_for_dict_json_case():
295+
table = DynamicTable(name="test_table", description="")
296+
table.add_column(name="test_column", description="")
297+
table.add_row(test_column=json.dumps(dict(a=1)))
298+
assert check_table_values_for_dict(table=table)[0] == InspectorMessage(
299+
message=(
300+
"The column 'test_column' contains a string value that contains a dictionary! Please unpack "
301+
"dictionaries as additional rows or columns of the table. This string is also JSON loadable, so call "
302+
"`json.loads(...)` on the string to unpack."
303+
),
304+
importance=Importance.BEST_PRACTICE_VIOLATION,
305+
check_function_name="check_table_values_for_dict",
306+
object_type="DynamicTable",
307+
object_name="test_table",
308+
location="/",
309+
)
310+
311+
261312
@pytest.mark.skip(reason="TODO")
262313
def test_check_column_data_is_not_none():
263314
pass

0 commit comments

Comments
 (0)