Skip to content

Commit 08e9530

Browse files
committed
make clean_str_attr function more robust
1 parent 56144bc commit 08e9530

File tree

2 files changed

+56
-8
lines changed

2 files changed

+56
-8
lines changed

src/pynxtools/dataconverter/helpers.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1191,17 +1191,28 @@ def nested_dict_to_slash_separated_path(
1191 1191

1192 1192

1193 1193
def clean_str_attr(
1194-
attr: Optional[Union[str, bytes]], encoding="utf-8"
1194+
attr: Optional[Union[str, bytes]], encoding: str = "utf-8"
1195 1195
) -> Optional[str]:
1196 1196
"""
1197-
Cleans the string attribute which means it will decode bytes to str if necessary.
1198-
If `attr` is not str, bytes or None it raises a TypeError.
1197+
Return the attribute as a string.
1198+
1199+
- If `attr` is `bytes`, decode it using the given encoding.
1200+
- If `attr` is already a string, return it unchanged.
1201+
- If `attr` is `None`, return `None`.
1202+
- Otherwise, raise TypeError.
1203+
1204+
Args:
1205+
attr: A string, bytes, or None.
1206+
encoding: The character encoding to use when decoding bytes.
1207+
1208+
Returns:
1209+
The attribute as a string, or None if input was None.
1210+
1211+
Raises:
1212+
TypeError: If `attr` is not str, bytes, or None.
1199 1213
"""
1200-
if attr is None:
1214+
if attr is None or isinstance(attr, str):
1201 1215
return attr
1202 1216
if isinstance(attr, bytes):
1203 1217
return attr.decode(encoding)
1204-
if isinstance(attr, str):
1205-
return attr
1206-
1207-
return attr
1218+
raise TypeError(f"Expected str, bytes, or None; got {type(attr).__name__}")

tests/dataconverter/test_helpers.py

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -217,3 +217,40 @@ def test_warning_on_definition_changed_by_reader(caplog):
217 217

218 218
assert "/ENTRY[entry]/definition" in template.keys()
219 219
assert template["/ENTRY[entry]/definition"] == "NXtest"
220+
221+
222+
@pytest.mark.parametrize(
223+
"attr,encoding,expected",
224+
[
225+
(None, "utf-8", None), # None stays None
226+
("hello", "utf-8", "hello"), # string unchanged
227+
(b"hello", "utf-8", "hello"), # UTF-8 bytes
228+
("café".encode("latin-1"), "latin-1", "café"), # custom encoding
229+
("café".encode(), "utf-8", "café"), # UTF-8 with Unicode
230+
("", "utf-8", ""), # empty string
231+
(b"", "utf-8", ""), # empty bytes
232+
],
233+
)
234+
def test_clean_str_attr_valid(attr, encoding, expected):
235+
assert helpers.clean_str_attr(attr, encoding) == expected
236+
237+
238+
@pytest.mark.parametrize(
239+
"attr,expected_type",
240+
[
241+
(123, "int"),
242+
([b"test"], "list"),
243+
({}, "dict"),
244+
],
245+
)
246+
def test_clean_str_attr_invalid_type(attr, expected_type):
247+
with pytest.raises(
248+
TypeError, match=f"Expected str, bytes, or None; got {expected_type}"
249+
):
250+
helpers.clean_str_attr(attr)
251+
252+
253+
def test_clean_str_attr_invalid_encoding():
254+
with pytest.raises(UnicodeDecodeError):
255+
# invalid in UTF-8
256+
helpers.clean_str_attr(b"\xff", encoding="utf-8")

0 commit comments

Comments
 (0)