Skip to content

Commit 980be18

Browse files
authored
test: add coverage for TextHandler regex paths and TextHandlers.re() (#194)
2 parents cc6c0db + 5123590 commit 980be18

File tree

1 file changed

+93
-0
lines changed

1 file changed

+93
-0
lines changed

tests/parser/test_parser_advanced.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,68 @@ def test_text_handler_regex(self):
250250
matches = text3.re(r"He l lo", clean_match=True, case_sensitive=False)
251251
assert len(matches) == 1
252252

253+
def test_text_handler_regex_check_match(self):
    """TextHandler.re() yields a plain bool when check_match=True."""
    price = TextHandler("Price: $10.99")
    found = price.re(r"\$[\d.]+", check_match=True)
    missing = price.re(r"no-match-pattern", check_match=True)
    # Identity checks: the result must be the bool singletons, not merely truthy/falsy.
    assert found is True
    assert missing is False
258+
259+
def test_text_handler_regex_replace_entities_false(self):
    """Test TextHandler.re() with replace_entities=False preserves entities"""
    # The input must carry a literal HTML entity; a bare "&" would match
    # regardless of the flag and leave entity preservation untested.
    text = TextHandler("Hello &amp; World")
    results = text.re(r"&amp;", replace_entities=False)
    assert len(results) == 1
    # The entity comes back undecoded.
    assert results[0] == "&amp;"
265+
266+
def test_text_handler_regex_with_groups(self):
    """Capture groups from every match come back from re() as one flat list."""
    pairs = TextHandler("name=Alice age=30 name=Bob age=25")
    flattened = pairs.re(r"name=(\w+) age=(\d+)")
    # Two matches x two groups each.
    assert len(flattened) == 4
    for expected in ("Alice", "30"):
        assert expected in flattened
273+
274+
def test_text_handler_re_first_with_default(self):
    """re_first() falls back to the supplied default when nothing matches."""
    digitless = TextHandler("no numbers here")
    assert digitless.re_first(r"\d+", default="N/A") == "N/A"
279+
280+
def test_text_handler_re_first_returns_first_match(self):
    """re_first() gives back only the earliest match, as a TextHandler."""
    source = TextHandler("a1 b2 c3")
    first_digit = source.re_first(r"\d")
    assert first_digit == "1"
    # The match keeps the TextHandler type.
    assert isinstance(first_digit, TextHandler)
286+
287+
def test_text_handler_clean_with_entities(self):
    """Test TextHandler.clean() with remove_entities=True"""
    # The input carries a literal "&amp;" entity between a tab and a newline.
    text = TextHandler("Hello\t&amp;\nWorld")
    cleaned = text.clean(remove_entities=True)
    # The encoded entity is gone, replaced by the character it stands for.
    assert "&amp;" not in cleaned
    assert "&" in cleaned
    # Tab and newline are removed by cleaning as well.
    assert "\t" not in cleaned
    assert "\n" not in cleaned
295+
296+
def test_text_handler_clean_without_entities(self):
    """Test TextHandler.clean() with remove_entities=False preserves entities"""
    # A literal "&amp;" is required here: with a bare "&" the assertion
    # would pass whether or not entities were decoded.
    text = TextHandler("Hello\t&amp;\nWorld")
    cleaned = text.clean(remove_entities=False)
    assert "&amp;" in cleaned
301+
302+
def test_text_handler_json_valid(self):
    """TextHandler.json() parses a well-formed JSON object."""
    payload = TextHandler('{"key": "value", "num": 42}')
    parsed = payload.json()
    # Both the string and the integer member are read back.
    assert parsed["key"] == "value"
    assert parsed["num"] == 42
308+
309+
def test_text_handler_json_invalid(self):
    """TextHandler.json() raises when the text is not JSON."""
    malformed = TextHandler("not json")
    # NOTE(review): the concrete exception type is not visible from this
    # test; narrow `Exception` once the parser's error type is confirmed.
    with pytest.raises(Exception):
        malformed.json()
314+
253315
def test_text_handlers_operations(self):
254316
"""Test TextHandlers list operations"""
255317
handlers = TextHandlers([
@@ -266,6 +328,37 @@ def test_text_handlers_operations(self):
266328
assert handlers.get("default") == "First"
267329
assert TextHandlers([]).get("default") == "default"
268330

331+
def test_text_handlers_re(self):
    """TextHandlers.re() merges the matches of every element into one TextHandlers."""
    sources = [TextHandler(raw) for raw in ("a1 b2", "c3 d4")]
    merged = TextHandlers(sources).re(r"[a-z]\d")
    assert isinstance(merged, TextHandlers)
    # Two matches per element, two elements.
    assert len(merged) == 4
    # First and last entries pin the ordering across elements.
    assert merged[0] == "a1"
    assert merged[3] == "d4"
342+
343+
def test_text_handlers_re_empty(self):
    """TextHandlers.re() on an empty collection yields an empty TextHandlers."""
    outcome = TextHandlers([]).re(r"\d+")
    assert isinstance(outcome, TextHandlers)
    assert len(outcome) == 0
349+
350+
def test_text_handlers_re_no_matches(self):
    """TextHandlers.re() yields nothing when no element matches the pattern."""
    letters_only = TextHandlers([TextHandler(raw) for raw in ("abc", "def")])
    digits = letters_only.re(r"\d+")
    assert len(digits) == 0
355+
356+
def test_text_handlers_extract(self):
    """extract() and get_all() both hand back the very same TextHandlers object."""
    group = TextHandlers([TextHandler("a"), TextHandler("b")])
    # Identity, not equality: neither accessor may return a copy.
    for accessor in (group.extract, group.get_all):
        assert accessor() is group
361+
269362

270363
class TestSelectorsAdvanced:
271364
"""Test advanced Selectors functionality"""

0 commit comments

Comments
 (0)