Skip to content

Commit 30f0802

Browse files
committed
feat (server): new method to check regex regions of a text
1 parent 3e70695 commit 30f0802

File tree

3 files changed

+60
-1
lines changed

3 files changed

+60
-1
lines changed

coverage-badge.svg

Lines changed: 1 addition & 1 deletion
Loading

language_tool_python/server.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import json
77
import logging
88
import random
9+
import re
910
import socket
1011
import subprocess
1112
import time
@@ -610,6 +611,41 @@ def check(self, text: str) -> List[Match]:
610611
matches = response["matches"]
611612
return [Match(match, text) for match in matches]
612613

614+
def check_matching_regions(
    self, text: str, pattern: str, flags: int = 0
) -> List[Match]:
    """
    Check only the parts of the text that match a regex pattern.

    Every non-overlapping match of ``pattern`` in ``text`` (as produced by
    :func:`re.finditer`) is passed on its own to :meth:`check`. The offset
    of each resulting Match is then shifted by the start position of its
    region, so that it refers to ``text`` rather than to the region.
    Zero-width regex matches are skipped, since they contain no text to check.

    The returned Match objects can be applied to the original text with
    :func:`language_tool_python.utils.correct`.

    :param text: The full text.
    :param pattern: Regular expression defining the regions to check
    :param flags: Regex flags (re.IGNORECASE, re.MULTILINE, etc.)
    :return: List of Match with offsets adjusted to the original text,
        sorted by ascending offset (empty if no region matched)
    :rtype: List[Match]
    :raises re.error: If ``pattern`` is not a valid regular expression.
    """
    all_matches: List[Match] = []

    for region in re.finditer(pattern, text, flags):
        region_text = region.group()
        if not region_text:
            # A pattern such as ``x*`` also matches the empty string at
            # many positions; checking those would only issue pointless
            # calls to ``check`` with nothing to analyse.
            continue

        start_offset = region.start()
        region_matches = self.check(region_text)

        # Offsets come back relative to the region; make them relative
        # to the original text.
        for match in region_matches:
            match.offset += start_offset

        all_matches.extend(region_matches)

    return sorted(all_matches, key=lambda m: m.offset)
648+
613649
def _create_params(self, text: str) -> Dict[str, str]:
614650
"""
615651
Create a dictionary of parameters for the language tool server request.

tests/test_match.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,3 +182,26 @@ def test_special_char_in_text() -> None:
182182
tool.correct(text)
183183
== "The sun was setting πŸŒ…, casting a warm glow over the park. Birds chipped softly 🐦 as the day slowly fade into night."
184184
)
185+
186+
187+
def test_check_with_regex() -> None:
188+
"""
189+
Test the check_matching_regions method for selective grammar checking.
190+
This test verifies that LanguageTool can limit its grammar checking to specific
191+
regions of text defined by a regular expression, allowing for targeted error detection.
192+
Additionally, the test is performed with some special characters in the text to ensure
193+
correct handling of offsets.
194+
195+
:raises AssertionError: If the detected matches do not correspond to the specified regions.
196+
"""
197+
import language_tool_python
198+
199+
with language_tool_python.LanguageTool("en-US") as tool:
200+
text = '❗ He said "❗ I has a problem" but she replied ❗ "It are fine ❗".'
201+
matches = tool.check_matching_regions(text, r'"[^"]*"')
202+
203+
assert len(matches) == 2
204+
assert (
205+
language_tool_python.utils.correct(text, matches)
206+
== '❗ He said "❗ I have a problem" but she replied ❗ "It is fine ❗".'
207+
)

0 commit comments

Comments
Β (0)