3737 Sequence ,
3838 Set ,
3939 Tuple ,
40+ Callable ,
4041)
4142
4243# autogenerated by setuptools_scm
@@ -829,6 +830,34 @@ def apply_uri_ignore_words(
829830 return check_matches
830831
831832
def line_tokenizer_factory(
    uri_ignore_words: Set[str],
    uri_regex: Pattern[str],
    word_regex: Pattern[str],
    ignore_word_regex: Optional[Pattern[str]],
) -> Callable[[str], Iterable[re.Match[str]]]:
    """Build a per-line tokenizer with the given word/URI rules baked in.

    The returned callable takes a single line of text and yields the
    regex matches for words that should be spell-checked, after URI
    handling has been applied.

    :param uri_ignore_words: words to ignore when they occur inside a
        URI; the special entry ``"*"`` means "ignore everything in URIs".
    :param uri_regex: pattern that matches URIs within a line.
    :param word_regex: pattern that extracts candidate words from a line.
    :param ignore_word_regex: optional pattern for words to skip entirely,
        or ``None`` to skip none.
    :return: a function mapping a line to an iterable of word matches.
    """

    def line_tokenizer(line: str) -> Iterable[re.Match[str]]:
        # If all URI spelling errors will be ignored, erase any URI before
        # extracting words. Otherwise, apply ignores after extracting words.
        # This ensures that if a URI ignore word occurs both inside a URI and
        # outside, it will still be a spelling error.
        if "*" in uri_ignore_words:
            line = uri_regex.sub(" ", line)
        check_matches = extract_words_iter(line, word_regex, ignore_word_regex)
        if "*" not in uri_ignore_words:
            check_matches = apply_uri_ignore_words(
                check_matches,
                line,
                word_regex,
                ignore_word_regex,
                uri_regex,
                uri_ignore_words,
            )
        return check_matches

    return line_tokenizer
860+
832861def parse_file (
833862 filename : str ,
834863 colors : TermColors ,
@@ -906,6 +935,13 @@ def parse_file(
906935 except OSError :
907936 return bad_count
908937
938+ line_tokenizer = line_tokenizer_factory (
939+ uri_ignore_words ,
940+ uri_regex ,
941+ word_regex ,
942+ ignore_word_regex ,
943+ )
944+
909945 for i , line in enumerate (lines ):
910946 if line .rstrip () in exclude_lines :
911947 continue
@@ -922,23 +958,7 @@ def parse_file(
922958 fixed_words = set ()
923959 asked_for = set ()
924960
925- # If all URI spelling errors will be ignored, erase any URI before
926- # extracting words. Otherwise, apply ignores after extracting words.
927- # This ensures that if a URI ignore word occurs both inside a URI and
928- # outside, it will still be a spelling error.
929- if "*" in uri_ignore_words :
930- line = uri_regex .sub (" " , line )
931- check_matches = extract_words_iter (line , word_regex , ignore_word_regex )
932- if "*" not in uri_ignore_words :
933- check_matches = apply_uri_ignore_words (
934- check_matches ,
935- line ,
936- word_regex ,
937- ignore_word_regex ,
938- uri_regex ,
939- uri_ignore_words ,
940- )
941- for match in check_matches :
961+ for match in line_tokenizer (line ):
942962 word = match .group ()
943963 if word in ignore_words_cased :
944964 continue
0 commit comments