Skip to content

Commit 1366fff

Browse files
[3.12] pythongh-124130: Increase test coverage for \b and \B in regular expressions (pythonGH-124330) (pythonGH-124414)
(cherry picked from commit b82f076) Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent d26d0a1 commit 1366fff

File tree

1 file changed

+113
-7
lines changed

1 file changed

+113
-7
lines changed

Lib/test/test_re.py

Lines changed: 113 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -820,31 +820,137 @@ def test_named_unicode_escapes(self):
820820
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
821821
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
822822

823-
def test_string_boundaries(self):
823+
def test_word_boundaries(self):
824824
# See http://bugs.python.org/issue10713
825-
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
826-
"abc")
825+
self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1), "abc")
826+
self.assertEqual(re.search(r"\b(abc)\b", "abc", re.ASCII).group(1), "abc")
827+
self.assertEqual(re.search(br"\b(abc)\b", b"abc").group(1), b"abc")
828+
self.assertEqual(re.search(br"\b(abc)\b", b"abc", re.LOCALE).group(1), b"abc")
829+
self.assertEqual(re.search(r"\b(ьюя)\b", "ьюя").group(1), "ьюя")
830+
self.assertIsNone(re.search(r"\b(ьюя)\b", "ьюя", re.ASCII))
831+
# There's a word boundary between a word and a non-word.
832+
self.assertTrue(re.match(r".\b", "a="))
833+
self.assertTrue(re.match(r".\b", "a=", re.ASCII))
834+
self.assertTrue(re.match(br".\b", b"a="))
835+
self.assertTrue(re.match(br".\b", b"a=", re.LOCALE))
836+
self.assertTrue(re.match(r".\b", "я="))
837+
self.assertIsNone(re.match(r".\b", "я=", re.ASCII))
838+
# There's a word boundary between a non-word and a word.
839+
self.assertTrue(re.match(r".\b", "=a"))
840+
self.assertTrue(re.match(r".\b", "=a", re.ASCII))
841+
self.assertTrue(re.match(br".\b", b"=a"))
842+
self.assertTrue(re.match(br".\b", b"=a", re.LOCALE))
843+
self.assertTrue(re.match(r".\b", "=я"))
844+
self.assertIsNone(re.match(r".\b", "=я", re.ASCII))
845+
# There is no word boundary inside a word.
846+
self.assertIsNone(re.match(r".\b", "ab"))
847+
self.assertIsNone(re.match(r".\b", "ab", re.ASCII))
848+
self.assertIsNone(re.match(br".\b", b"ab"))
849+
self.assertIsNone(re.match(br".\b", b"ab", re.LOCALE))
850+
self.assertIsNone(re.match(r".\b", "юя"))
851+
self.assertIsNone(re.match(r".\b", "юя", re.ASCII))
852+
# There is no word boundary between non-word characters.
853+
self.assertIsNone(re.match(r".\b", "=-"))
854+
self.assertIsNone(re.match(r".\b", "=-", re.ASCII))
855+
self.assertIsNone(re.match(br".\b", b"=-"))
856+
self.assertIsNone(re.match(br".\b", b"=-", re.LOCALE))
857+
# There is no non-boundary match between a word and a non-word.
858+
self.assertIsNone(re.match(r".\B", "a="))
859+
self.assertIsNone(re.match(r".\B", "a=", re.ASCII))
860+
self.assertIsNone(re.match(br".\B", b"a="))
861+
self.assertIsNone(re.match(br".\B", b"a=", re.LOCALE))
862+
self.assertIsNone(re.match(r".\B", "я="))
863+
self.assertTrue(re.match(r".\B", "я=", re.ASCII))
864+
# There is no non-boundary match between a non-word and a word.
865+
self.assertIsNone(re.match(r".\B", "=a"))
866+
self.assertIsNone(re.match(r".\B", "=a", re.ASCII))
867+
self.assertIsNone(re.match(br".\B", b"=a"))
868+
self.assertIsNone(re.match(br".\B", b"=a", re.LOCALE))
869+
self.assertIsNone(re.match(r".\B", "=я"))
870+
self.assertTrue(re.match(r".\B", "=я", re.ASCII))
871+
# There's a non-boundary match inside a word.
872+
self.assertTrue(re.match(r".\B", "ab"))
873+
self.assertTrue(re.match(r".\B", "ab", re.ASCII))
874+
self.assertTrue(re.match(br".\B", b"ab"))
875+
self.assertTrue(re.match(br".\B", b"ab", re.LOCALE))
876+
self.assertTrue(re.match(r".\B", "юя"))
877+
self.assertTrue(re.match(r".\B", "юя", re.ASCII))
878+
# There's a non-boundary match between non-word characters.
879+
self.assertTrue(re.match(r".\B", "=-"))
880+
self.assertTrue(re.match(r".\B", "=-", re.ASCII))
881+
self.assertTrue(re.match(br".\B", b"=-"))
882+
self.assertTrue(re.match(br".\B", b"=-", re.LOCALE))
827883
# There's a word boundary at the start of a string.
828884
self.assertTrue(re.match(r"\b", "abc"))
885+
self.assertTrue(re.match(r"\b", "abc", re.ASCII))
886+
self.assertTrue(re.match(br"\b", b"abc"))
887+
self.assertTrue(re.match(br"\b", b"abc", re.LOCALE))
888+
self.assertTrue(re.match(r"\b", "ьюя"))
889+
self.assertIsNone(re.match(r"\b", "ьюя", re.ASCII))
890+
# There's a word boundary at the end of a string.
891+
self.assertTrue(re.fullmatch(r".+\b", "abc"))
892+
self.assertTrue(re.fullmatch(r".+\b", "abc", re.ASCII))
893+
self.assertTrue(re.fullmatch(br".+\b", b"abc"))
894+
self.assertTrue(re.fullmatch(br".+\b", b"abc", re.LOCALE))
895+
self.assertTrue(re.fullmatch(r".+\b", "ьюя"))
896+
self.assertIsNone(re.search(r"\b", "ьюя", re.ASCII))
829897
# A non-empty string includes a non-boundary zero-length match.
830-
self.assertTrue(re.search(r"\B", "abc"))
898+
self.assertEqual(re.search(r"\B", "abc").span(), (1, 1))
899+
self.assertEqual(re.search(r"\B", "abc", re.ASCII).span(), (1, 1))
900+
self.assertEqual(re.search(br"\B", b"abc").span(), (1, 1))
901+
self.assertEqual(re.search(br"\B", b"abc", re.LOCALE).span(), (1, 1))
902+
self.assertEqual(re.search(r"\B", "ьюя").span(), (1, 1))
903+
self.assertEqual(re.search(r"\B", "ьюя", re.ASCII).span(), (0, 0))
831904
# There is no non-boundary match at the start of a string.
832-
self.assertFalse(re.match(r"\B", "abc"))
905+
self.assertIsNone(re.match(r"\B", "abc"))
906+
self.assertIsNone(re.match(r"\B", "abc", re.ASCII))
907+
self.assertIsNone(re.match(br"\B", b"abc"))
908+
self.assertIsNone(re.match(br"\B", b"abc", re.LOCALE))
909+
self.assertIsNone(re.match(r"\B", "ьюя"))
910+
self.assertTrue(re.match(r"\B", "ьюя", re.ASCII))
911+
# There is no non-boundary match at the end of a string.
912+
self.assertIsNone(re.fullmatch(r".+\B", "abc"))
913+
self.assertIsNone(re.fullmatch(r".+\B", "abc", re.ASCII))
914+
self.assertIsNone(re.fullmatch(br".+\B", b"abc"))
915+
self.assertIsNone(re.fullmatch(br".+\B", b"abc", re.LOCALE))
916+
self.assertIsNone(re.fullmatch(r".+\B", "ьюя"))
917+
self.assertTrue(re.fullmatch(r".+\B", "ьюя", re.ASCII))
833918
# However, an empty string contains no word boundaries, and also no
834919
# non-boundaries.
835-
self.assertIsNone(re.search(r"\B", ""))
920+
self.assertIsNone(re.search(r"\b", ""))
921+
self.assertIsNone(re.search(r"\b", "", re.ASCII))
922+
self.assertIsNone(re.search(br"\b", b""))
923+
self.assertIsNone(re.search(br"\b", b"", re.LOCALE))
836924
# This one is questionable and different from the perlre behaviour,
837925
# but describes current behavior.
838-
self.assertIsNone(re.search(r"\b", ""))
926+
self.assertIsNone(re.search(r"\B", ""))
927+
self.assertIsNone(re.search(r"\B", "", re.ASCII))
928+
self.assertIsNone(re.search(br"\B", b""))
929+
self.assertIsNone(re.search(br"\B", b"", re.LOCALE))
839930
# A single word-character string has two boundaries, but no
840931
# non-boundary gaps.
841932
self.assertEqual(len(re.findall(r"\b", "a")), 2)
933+
self.assertEqual(len(re.findall(r"\b", "a", re.ASCII)), 2)
934+
self.assertEqual(len(re.findall(br"\b", b"a")), 2)
935+
self.assertEqual(len(re.findall(br"\b", b"a", re.LOCALE)), 2)
842936
self.assertEqual(len(re.findall(r"\B", "a")), 0)
937+
self.assertEqual(len(re.findall(r"\B", "a", re.ASCII)), 0)
938+
self.assertEqual(len(re.findall(br"\B", b"a")), 0)
939+
self.assertEqual(len(re.findall(br"\B", b"a", re.LOCALE)), 0)
843940
# If there are no words, there are no boundaries
844941
self.assertEqual(len(re.findall(r"\b", " ")), 0)
942+
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
943+
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
944+
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
845945
self.assertEqual(len(re.findall(r"\b", " ")), 0)
946+
self.assertEqual(len(re.findall(r"\b", " ", re.ASCII)), 0)
947+
self.assertEqual(len(re.findall(br"\b", b" ")), 0)
948+
self.assertEqual(len(re.findall(br"\b", b" ", re.LOCALE)), 0)
846949
# Can match around the whitespace.
847950
self.assertEqual(len(re.findall(r"\B", " ")), 2)
951+
self.assertEqual(len(re.findall(r"\B", " ", re.ASCII)), 2)
952+
self.assertEqual(len(re.findall(br"\B", b" ")), 2)
953+
self.assertEqual(len(re.findall(br"\B", b" ", re.LOCALE)), 2)
848954

849955
def test_bigcharset(self):
850956
self.assertEqual(re.match("([\u2222\u2223])",

0 commit comments

Comments
 (0)