@@ -820,31 +820,137 @@ def test_named_unicode_escapes(self):
820820 self .checkPatternError (br'\N{LESS-THAN SIGN}' , r'bad escape \N' , 0 )
821821 self .checkPatternError (br'[\N{LESS-THAN SIGN}]' , r'bad escape \N' , 1 )
822822
823- def test_string_boundaries (self ):
823+ def test_word_boundaries (self ):
824824 # See http://bugs.python.org/issue10713
825- self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ),
826- "abc" )
825+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" ).group (1 ), "abc" )
826+ self .assertEqual (re .search (r"\b(abc)\b" , "abc" , re .ASCII ).group (1 ), "abc" )
827+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" ).group (1 ), b"abc" )
828+ self .assertEqual (re .search (br"\b(abc)\b" , b"abc" , re .LOCALE ).group (1 ), b"abc" )
829+ self .assertEqual (re .search (r"\b(ьюя)\b" , "ьюя" ).group (1 ), "ьюя" )
830+ self .assertIsNone (re .search (r"\b(ьюя)\b" , "ьюя" , re .ASCII ))
831+ # There's a word boundary between a word and a non-word.
832+ self .assertTrue (re .match (r".\b" , "a=" ))
833+ self .assertTrue (re .match (r".\b" , "a=" , re .ASCII ))
834+ self .assertTrue (re .match (br".\b" , b"a=" ))
835+ self .assertTrue (re .match (br".\b" , b"a=" , re .LOCALE ))
836+ self .assertTrue (re .match (r".\b" , "я=" ))
837+ self .assertIsNone (re .match (r".\b" , "я=" , re .ASCII ))
838+ # There's a word boundary between a non-word and a word.
839+ self .assertTrue (re .match (r".\b" , "=a" ))
840+ self .assertTrue (re .match (r".\b" , "=a" , re .ASCII ))
841+ self .assertTrue (re .match (br".\b" , b"=a" ))
842+ self .assertTrue (re .match (br".\b" , b"=a" , re .LOCALE ))
843+ self .assertTrue (re .match (r".\b" , "=я" ))
844+ self .assertIsNone (re .match (r".\b" , "=я" , re .ASCII ))
845+ # There is no word boundary inside a word.
846+ self .assertIsNone (re .match (r".\b" , "ab" ))
847+ self .assertIsNone (re .match (r".\b" , "ab" , re .ASCII ))
848+ self .assertIsNone (re .match (br".\b" , b"ab" ))
849+ self .assertIsNone (re .match (br".\b" , b"ab" , re .LOCALE ))
850+ self .assertIsNone (re .match (r".\b" , "юя" ))
851+ self .assertIsNone (re .match (r".\b" , "юя" , re .ASCII ))
852+ # There is no word boundary between non-word characters.
853+ self .assertIsNone (re .match (r".\b" , "=-" ))
854+ self .assertIsNone (re .match (r".\b" , "=-" , re .ASCII ))
855+ self .assertIsNone (re .match (br".\b" , b"=-" ))
856+ self .assertIsNone (re .match (br".\b" , b"=-" , re .LOCALE ))
857+ # There is no non-boundary match between a word and a non-word.
858+ self .assertIsNone (re .match (r".\B" , "a=" ))
859+ self .assertIsNone (re .match (r".\B" , "a=" , re .ASCII ))
860+ self .assertIsNone (re .match (br".\B" , b"a=" ))
861+ self .assertIsNone (re .match (br".\B" , b"a=" , re .LOCALE ))
862+ self .assertIsNone (re .match (r".\B" , "я=" ))
863+ self .assertTrue (re .match (r".\B" , "я=" , re .ASCII ))
864+ # There is no non-boundary match between a non-word and a word.
865+ self .assertIsNone (re .match (r".\B" , "=a" ))
866+ self .assertIsNone (re .match (r".\B" , "=a" , re .ASCII ))
867+ self .assertIsNone (re .match (br".\B" , b"=a" ))
868+ self .assertIsNone (re .match (br".\B" , b"=a" , re .LOCALE ))
869+ self .assertIsNone (re .match (r".\B" , "=я" ))
870+ self .assertTrue (re .match (r".\B" , "=я" , re .ASCII ))
871+ # There's a non-boundary match inside a word.
872+ self .assertTrue (re .match (r".\B" , "ab" ))
873+ self .assertTrue (re .match (r".\B" , "ab" , re .ASCII ))
874+ self .assertTrue (re .match (br".\B" , b"ab" ))
875+ self .assertTrue (re .match (br".\B" , b"ab" , re .LOCALE ))
876+ self .assertTrue (re .match (r".\B" , "юя" ))
877+ self .assertTrue (re .match (r".\B" , "юя" , re .ASCII ))
878+ # There's a non-boundary match between non-word characters.
879+ self .assertTrue (re .match (r".\B" , "=-" ))
880+ self .assertTrue (re .match (r".\B" , "=-" , re .ASCII ))
881+ self .assertTrue (re .match (br".\B" , b"=-" ))
882+ self .assertTrue (re .match (br".\B" , b"=-" , re .LOCALE ))
827883 # There's a word boundary at the start of a string.
828884 self .assertTrue (re .match (r"\b" , "abc" ))
885+ self .assertTrue (re .match (r"\b" , "abc" , re .ASCII ))
886+ self .assertTrue (re .match (br"\b" , b"abc" ))
887+ self .assertTrue (re .match (br"\b" , b"abc" , re .LOCALE ))
888+ self .assertTrue (re .match (r"\b" , "ьюя" ))
889+ self .assertIsNone (re .match (r"\b" , "ьюя" , re .ASCII ))
890+ # There's a word boundary at the end of a string.
891+ self .assertTrue (re .fullmatch (r".+\b" , "abc" ))
892+ self .assertTrue (re .fullmatch (r".+\b" , "abc" , re .ASCII ))
893+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" ))
894+ self .assertTrue (re .fullmatch (br".+\b" , b"abc" , re .LOCALE ))
895+ self .assertTrue (re .fullmatch (r".+\b" , "ьюя" ))
896+ self .assertIsNone (re .search (r"\b" , "ьюя" , re .ASCII ))
829897 # A non-empty string includes a non-boundary zero-length match.
830- self .assertTrue (re .search (r"\B" , "abc" ))
898+ self .assertEqual (re .search (r"\B" , "abc" ).span (), (1 , 1 ))
899+ self .assertEqual (re .search (r"\B" , "abc" , re .ASCII ).span (), (1 , 1 ))
900+ self .assertEqual (re .search (br"\B" , b"abc" ).span (), (1 , 1 ))
901+ self .assertEqual (re .search (br"\B" , b"abc" , re .LOCALE ).span (), (1 , 1 ))
902+ self .assertEqual (re .search (r"\B" , "ьюя" ).span (), (1 , 1 ))
903+ self .assertEqual (re .search (r"\B" , "ьюя" , re .ASCII ).span (), (0 , 0 ))
831904 # There is no non-boundary match at the start of a string.
832- self .assertFalse (re .match (r"\B" , "abc" ))
905+ self .assertIsNone (re .match (r"\B" , "abc" ))
906+ self .assertIsNone (re .match (r"\B" , "abc" , re .ASCII ))
907+ self .assertIsNone (re .match (br"\B" , b"abc" ))
908+ self .assertIsNone (re .match (br"\B" , b"abc" , re .LOCALE ))
909+ self .assertIsNone (re .match (r"\B" , "ьюя" ))
910+ self .assertTrue (re .match (r"\B" , "ьюя" , re .ASCII ))
911+ # There is no non-boundary match at the end of a string.
912+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" ))
913+ self .assertIsNone (re .fullmatch (r".+\B" , "abc" , re .ASCII ))
914+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" ))
915+ self .assertIsNone (re .fullmatch (br".+\B" , b"abc" , re .LOCALE ))
916+ self .assertIsNone (re .fullmatch (r".+\B" , "ьюя" ))
917+ self .assertTrue (re .fullmatch (r".+\B" , "ьюя" , re .ASCII ))
833918 # However, an empty string contains no word boundaries, and also no
834919 # non-boundaries.
835- self .assertIsNone (re .search (r"\B" , "" ))
920+ self .assertIsNone (re .search (r"\b" , "" ))
921+ self .assertIsNone (re .search (r"\b" , "" , re .ASCII ))
922+ self .assertIsNone (re .search (br"\b" , b"" ))
923+ self .assertIsNone (re .search (br"\b" , b"" , re .LOCALE ))
836924 # This one is questionable and different from the perlre behaviour,
837925 # but describes current behavior.
838- self .assertIsNone (re .search (r"\b" , "" ))
926+ self .assertIsNone (re .search (r"\B" , "" ))
927+ self .assertIsNone (re .search (r"\B" , "" , re .ASCII ))
928+ self .assertIsNone (re .search (br"\B" , b"" ))
929+ self .assertIsNone (re .search (br"\B" , b"" , re .LOCALE ))
839930 # A single word-character string has two boundaries, but no
840931 # non-boundary gaps.
841932 self .assertEqual (len (re .findall (r"\b" , "a" )), 2 )
933+ self .assertEqual (len (re .findall (r"\b" , "a" , re .ASCII )), 2 )
934+ self .assertEqual (len (re .findall (br"\b" , b"a" )), 2 )
935+ self .assertEqual (len (re .findall (br"\b" , b"a" , re .LOCALE )), 2 )
842936 self .assertEqual (len (re .findall (r"\B" , "a" )), 0 )
937+ self .assertEqual (len (re .findall (r"\B" , "a" , re .ASCII )), 0 )
938+ self .assertEqual (len (re .findall (br"\B" , b"a" )), 0 )
939+ self .assertEqual (len (re .findall (br"\B" , b"a" , re .LOCALE )), 0 )
843940 # If there are no words, there are no boundaries
844941 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
942+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
943+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
944+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
845945 self .assertEqual (len (re .findall (r"\b" , " " )), 0 )
946+ self .assertEqual (len (re .findall (r"\b" , " " , re .ASCII )), 0 )
947+ self .assertEqual (len (re .findall (br"\b" , b" " )), 0 )
948+ self .assertEqual (len (re .findall (br"\b" , b" " , re .LOCALE )), 0 )
846949 # Can match around the whitespace.
847950 self .assertEqual (len (re .findall (r"\B" , " " )), 2 )
951+ self .assertEqual (len (re .findall (r"\B" , " " , re .ASCII )), 2 )
952+ self .assertEqual (len (re .findall (br"\B" , b" " )), 2 )
953+ self .assertEqual (len (re .findall (br"\B" , b" " , re .LOCALE )), 2 )
848954
849955 def test_bigcharset (self ):
850956 self .assertEqual (re .match ("([\u2222 \u2223 ])" ,
0 commit comments