1818
1919
2020with  imports_under_tool ("i18n" ):
21-     from  pygettext  import  parse_spec 
21+     import  pygettext 
22+     from  pygettext  import  make_escapes , parse_spec 
2223
2324
2425def  normalize_POT_file (pot ):
@@ -517,6 +518,110 @@ def test_parse_keyword_spec(self):
517518                self .assertEqual (str (cm .exception ), message )
518519
519520
521+ class  TestCharacterEscapes (unittest .TestCase ):
522+     # Pygettext always escapes the following characters: 
523+     special_chars  =  {
524+         '\\ ' : r'\\' ,
525+         '\t ' : r'\t' ,
526+         '\r ' : r'\r' ,
527+         '\n ' : r'\n' ,
528+         '\" ' : r'\"' ,
529+     }
530+ 
531+     def  tearDownClass ():
532+         # Reset the global 'escapes' dict to the default 
533+         make_escapes (pass_nonascii = True )
534+ 
535+     def  test_special_chars (self ):
536+         # special_chars are always escaped regardless of the 
537+         # --escape option 
538+         for  pass_nonascii  in  (True , False ):
539+             make_escapes (pass_nonascii = pass_nonascii )
540+             with  self .subTest (pass_nonascii = pass_nonascii ):
541+                 for  char  in  self .special_chars :
542+                     self .assertEqual (pygettext .escape (char , encoding = 'utf-8' ),
543+                                      self .special_chars [char ])
544+ 
545+     def  _char_to_octal_escape (self , char ):
546+         """Convert a character to its octal escape representation.""" 
547+         return  r"\%03o"  %  ord (char )
548+ 
549+     def  _octal_escape_to_string (self , escaped ):
550+         """Convert an octal escape representation to string.""" 
551+         octal_escapes  =  re .findall (r'\\([0-7]{3})' , escaped )
552+         bytestr  =  bytes ([int (n , 8 ) for  n  in  octal_escapes ])
553+         return  bytestr .decode ('utf-8' )
554+ 
555+     def  test_not_escaped (self ):
556+         """ 
557+         Test escaping when --escape is not used. 
558+ 
559+         When --escape is not used, only some characters within the ASCII 
560+         range are escaped. Characters >= 128 are not escaped. 
561+         """ 
562+         # This is the same as invoking pygettext without 
563+         # the --escape option (the default behavior). 
564+         make_escapes (pass_nonascii = True )
565+         # The encoding option is not used when --escape is not passed 
566+         encoding  =  'foo' 
567+ 
568+         # First 32 characters use octal escapes (except for special chars) 
569+         for  i  in  range (32 ):
570+             char  =  chr (i )
571+             if  char  in  self .special_chars :
572+                 continue 
573+             self .assertEqual (pygettext .escape (char , encoding = encoding ),
574+                              self ._char_to_octal_escape (char ))
575+ 
576+         # Characters 32-126 are not escaped (except for special chars) 
577+         for  i  in  range (32 , 127 ):
578+             char  =  chr (i )
579+             if  char  in  self .special_chars :
580+                 continue 
581+             self .assertEqual (pygettext .escape (char , encoding = encoding ), char )
582+ 
583+         # chr(127) uses octal escape 
584+         self .assertEqual (pygettext .escape (chr (127 ), encoding = encoding ),
585+                          '\\ 177' )
586+ 
587+         # All characters >= 128 are not escaped 
588+         for  i  in  range (128 , 256 ):
589+             char  =  chr (i )
590+             self .assertEqual (pygettext .escape (char , encoding = encoding ), char )
591+ 
592+ 
593+     def  test_escaped (self ):
594+         """ 
595+         Test escaping when --escape is used. 
596+ 
597+         When --escape is used, control characters and characters >= 127 are escaped. 
598+         """ 
599+         make_escapes (pass_nonascii = False )
600+         encoding  =  'utf-8' 
601+ 
602+         # First 32 characters use octal escapes (except for special chars) 
603+         for  i  in  range (32 ):
604+             char  =  chr (i )
605+             if  char  in  self .special_chars :
606+                 continue 
607+             self .assertEqual (pygettext .escape (char , encoding = encoding ),
608+                              self ._char_to_octal_escape (char ))
609+ 
610+         # Characters 32-126 are not escaped (except for special chars) 
611+         for  i  in  range (32 , 127 ):
612+             char  =  chr (i )
613+             if  char  in  self .special_chars :
614+                 continue 
615+             self .assertEqual (pygettext .escape (char , encoding = encoding ), char )
616+ 
617+         # Characters >= 127 are escaped 
618+         for  i  in  range (127 , 256 ):
619+             char  =  chr (i )
620+             escaped  =  pygettext .escape (char , encoding = encoding )
621+             decoded_char  =  self ._octal_escape_to_string (escaped )
622+             self .assertEqual (char , decoded_char )
623+ 
624+ 
520625def  extract_from_snapshots ():
521626    snapshots  =  {
522627        'messages.py' : (),
@@ -526,6 +631,8 @@ def extract_from_snapshots():
526631        'custom_keywords.py' : ('--keyword=foo' , '--keyword=nfoo:1,2' ,
527632                               '--keyword=pfoo:1c,2' ,
528633                               '--keyword=npfoo:1c,2,3' , '--keyword=_:1,2' ),
634+         # Test escaping non-ASCII characters 
635+         'escapes.py' : ('--escape' ,),
529636    }
530637
531638    for  filename , args  in  snapshots .items ():
0 commit comments