@@ -332,12 +332,14 @@ def test_strcoll(self):
332332 self .assertLess (locale .strcoll ('a' , 'b' ), 0 )
333333 self .assertEqual (locale .strcoll ('a' , 'a' ), 0 )
334334 self .assertGreater (locale .strcoll ('b' , 'a' ), 0 )
335+ self .assertLess (locale .strcoll ('A' , 'B' ), 0 )
335336 # embedded null character
336337 self .assertRaises (ValueError , locale .strcoll , 'a\0 ' , 'a' )
337338 self .assertRaises (ValueError , locale .strcoll , 'a' , 'a\0 ' )
338339
339340 def test_strxfrm (self ):
340341 self .assertLess (locale .strxfrm ('a' ), locale .strxfrm ('b' ))
342+ self .assertLess (locale .strxfrm ('A' ), locale .strxfrm ('B' ))
341343 # embedded null character
342344 self .assertRaises (ValueError , locale .strxfrm , 'a\0 ' )
343345
@@ -351,8 +353,7 @@ def setUp(self):
351353 enc = codecs .lookup (locale .getencoding () or 'ascii' ).name
352354 if enc not in ('utf-8' , 'iso8859-1' , 'cp1252' ):
353355 raise unittest .SkipTest ('encoding not suitable' )
354- if enc != 'iso8859-1' and (sys .platform == 'darwin' or is_android or
355- sys .platform .startswith ('freebsd' )):
356+ if enc != 'iso8859-1' and (sys .platform == 'darwin' or is_android ):
356357 raise unittest .SkipTest ('wcscoll/wcsxfrm have known bugs' )
357358 BaseLocalizedTest .setUp (self )
358359
@@ -363,6 +364,10 @@ def setUp(self):
363364 "gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE" )
364365 def test_strcoll_with_diacritic (self ):
365366 self .assertLess (locale .strcoll ('à' , 'b' ), 0 )
367+ self .assertLess (locale .strcoll ('À' , 'B' ), 0 )
368+ self .assertLess (locale .strcoll ('å' , 'b' ), 0 )
369+ self .assertLess (locale .strcoll ('\xc5 ' , 'B' ), 0 )
370+ self .assertLess (locale .strcoll ('\u212b ' , 'B' ), 0 )
366371
367372 @unittest .skipIf (sys .platform .startswith ('aix' ),
368373 'bpo-29972: broken test on AIX' )
@@ -371,6 +376,28 @@ def test_strcoll_with_diacritic(self):
371376 "gh-124108: NetBSD doesn't support UTF-8 for LC_COLLATE" )
372377 def test_strxfrm_with_diacritic (self ):
373378 self .assertLess (locale .strxfrm ('à' ), locale .strxfrm ('b' ))
379+ self .assertLess (locale .strxfrm ('À' ), locale .strxfrm ('B' ))
380+ self .assertLess (locale .strxfrm ('å' ), locale .strxfrm ('b' ))
381+ # gh-130567: Should not fail with OSError EINVAL.
382+ self .assertLess (locale .strxfrm ('\xc5 ' ), locale .strxfrm ('B' ))
383+ self .assertLess (locale .strxfrm ('\u212b ' ), locale .strxfrm ('B' ))
384+
385+ def test_strxfrm_strcoll_consistency (self ):
386+ enc = codecs .lookup (locale .getencoding () or 'ascii' ).name
387+ if enc != 'utf-8' :
388+ self .skipTest ('strcoll() and strxfrm() can be inconsistent on non-UTF-8 locale' )
389+ def check (a , b ):
390+ r = locale .strcoll (a , b )
391+ if r < 0 :
392+ self .assertLess (locale .strxfrm (a ), locale .strxfrm (b ))
393+ elif r > 0 :
394+ self .assertGreater (locale .strxfrm (a ), locale .strxfrm (b ))
395+ else :
396+ self .assertEqual (locale .strxfrm (a ), locale .strxfrm (b ))
397+ check ('à' , 'À' )
398+ check ('å' , '\xc5 ' ) # 'Å' U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
399+ check ('å' , '\u212b ' ) # 'Å' U+212B ANGSTROM SIGN
400+ check ('\xc5 ' , '\u212b ' )
374401
375402
376403class NormalizeTest (unittest .TestCase ):
0 commit comments